Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py: 16%
608 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import (
4 datetime,
5 time,
6 timedelta,
7 tzinfo,
8)
9from typing import (
10 TYPE_CHECKING,
11 Literal,
12 cast,
13)
14import warnings
16import numpy as np
18from pandas._libs import (
19 lib,
20 tslib,
21)
22from pandas._libs.tslibs import (
23 BaseOffset,
24 NaT,
25 NaTType,
26 Resolution,
27 Timestamp,
28 astype_overflowsafe,
29 fields,
30 get_resolution,
31 get_unit_from_dtype,
32 ints_to_pydatetime,
33 is_date_array_normalized,
34 is_supported_unit,
35 is_unitless,
36 normalize_i8_timestamps,
37 timezones,
38 to_offset,
39 tz_convert_from_utc,
40 tzconversion,
41)
42from pandas._typing import npt
43from pandas.errors import (
44 OutOfBoundsDatetime,
45 PerformanceWarning,
46)
47from pandas.util._exceptions import find_stack_level
48from pandas.util._validators import validate_inclusive
50from pandas.core.dtypes.astype import astype_dt64_to_dt64tz
51from pandas.core.dtypes.common import (
52 DT64NS_DTYPE,
53 INT64_DTYPE,
54 is_bool_dtype,
55 is_datetime64_any_dtype,
56 is_datetime64_dtype,
57 is_datetime64_ns_dtype,
58 is_datetime64tz_dtype,
59 is_dtype_equal,
60 is_extension_array_dtype,
61 is_float_dtype,
62 is_object_dtype,
63 is_period_dtype,
64 is_sparse,
65 is_string_dtype,
66 is_timedelta64_dtype,
67 pandas_dtype,
68)
69from pandas.core.dtypes.dtypes import DatetimeTZDtype
70from pandas.core.dtypes.missing import isna
72from pandas.core.arrays import datetimelike as dtl
73from pandas.core.arrays._ranges import generate_regular_range
74import pandas.core.common as com
76from pandas.tseries.frequencies import get_period_alias
77from pandas.tseries.offsets import (
78 BDay,
79 Day,
80 Tick,
81)
83if TYPE_CHECKING: 83 ↛ 85line 83 didn't jump to line 85, because the condition on line 83 was never true
85 from pandas import DataFrame
86 from pandas.core.arrays import (
87 PeriodArray,
88 TimedeltaArray,
89 )
# Shared module-level constant: the midnight time value (00:00).
_midnight = time(0, 0)
def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
    """
    Return a datetime64[`unit`] dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None
    unit : str, default "ns"

    Returns
    -------
    np.dtype or DatetimeTZDtype
        A tz-naive numpy ``M8[unit]`` dtype when ``tz`` is None, otherwise
        a ``DatetimeTZDtype`` carrying the timezone.
    """
    if tz is not None:
        return DatetimeTZDtype(tz=tz, unit=unit)
    return np.dtype(f"M8[{unit}]")
113def _field_accessor(name: str, field: str, docstring=None):
114 def f(self):
115 values = self._local_timestamps()
117 if field in self._bool_ops:
118 result: np.ndarray
120 if field.endswith(("start", "end")):
121 freq = self.freq
122 month_kw = 12
123 if freq:
124 kwds = freq.kwds
125 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
127 result = fields.get_start_end_field(
128 values, field, self.freqstr, month_kw, reso=self._reso
129 )
130 else:
131 result = fields.get_date_field(values, field, reso=self._reso)
133 # these return a boolean by-definition
134 return result
136 if field in self._object_ops:
137 result = fields.get_date_name_field(values, field, reso=self._reso)
138 result = self._maybe_mask_results(result, fill_value=None)
140 else:
141 result = fields.get_date_field(values, field, reso=self._reso)
142 result = self._maybe_mask_results(
143 result, fill_value=None, convert="float64"
144 )
146 return result
148 f.__name__ = name
149 f.__doc__ = docstring
150 return property(f)
153class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
154 """
155 Pandas ExtensionArray for tz-naive or tz-aware datetime data.
157 .. warning::
159 DatetimeArray is currently experimental, and its API may change
160 without warning. In particular, :attr:`DatetimeArray.dtype` is
161 expected to change to always be an instance of an ``ExtensionDtype``
162 subclass.
164 Parameters
165 ----------
166 values : Series, Index, DatetimeArray, ndarray
167 The datetime data.
169 For DatetimeArray `values` (or a Series or Index boxing one),
170 `dtype` and `freq` will be extracted from `values`.
172 dtype : numpy.dtype or DatetimeTZDtype
173 Note that the only NumPy dtype allowed is 'datetime64[ns]'.
174 freq : str or Offset, optional
175 The frequency.
176 copy : bool, default False
177 Whether to copy the underlying array of values.
179 Attributes
180 ----------
181 None
183 Methods
184 -------
185 None
186 """
    # ------------------------------------------------------------------
    # Class constants used by the datetime-like delegation machinery.

    _typ = "datetimearray"
    # Scalar stored for missing entries in the backing ndarray.
    _internal_fill_value = np.datetime64("NaT", "ns")
    # Scalar types accepted as datetime-like by this array.
    _recognized_scalars = (datetime, np.datetime64)
    _is_recognized_dtype = is_datetime64_any_dtype
    _infer_matches = ("datetime", "datetime64", "date")

    @property
    def _scalar_type(self) -> type[Timestamp]:
        # The scalar box type for this array's elements.
        return Timestamp

    # define my properties & methods for delegation
    # Boolean-valued field accessors (see _field_accessor: no NaT masking).
    _bool_ops: list[str] = [
        "is_month_start",
        "is_month_end",
        "is_quarter_start",
        "is_quarter_end",
        "is_year_start",
        "is_year_end",
        "is_leap_year",
    ]
    # Object-dtype ops.
    _object_ops: list[str] = ["freq", "tz"]
    # Numeric field accessors (NaT-masked to float64 in _field_accessor).
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "week",
        "weekday",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "days_in_month",
        "daysinmonth",
        "microsecond",
        "nanosecond",
    ]
    # Remaining delegated attributes.
    _other_ops: list[str] = ["date", "time", "timetz"]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops
    _datetimelike_methods: list[str] = [
        "to_period",
        "tz_localize",
        "tz_convert",
        "normalize",
        "strftime",
        "round",
        "floor",
        "ceil",
        "month_name",
        "day_name",
    ]

    # ndim is inherited from ExtensionArray, must exist to ensure
    # Timestamp.__richcmp__(DateTimeArray) operates pointwise

    # ensure that operations with numpy arrays defer to our implementation
    __array_priority__ = 1000

    # -----------------------------------------------------------------
    # Constructors

    _dtype: np.dtype | DatetimeTZDtype
    _freq: BaseOffset | None = None
    _default_dtype = DT64NS_DTYPE  # used in TimeLikeOps.__init__

    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        # Both the backing ndarray's dtype and the requested dtype must pass
        # datetime64 validation; the validated dtype is returned.
        _validate_dt64_dtype(values.dtype)
        dtype = _validate_dt64_dtype(dtype)
        return dtype
    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype=DT64NS_DTYPE,
    ) -> DatetimeArray:
        """
        Fastpath constructor: no validation or coercion beyond assertions.

        ``values`` must already be an M8 ndarray consistent with ``dtype``.
        """
        assert isinstance(values, np.ndarray)
        assert dtype.kind == "M"
        if isinstance(dtype, np.dtype):
            # tz-naive: the dtype must match the ndarray exactly and carry a unit.
            assert dtype == values.dtype
            assert not is_unitless(dtype)
        else:
            # DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
            # then values.dtype should be M8[us].
            assert dtype._reso == get_unit_from_dtype(values.dtype)

        result = super()._simple_new(values, dtype)
        result._freq = freq
        return result
    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
        """Standard EA constructor: delegates to the non-strict path unchanged."""
        return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        dtype=None,
        copy: bool = False,
        tz=None,
        freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
        dayfirst: bool = False,
        yearfirst: bool = False,
        ambiguous="raise",
    ):
        """
        Convert a datetime-like sequence to a DatetimeArray, inferring and
        validating tz and frequency along the way.
        """
        # freq=None means "explicitly no freq" — distinct from "not passed",
        # which is represented by lib.no_default.
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        subarr, tz, inferred_freq = _sequence_to_dt64ns(
            data,
            dtype=dtype,
            copy=copy,
            tz=tz,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            ambiguous=ambiguous,
        )

        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        dtype = tz_to_dtype(tz)
        result = cls._simple_new(subarr, freq=freq, dtype=dtype)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq, ambiguous=ambiguous)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result
    @classmethod
    def _generate_range(
        cls,
        start,
        end,
        periods,
        freq,
        tz=None,
        normalize=False,
        ambiguous="raise",
        nonexistent="raise",
        inclusive="both",
    ):
        """
        Build a DatetimeArray of evenly spaced points from a
        start/end/periods/freq specification.

        Exactly three of the four parameters start, end, periods and freq
        must be specified; with no freq, endpoints are linearly interpolated.
        """

        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is NaT or end is NaT:
            raise ValueError("Neither `start` nor `end` can be NaT")

        left_inclusive, right_inclusive = validate_inclusive(inclusive)
        start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
        tz = _infer_tz_from_endpoints(start, end, tz)

        if tz is not None:
            # Localize the start and end arguments
            start_tz = None if start is None else start.tz
            end_tz = None if end is None else end.tz
            start = _maybe_localize_point(
                start, start_tz, start, freq, tz, ambiguous, nonexistent
            )
            end = _maybe_localize_point(
                end, end_tz, end, freq, tz, ambiguous, nonexistent
            )
        if freq is not None:
            # We break Day arithmetic (fixed 24 hour) here and opt for
            # Day to mean calendar day (23/24/25 hour). Therefore, strip
            # tz info from start and day to avoid DST arithmetic
            if isinstance(freq, Day):
                if start is not None:
                    start = start.tz_localize(None)
                if end is not None:
                    end = end.tz_localize(None)

            if isinstance(freq, Tick):
                # Fixed-size steps: generate directly in i8 space.
                i8values = generate_regular_range(start, end, periods, freq)
            else:
                # Irregular offsets: materialize each point and collect values.
                xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
                i8values = np.array([x.value for x in xdr], dtype=np.int64)

            endpoint_tz = start.tz if start is not None else end.tz

            if tz is not None and endpoint_tz is None:

                if not timezones.is_utc(tz):
                    # short-circuit tz_localize_to_utc which would make
                    # an unnecessary copy with UTC but be a no-op.
                    i8values = tzconversion.tz_localize_to_utc(
                        i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
                    )

                # i8values is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz, ambiguous, nonexistent)
                if end is not None:
                    end = end.tz_localize(tz, ambiguous, nonexistent)
        else:
            # Create a linearly spaced date_range in local time
            # Nanosecond-granularity timestamps aren't always correctly
            # representable with doubles, so we limit the range that we
            # pass to np.linspace as much as possible
            i8values = (
                np.linspace(0, end.value - start.value, periods, dtype="int64")
                + start.value
            )
            if i8values.dtype != "i8":
                # 2022-01-09 I (brock) am not sure if it is possible for this
                # to overflow and cast to e.g. f8, but if it does we need to cast
                i8values = i8values.astype("i8")

        # Trim endpoints excluded by `inclusive`.
        if start == end:
            if not left_inclusive and not right_inclusive:
                i8values = i8values[1:-1]
        else:
            start_i8 = Timestamp(start).value
            end_i8 = Timestamp(end).value
            if not left_inclusive or not right_inclusive:
                if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                    i8values = i8values[1:]
                if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                    i8values = i8values[:-1]

        dt64_values = i8values.view("datetime64[ns]")
        dtype = tz_to_dtype(tz)
        return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
446 # -----------------------------------------------------------------
447 # DatetimeLike Interface
449 def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64:
450 if not isinstance(value, self._scalar_type) and value is not NaT:
451 raise ValueError("'value' should be a Timestamp.")
452 self._check_compatible_with(value, setitem=setitem)
453 return value.asm8
    def _scalar_from_string(self, value) -> Timestamp | NaTType:
        """Parse a string into a Timestamp carrying this array's tz."""
        return Timestamp(value, tz=self.tz)
    def _check_compatible_with(self, other, setitem: bool = False):
        """
        Verify that `other` may be compared with / assigned into self.

        Raises
        ------
        TypeError
            If tz-awareness differs (via _assert_tzawareness_compat).
        ValueError
            On setitem with mismatched timezones (currently a deprecated
            warn-then-raise path; see GH#37605).
        """
        if other is NaT:
            # NaT is compatible with everything.
            return
        self._assert_tzawareness_compat(other)
        if setitem:
            # Stricter check for setitem vs comparison methods
            if self.tz is not None and not timezones.tz_compare(self.tz, other.tz):
                # TODO(2.0): remove this check. GH#37605
                warnings.warn(
                    "Setitem-like behavior with mismatched timezones is deprecated "
                    "and will change in a future version. Instead of raising "
                    "(or for Index, Series, and DataFrame methods, coercing to "
                    "object dtype), the value being set (or passed as a "
                    "fill_value, or inserted) will be cast to the existing "
                    "DatetimeArray/DatetimeIndex/Series/DataFrame column's "
                    "timezone. To retain the old behavior, explicitly cast to "
                    "object dtype before the operation.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")
480 # -----------------------------------------------------------------
481 # Descriptive Properties
    def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
        """Box one raw datetime64 element as a Timestamp (or NaT), attaching tz/freq."""
        # GH#42228
        value = x.view("i8")
        ts = Timestamp._from_value_and_reso(value, reso=self._reso, tz=self.tz)
        # Non-overlapping identity check (left operand type: "Timestamp",
        # right operand type: "NaTType")
        if ts is not NaT:  # type: ignore[comparison-overlap]
            # GH#41586
            # do this instead of passing to the constructor to avoid FutureWarning
            ts._set_freq(self.freq)
        return ts
    @property
    # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
    # incompatible with return type "ExtensionDtype" in supertype
    # "ExtensionArray"
    def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
        """
        The dtype for the DatetimeArray.

        .. warning::

           A future version of pandas will change dtype to never be a
           ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
           always be an instance of an ``ExtensionDtype`` subclass.

        Returns
        -------
        numpy.dtype or DatetimeTZDtype
            If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
            is returned.

            If the values are tz-aware, then the ``DatetimeTZDtype``
            is returned.
        """
        # Backed by the _dtype attribute declared in the Constructors section.
        return self._dtype
    @property
    def tz(self) -> tzinfo | None:
        """
        Return the timezone.

        Returns
        -------
        datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
            Returns None when the array is tz-naive.
        """
        # GH 18595
        # The tz lives on the dtype (DatetimeTZDtype); plain np.dtype has none.
        return getattr(self.dtype, "tz", None)

    @tz.setter
    def tz(self, value):
        # GH 3746: Prevent localizing or converting the index by setting tz
        raise AttributeError(
            "Cannot directly set timezone. Use tz_localize() "
            "or tz_convert() as appropriate"
        )

    @property
    def tzinfo(self) -> tzinfo | None:
        """
        Alias for tz attribute
        """
        return self.tz

    @property  # NB: override with cache_readonly in immutable subclasses
    def is_normalized(self) -> bool:
        """
        Returns True if all of the dates are at midnight ("no time")
        """
        return is_date_array_normalized(self.asi8, self.tz, reso=self._reso)

    @property  # NB: override with cache_readonly in immutable subclasses
    def _resolution_obj(self) -> Resolution:
        # Resolution of the data as computed by tslibs.get_resolution
        # (evaluated against tz-local values).
        return get_resolution(self.asi8, self.tz, reso=self._reso)
559 # ----------------------------------------------------------------
560 # Array-Like / EA-Interface Methods
562 def __array__(self, dtype=None) -> np.ndarray:
563 if dtype is None and self.tz:
564 # The default for tz-aware is object, to preserve tz info
565 dtype = object
567 return super().__array__(dtype=dtype)
    def __iter__(self):
        """
        Return an iterator over the boxed values

        Yields
        ------
        tstamp : Timestamp
        """
        if self.ndim > 1:
            # 2-D case: yield sub-arrays (rows), not scalars.
            for i in range(len(self)):
                yield self[i]
        else:
            # convert in chunks of 10k for efficiency
            data = self.asi8
            length = len(self)
            chunksize = 10000
            chunks = (length // chunksize) + 1

            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, length)
                # Box each chunk of i8 values into Timestamps in one call.
                converted = ints_to_pydatetime(
                    data[start_i:end_i],
                    tz=self.tz,
                    freq=self.freq,
                    box="timestamp",
                    reso=self._reso,
                )
                yield from converted
    def astype(self, dtype, copy: bool = True):
        # We handle
        # --> datetime
        # --> period
        # DatetimeLikeArrayMixin Super handles the rest.
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            # Same dtype: at most a copy is needed.
            if copy:
                return self.copy()
            return self

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and not is_unitless(dtype)
            and is_supported_unit(get_unit_from_dtype(dtype))
        ):
            # unit conversion e.g. datetime64[s]
            res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
            return type(self)._simple_new(res_values, dtype=res_values.dtype)
            # TODO: preserve freq?

        elif is_datetime64_ns_dtype(dtype):
            # ns-dtype target (naive or tz-aware) handled by the astype helper.
            return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

        elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):
            # tzaware unit conversion e.g. datetime64[s, UTC]
            np_dtype = np.dtype(dtype.str)
            res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
            return type(self)._simple_new(res_values, dtype=dtype)
            # TODO: preserve freq?

        elif (
            self.tz is None
            and is_datetime64_dtype(dtype)
            and dtype != self.dtype
            and is_unitless(dtype)
        ):
            # Unit-less datetime64 target: deprecated, returns a raw ndarray.
            # TODO(2.0): just fall through to dtl.DatetimeLikeArrayMixin.astype
            warnings.warn(
                "Passing unit-less datetime64 dtype to .astype is deprecated "
                "and will raise in a future version. Pass 'datetime64[ns]' instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # unit conversion e.g. datetime64[s]
            return self._ndarray.astype(dtype)

        elif is_period_dtype(dtype):
            return self.to_period(freq=dtype.freq)
        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
652 # -----------------------------------------------------------------
653 # Rendering Methods
    def _format_native_types(
        self, *, na_rep="NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """Render each element as a string, returning an object ndarray."""
        # Local import to avoid a circular dependency with pandas.io.formats.
        from pandas.io.formats.format import get_format_datetime64_from_values

        fmt = get_format_datetime64_from_values(self, date_format)

        return tslib.format_array_from_datetime(
            self.asi8, tz=self.tz, format=fmt, na_rep=na_rep, reso=self._reso
        )
666 # -----------------------------------------------------------------
667 # Comparison Methods
669 def _has_same_tz(self, other) -> bool:
671 # vzone shouldn't be None if value is non-datetime like
672 if isinstance(other, np.datetime64):
673 # convert to Timestamp as np.datetime64 doesn't have tz attr
674 other = Timestamp(other)
676 if not hasattr(other, "tzinfo"):
677 return False
678 other_tz = other.tzinfo
679 return timezones.tz_compare(self.tzinfo, other_tz)
681 def _assert_tzawareness_compat(self, other) -> None:
682 # adapted from _Timestamp._assert_tzawareness_compat
683 other_tz = getattr(other, "tzinfo", None)
684 other_dtype = getattr(other, "dtype", None)
686 if is_datetime64tz_dtype(other_dtype):
687 # Get tzinfo from Series dtype
688 other_tz = other.dtype.tz
689 if other is NaT:
690 # pd.NaT quacks both aware and naive
691 pass
692 elif self.tz is None:
693 if other_tz is not None:
694 raise TypeError(
695 "Cannot compare tz-naive and tz-aware datetime-like objects."
696 )
697 elif other_tz is None:
698 raise TypeError(
699 "Cannot compare tz-naive and tz-aware datetime-like objects"
700 )
702 # -----------------------------------------------------------------
703 # Arithmetic Methods
    def _add_offset(self, offset) -> DatetimeArray:
        """Add a non-Tick DateOffset to each element, preserving tz."""
        assert not isinstance(offset, Tick)

        if self.tz is not None:
            # Apply the offset against wall-clock times; re-localize below.
            values = self.tz_localize(None)
        else:
            values = self

        try:
            result = offset._apply_array(values).view(values.dtype)
        except NotImplementedError:
            # Fallback: apply the offset elementwise via object dtype (slow).
            warnings.warn(
                "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
                PerformanceWarning,
                stacklevel=find_stack_level(),
            )
            result = self.astype("O") + offset
            result = type(self)._from_sequence(result)
            if not len(self):
                # GH#30336 _from_sequence won't be able to infer self.tz
                return result.tz_localize(self.tz)

        else:
            result = DatetimeArray._simple_new(result, dtype=result.dtype)
            if self.tz is not None:
                # FIXME: tz_localize with non-nano
                result = result.tz_localize(self.tz)

        return result
736 # -----------------------------------------------------------------
737 # Timezone Conversion and Localization Methods
739 def _local_timestamps(self) -> npt.NDArray[np.int64]:
740 """
741 Convert to an i8 (unix-like nanosecond timestamp) representation
742 while keeping the local timezone and not using UTC.
743 This is used to calculate time-of-day information as if the timestamps
744 were timezone-naive.
745 """
746 if self.tz is None or timezones.is_utc(self.tz):
747 # Avoid the copy that would be made in tzconversion
748 return self.asi8
749 return tz_convert_from_utc(self.asi8, self.tz, reso=self._reso)
751 def tz_convert(self, tz) -> DatetimeArray:
752 """
753 Convert tz-aware Datetime Array/Index from one time zone to another.
755 Parameters
756 ----------
757 tz : str, pytz.timezone, dateutil.tz.tzfile or None
758 Time zone for time. Corresponding timestamps would be converted
759 to this time zone of the Datetime Array/Index. A `tz` of None will
760 convert to UTC and remove the timezone information.
762 Returns
763 -------
764 Array or Index
766 Raises
767 ------
768 TypeError
769 If Datetime Array/Index is tz-naive.
771 See Also
772 --------
773 DatetimeIndex.tz : A timezone that has a variable offset from UTC.
774 DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
775 given time zone, or remove timezone from a tz-aware DatetimeIndex.
777 Examples
778 --------
779 With the `tz` parameter, we can change the DatetimeIndex
780 to other time zones:
782 >>> dti = pd.date_range(start='2014-08-01 09:00',
783 ... freq='H', periods=3, tz='Europe/Berlin')
785 >>> dti
786 DatetimeIndex(['2014-08-01 09:00:00+02:00',
787 '2014-08-01 10:00:00+02:00',
788 '2014-08-01 11:00:00+02:00'],
789 dtype='datetime64[ns, Europe/Berlin]', freq='H')
791 >>> dti.tz_convert('US/Central')
792 DatetimeIndex(['2014-08-01 02:00:00-05:00',
793 '2014-08-01 03:00:00-05:00',
794 '2014-08-01 04:00:00-05:00'],
795 dtype='datetime64[ns, US/Central]', freq='H')
797 With the ``tz=None``, we can remove the timezone (after converting
798 to UTC if necessary):
800 >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
801 ... periods=3, tz='Europe/Berlin')
803 >>> dti
804 DatetimeIndex(['2014-08-01 09:00:00+02:00',
805 '2014-08-01 10:00:00+02:00',
806 '2014-08-01 11:00:00+02:00'],
807 dtype='datetime64[ns, Europe/Berlin]', freq='H')
809 >>> dti.tz_convert(None)
810 DatetimeIndex(['2014-08-01 07:00:00',
811 '2014-08-01 08:00:00',
812 '2014-08-01 09:00:00'],
813 dtype='datetime64[ns]', freq='H')
814 """
815 tz = timezones.maybe_get_tz(tz)
817 if self.tz is None:
818 # tz naive, use tz_localize
819 raise TypeError(
820 "Cannot convert tz-naive timestamps, use tz_localize to localize"
821 )
823 # No conversion since timestamps are all UTC to begin with
824 dtype = tz_to_dtype(tz, unit=self._unit)
825 return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq)
827 @dtl.ravel_compat
828 def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray:
829 """
830 Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
832 This method takes a time zone (tz) naive Datetime Array/Index object
833 and makes this time zone aware. It does not move the time to another
834 time zone.
836 This method can also be used to do the inverse -- to create a time
837 zone unaware object from an aware object. To that end, pass `tz=None`.
839 Parameters
840 ----------
841 tz : str, pytz.timezone, dateutil.tz.tzfile or None
842 Time zone to convert timestamps to. Passing ``None`` will
843 remove the time zone information preserving local time.
844 ambiguous : 'infer', 'NaT', bool array, default 'raise'
845 When clocks moved backward due to DST, ambiguous times may arise.
846 For example in Central European Time (UTC+01), when going from
847 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
848 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
849 `ambiguous` parameter dictates how ambiguous times should be
850 handled.
852 - 'infer' will attempt to infer fall dst-transition hours based on
853 order
854 - bool-ndarray where True signifies a DST time, False signifies a
855 non-DST time (note that this flag is only applicable for
856 ambiguous times)
857 - 'NaT' will return NaT where there are ambiguous times
858 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
859 times.
861 nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \
862default 'raise'
863 A nonexistent time does not exist in a particular timezone
864 where clocks moved forward due to DST.
866 - 'shift_forward' will shift the nonexistent time forward to the
867 closest existing time
868 - 'shift_backward' will shift the nonexistent time backward to the
869 closest existing time
870 - 'NaT' will return NaT where there are nonexistent times
871 - timedelta objects will shift nonexistent times by the timedelta
872 - 'raise' will raise an NonExistentTimeError if there are
873 nonexistent times.
875 Returns
876 -------
877 Same type as self
878 Array/Index converted to the specified time zone.
880 Raises
881 ------
882 TypeError
883 If the Datetime Array/Index is tz-aware and tz is not None.
885 See Also
886 --------
887 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
888 one time zone to another.
890 Examples
891 --------
892 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
893 >>> tz_naive
894 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
895 '2018-03-03 09:00:00'],
896 dtype='datetime64[ns]', freq='D')
898 Localize DatetimeIndex in US/Eastern time zone:
900 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
901 >>> tz_aware
902 DatetimeIndex(['2018-03-01 09:00:00-05:00',
903 '2018-03-02 09:00:00-05:00',
904 '2018-03-03 09:00:00-05:00'],
905 dtype='datetime64[ns, US/Eastern]', freq=None)
907 With the ``tz=None``, we can remove the time zone information
908 while keeping the local time (not converted to UTC):
910 >>> tz_aware.tz_localize(None)
911 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
912 '2018-03-03 09:00:00'],
913 dtype='datetime64[ns]', freq=None)
915 Be careful with DST changes. When there is sequential data, pandas can
916 infer the DST time:
918 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
919 ... '2018-10-28 02:00:00',
920 ... '2018-10-28 02:30:00',
921 ... '2018-10-28 02:00:00',
922 ... '2018-10-28 02:30:00',
923 ... '2018-10-28 03:00:00',
924 ... '2018-10-28 03:30:00']))
925 >>> s.dt.tz_localize('CET', ambiguous='infer')
926 0 2018-10-28 01:30:00+02:00
927 1 2018-10-28 02:00:00+02:00
928 2 2018-10-28 02:30:00+02:00
929 3 2018-10-28 02:00:00+01:00
930 4 2018-10-28 02:30:00+01:00
931 5 2018-10-28 03:00:00+01:00
932 6 2018-10-28 03:30:00+01:00
933 dtype: datetime64[ns, CET]
935 In some cases, inferring the DST is impossible. In such cases, you can
936 pass an ndarray to the ambiguous parameter to set the DST explicitly
938 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
939 ... '2018-10-28 02:36:00',
940 ... '2018-10-28 03:46:00']))
941 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
942 0 2018-10-28 01:20:00+02:00
943 1 2018-10-28 02:36:00+02:00
944 2 2018-10-28 03:46:00+01:00
945 dtype: datetime64[ns, CET]
947 If the DST transition causes nonexistent times, you can shift these
948 dates forward or backwards with a timedelta object or `'shift_forward'`
949 or `'shift_backwards'`.
951 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
952 ... '2015-03-29 03:30:00']))
953 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
954 0 2015-03-29 03:00:00+02:00
955 1 2015-03-29 03:30:00+02:00
956 dtype: datetime64[ns, Europe/Warsaw]
958 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
959 0 2015-03-29 01:59:59.999999999+01:00
960 1 2015-03-29 03:30:00+02:00
961 dtype: datetime64[ns, Europe/Warsaw]
963 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
964 0 2015-03-29 03:30:00+02:00
965 1 2015-03-29 03:30:00+02:00
966 dtype: datetime64[ns, Europe/Warsaw]
967 """
968 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
969 if nonexistent not in nonexistent_options and not isinstance(
970 nonexistent, timedelta
971 ):
972 raise ValueError(
973 "The nonexistent argument must be one of 'raise', "
974 "'NaT', 'shift_forward', 'shift_backward' or "
975 "a timedelta object"
976 )
978 if self.tz is not None:
979 if tz is None:
980 new_dates = tz_convert_from_utc(self.asi8, self.tz)
981 else:
982 raise TypeError("Already tz-aware, use tz_convert to convert.")
983 else:
984 tz = timezones.maybe_get_tz(tz)
985 # Convert to UTC
987 new_dates = tzconversion.tz_localize_to_utc(
988 self.asi8,
989 tz,
990 ambiguous=ambiguous,
991 nonexistent=nonexistent,
992 reso=self._reso,
993 )
994 new_dates = new_dates.view(f"M8[{self._unit}]")
995 dtype = tz_to_dtype(tz, unit=self._unit)
997 freq = None
998 if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])):
999 # we can preserve freq
1000 # TODO: Also for fixed-offsets
1001 freq = self.freq
1002 elif tz is None and self.tz is None:
1003 # no-op
1004 freq = self.freq
1005 return self._simple_new(new_dates, dtype=dtype, freq=freq)
1007 # ----------------------------------------------------------------
1008 # Conversion Methods - Vectorized analogues of Timestamp methods
    def to_pydatetime(self) -> npt.NDArray[np.object_]:
        """
        Return an ndarray of datetime.datetime objects.

        Returns
        -------
        datetimes : ndarray[object]
        """
        # Boxes each i8 value; elements are tz-aware when self.tz is set.
        return ints_to_pydatetime(self.asi8, tz=self.tz, reso=self._reso)
1020 def normalize(self) -> DatetimeArray:
1021 """
1022 Convert times to midnight.
1024 The time component of the date-time is converted to midnight i.e.
1025 00:00:00. This is useful in cases, when the time does not matter.
1026 Length is unaltered. The timezones are unaffected.
1028 This method is available on Series with datetime values under
1029 the ``.dt`` accessor, and directly on Datetime Array/Index.
1031 Returns
1032 -------
1033 DatetimeArray, DatetimeIndex or Series
1034 The same type as the original data. Series will have the same
1035 name and index. DatetimeIndex will have the same name.
1037 See Also
1038 --------
1039 floor : Floor the datetimes to the specified freq.
1040 ceil : Ceil the datetimes to the specified freq.
1041 round : Round the datetimes to the specified freq.
1043 Examples
1044 --------
1045 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
1046 ... periods=3, tz='Asia/Calcutta')
1047 >>> idx
1048 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1049 '2014-08-01 11:00:00+05:30',
1050 '2014-08-01 12:00:00+05:30'],
1051 dtype='datetime64[ns, Asia/Calcutta]', freq='H')
1052 >>> idx.normalize()
1053 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1054 '2014-08-01 00:00:00+05:30',
1055 '2014-08-01 00:00:00+05:30'],
1056 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1057 """
1058 new_values = normalize_i8_timestamps(self.asi8, self.tz, reso=self._reso)
1059 dt64_values = new_values.view(self._ndarray.dtype)
1061 dta = type(self)._simple_new(dt64_values, dtype=dt64_values.dtype)
1062 dta = dta._with_freq("infer")
1063 if self.tz is not None:
1064 dta = dta.tz_localize(self.tz)
1065 return dta
1067 def to_period(self, freq=None) -> PeriodArray:
1068 """
1069 Cast to PeriodArray/Index at a particular frequency.
1071 Converts DatetimeArray/Index to PeriodArray/Index.
1073 Parameters
1074 ----------
1075 freq : str or Offset, optional
1076 One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
1077 or an Offset object. Will be inferred by default.
1079 Returns
1080 -------
1081 PeriodArray/Index
1083 Raises
1084 ------
1085 ValueError
1086 When converting a DatetimeArray/Index with non-regular values,
1087 so that a frequency cannot be inferred.
1089 See Also
1090 --------
1091 PeriodIndex: Immutable ndarray holding ordinal values.
1092 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
1094 Examples
1095 --------
1096 >>> df = pd.DataFrame({"y": [1, 2, 3]},
1097 ... index=pd.to_datetime(["2000-03-31 00:00:00",
1098 ... "2000-05-31 00:00:00",
1099 ... "2000-08-31 00:00:00"]))
1100 >>> df.index.to_period("M")
1101 PeriodIndex(['2000-03', '2000-05', '2000-08'],
1102 dtype='period[M]')
1104 Infer the daily frequency
1106 >>> idx = pd.date_range("2017-01-01", periods=2)
1107 >>> idx.to_period()
1108 PeriodIndex(['2017-01-01', '2017-01-02'],
1109 dtype='period[D]')
1110 """
1111 from pandas.core.arrays import PeriodArray
1113 if self.tz is not None:
1114 warnings.warn(
1115 "Converting to PeriodArray/Index representation "
1116 "will drop timezone information.",
1117 UserWarning,
1118 stacklevel=find_stack_level(),
1119 )
1121 if freq is None:
1122 freq = self.freqstr or self.inferred_freq
1124 if freq is None:
1125 raise ValueError(
1126 "You must pass a freq argument as current index has none."
1127 )
1129 res = get_period_alias(freq)
1131 # https://github.com/pandas-dev/pandas/issues/33358
1132 if res is None:
1133 res = freq
1135 freq = res
1137 return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
1139 def to_perioddelta(self, freq) -> TimedeltaArray:
1140 """
1141 Calculate deltas between self values and self converted to Periods at a freq.
1143 Used for vectorized offsets.
1145 Parameters
1146 ----------
1147 freq : Period frequency
1149 Returns
1150 -------
1151 TimedeltaArray/Index
1152 """
1153 # Deprecaation GH#34853
1154 warnings.warn(
1155 "to_perioddelta is deprecated and will be removed in a "
1156 "future version. "
1157 "Use `dtindex - dtindex.to_period(freq).to_timestamp()` instead.",
1158 FutureWarning,
1159 # stacklevel chosen to be correct for when called from DatetimeIndex
1160 stacklevel=find_stack_level(),
1161 )
1162 from pandas.core.arrays.timedeltas import TimedeltaArray
1164 if self._ndarray.dtype != "M8[ns]":
1165 raise NotImplementedError("Only supported for nanosecond resolution.")
1167 i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8
1168 m8delta = i8delta.view("m8[ns]")
1169 return TimedeltaArray(m8delta)
1171 # -----------------------------------------------------------------
1172 # Properties - Vectorized Timestamp Properties/Methods
1174 def month_name(self, locale=None) -> npt.NDArray[np.object_]:
1175 """
1176 Return the month names with specified locale.
1178 Parameters
1179 ----------
1180 locale : str, optional
1181 Locale determining the language in which to return the month name.
1182 Default is English locale.
1184 Returns
1185 -------
1186 Series or Index
1187 Series or Index of month names.
1189 Examples
1190 --------
1191 >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
1192 >>> s
1193 0 2018-01-31
1194 1 2018-02-28
1195 2 2018-03-31
1196 dtype: datetime64[ns]
1197 >>> s.dt.month_name()
1198 0 January
1199 1 February
1200 2 March
1201 dtype: object
1203 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1204 >>> idx
1205 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1206 dtype='datetime64[ns]', freq='M')
1207 >>> idx.month_name()
1208 Index(['January', 'February', 'March'], dtype='object')
1209 """
1210 values = self._local_timestamps()
1212 result = fields.get_date_name_field(
1213 values, "month_name", locale=locale, reso=self._reso
1214 )
1215 result = self._maybe_mask_results(result, fill_value=None)
1216 return result
1218 def day_name(self, locale=None) -> npt.NDArray[np.object_]:
1219 """
1220 Return the day names with specified locale.
1222 Parameters
1223 ----------
1224 locale : str, optional
1225 Locale determining the language in which to return the day name.
1226 Default is English locale.
1228 Returns
1229 -------
1230 Series or Index
1231 Series or Index of day names.
1233 Examples
1234 --------
1235 >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
1236 >>> s
1237 0 2018-01-01
1238 1 2018-01-02
1239 2 2018-01-03
1240 dtype: datetime64[ns]
1241 >>> s.dt.day_name()
1242 0 Monday
1243 1 Tuesday
1244 2 Wednesday
1245 dtype: object
1247 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1248 >>> idx
1249 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1250 dtype='datetime64[ns]', freq='D')
1251 >>> idx.day_name()
1252 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1253 """
1254 values = self._local_timestamps()
1256 result = fields.get_date_name_field(
1257 values, "day_name", locale=locale, reso=self._reso
1258 )
1259 result = self._maybe_mask_results(result, fill_value=None)
1260 return result
1262 @property
1263 def time(self) -> npt.NDArray[np.object_]:
1264 """
1265 Returns numpy array of :class:`datetime.time` objects.
1267 The time part of the Timestamps.
1268 """
1269 # If the Timestamps have a timezone that is not UTC,
1270 # convert them into their i8 representation while
1271 # keeping their timezone and not using UTC
1272 timestamps = self._local_timestamps()
1274 return ints_to_pydatetime(timestamps, box="time", reso=self._reso)
1276 @property
1277 def timetz(self) -> npt.NDArray[np.object_]:
1278 """
1279 Returns numpy array of :class:`datetime.time` objects with timezones.
1281 The time part of the Timestamps.
1282 """
1283 return ints_to_pydatetime(self.asi8, self.tz, box="time", reso=self._reso)
1285 @property
1286 def date(self) -> npt.NDArray[np.object_]:
1287 """
1288 Returns numpy array of python :class:`datetime.date` objects.
1290 Namely, the date part of Timestamps without time and
1291 timezone information.
1292 """
1293 # If the Timestamps have a timezone that is not UTC,
1294 # convert them into their i8 representation while
1295 # keeping their timezone and not using UTC
1296 timestamps = self._local_timestamps()
1298 return ints_to_pydatetime(timestamps, box="date", reso=self._reso)
1300 def isocalendar(self) -> DataFrame:
1301 """
1302 Calculate year, week, and day according to the ISO 8601 standard.
1304 .. versionadded:: 1.1.0
1306 Returns
1307 -------
1308 DataFrame
1309 With columns year, week and day.
1311 See Also
1312 --------
1313 Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
1314 week number, and weekday for the given Timestamp object.
1315 datetime.date.isocalendar : Return a named tuple object with
1316 three components: year, week and weekday.
1318 Examples
1319 --------
1320 >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
1321 >>> idx.isocalendar()
1322 year week day
1323 2019-12-29 2019 52 7
1324 2019-12-30 2020 1 1
1325 2019-12-31 2020 1 2
1326 2020-01-01 2020 1 3
1327 >>> idx.isocalendar().week
1328 2019-12-29 52
1329 2019-12-30 1
1330 2019-12-31 1
1331 2020-01-01 1
1332 Freq: D, Name: week, dtype: UInt32
1333 """
1334 from pandas import DataFrame
1336 values = self._local_timestamps()
1337 sarray = fields.build_isocalendar_sarray(values, reso=self._reso)
1338 iso_calendar_df = DataFrame(
1339 sarray, columns=["year", "week", "day"], dtype="UInt32"
1340 )
1341 if self._hasna:
1342 iso_calendar_df.iloc[self._isnan] = None
1343 return iso_calendar_df
1345 @property
1346 def weekofyear(self):
1347 """
1348 The week ordinal of the year.
1350 .. deprecated:: 1.1.0
1352 weekofyear and week have been deprecated.
1353 Please use DatetimeIndex.isocalendar().week instead.
1354 """
1355 warnings.warn(
1356 "weekofyear and week have been deprecated, please use "
1357 "DatetimeIndex.isocalendar().week instead, which returns "
1358 "a Series. To exactly reproduce the behavior of week and "
1359 "weekofyear and return an Index, you may call "
1360 "pd.Int64Index(idx.isocalendar().week)",
1361 FutureWarning,
1362 stacklevel=find_stack_level(),
1363 )
1364 week_series = self.isocalendar().week
1365 if week_series.hasnans:
1366 return week_series.to_numpy(dtype="float64", na_value=np.nan)
1367 return week_series.to_numpy(dtype="int64")
1369 week = weekofyear
    # Integer component accessors. Each is generated by `_field_accessor`,
    # which extracts the named field from the underlying datetime64 values;
    # the string passed here becomes the accessor's docstring at runtime.
    year = _field_accessor(
        "year",
        "Y",
        """
        The year of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="Y")
        ... )
        >>> datetime_series
        0   2000-12-31
        1   2001-12-31
        2   2002-12-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.year
        0    2000
        1    2001
        2    2002
        dtype: int64
        """,
    )
    month = _field_accessor(
        "month",
        "M",
        """
        The month as January=1, December=12.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="M")
        ... )
        >>> datetime_series
        0   2000-01-31
        1   2000-02-29
        2   2000-03-31
        dtype: datetime64[ns]
        >>> datetime_series.dt.month
        0    1
        1    2
        2    3
        dtype: int64
        """,
    )
    day = _field_accessor(
        "day",
        "D",
        """
        The day of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="D")
        ... )
        >>> datetime_series
        0   2000-01-01
        1   2000-01-02
        2   2000-01-03
        dtype: datetime64[ns]
        >>> datetime_series.dt.day
        0    1
        1    2
        2    3
        dtype: int64
        """,
    )
    hour = _field_accessor(
        "hour",
        "h",
        """
        The hours of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="h")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 01:00:00
        2   2000-01-01 02:00:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.hour
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    minute = _field_accessor(
        "minute",
        "m",
        """
        The minutes of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="T")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:01:00
        2   2000-01-01 00:02:00
        dtype: datetime64[ns]
        >>> datetime_series.dt.minute
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    second = _field_accessor(
        "second",
        "s",
        """
        The seconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="s")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00
        1   2000-01-01 00:00:01
        2   2000-01-01 00:00:02
        dtype: datetime64[ns]
        >>> datetime_series.dt.second
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    microsecond = _field_accessor(
        "microsecond",
        "us",
        """
        The microseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="us")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000
        1   2000-01-01 00:00:00.000001
        2   2000-01-01 00:00:00.000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.microsecond
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    nanosecond = _field_accessor(
        "nanosecond",
        "ns",
        """
        The nanoseconds of the datetime.

        Examples
        --------
        >>> datetime_series = pd.Series(
        ...     pd.date_range("2000-01-01", periods=3, freq="ns")
        ... )
        >>> datetime_series
        0   2000-01-01 00:00:00.000000000
        1   2000-01-01 00:00:00.000000001
        2   2000-01-01 00:00:00.000000002
        dtype: datetime64[ns]
        >>> datetime_series.dt.nanosecond
        0    0
        1    1
        2    2
        dtype: int64
        """,
    )
    # Calendar-position accessors (day-of-week/year, quarter, month length,
    # and boolean period-boundary indicators), all built via `_field_accessor`.
    # The module-level doc strings below are runtime values used as docstrings.
    _dayofweek_doc = """
    The day of the week with Monday=0, Sunday=6.

    Return the day of the week. It is assumed the week starts on
    Monday, which is denoted by 0 and ends on Sunday which is denoted
    by 6. This method is available on both Series with datetime
    values (using the `dt` accessor) or DatetimeIndex.

    Returns
    -------
    Series or Index
        Containing integers indicating the day number.

    See Also
    --------
    Series.dt.dayofweek : Alias.
    Series.dt.weekday : Alias.
    Series.dt.day_name : Returns the name of the day of the week.

    Examples
    --------
    >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
    >>> s.dt.dayofweek
    2016-12-31    5
    2017-01-01    6
    2017-01-02    0
    2017-01-03    1
    2017-01-04    2
    2017-01-05    3
    2017-01-06    4
    2017-01-07    5
    2017-01-08    6
    Freq: D, dtype: int64
    """
    day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
    dayofweek = day_of_week
    weekday = day_of_week

    day_of_year = _field_accessor(
        "dayofyear",
        "doy",
        """
        The ordinal day of the year.
        """,
    )
    dayofyear = day_of_year
    quarter = _field_accessor(
        "quarter",
        "q",
        """
        The quarter of the date.
        """,
    )
    days_in_month = _field_accessor(
        "days_in_month",
        "dim",
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month
    _is_month_doc = """
    Indicates whether the date is the {first_or_last} day of the month.

    Returns
    -------
    Series or array
        For Series, returns a Series with boolean values.
        For DatetimeIndex, returns a boolean array.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
    >>> s
    0   2018-02-27
    1   2018-02-28
    2   2018-03-01
    dtype: datetime64[ns]
    >>> s.dt.is_month_start
    0    False
    1    False
    2    True
    dtype: bool
    >>> s.dt.is_month_end
    0    False
    1    True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2018-02-27", periods=3)
    >>> idx.is_month_start
    array([False, False, True])
    >>> idx.is_month_end
    array([False, True, False])
    """
    is_month_start = _field_accessor(
        "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
    )

    is_month_end = _field_accessor(
        "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
    )

    is_quarter_start = _field_accessor(
        "is_quarter_start",
        "is_quarter_start",
        """
        Indicator for whether the date is the first day of a quarter.

        Returns
        -------
        is_quarter_start : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_end : Similar property for indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                   periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_start=df.dates.dt.is_quarter_start)
               dates  quarter  is_quarter_start
        0 2017-03-30        1             False
        1 2017-03-31        1             False
        2 2017-04-01        2              True
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_start
        array([False, False,  True, False])
        """,
    )
    is_quarter_end = _field_accessor(
        "is_quarter_end",
        "is_quarter_end",
        """
        Indicator for whether the date is the last day of a quarter.

        Returns
        -------
        is_quarter_end : Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        quarter : Return the quarter of the date.
        is_quarter_start : Similar property indicating the quarter start.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
        ...                    periods=4)})
        >>> df.assign(quarter=df.dates.dt.quarter,
        ...           is_quarter_end=df.dates.dt.is_quarter_end)
               dates  quarter    is_quarter_end
        0 2017-03-30        1             False
        1 2017-03-31        1              True
        2 2017-04-01        2             False
        3 2017-04-02        2             False

        >>> idx = pd.date_range('2017-03-30', periods=4)
        >>> idx
        DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_quarter_end
        array([False,  True, False, False])
        """,
    )
    is_year_start = _field_accessor(
        "is_year_start",
        "is_year_start",
        """
        Indicate whether the date is the first day of a year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_end : Similar property indicating the last day of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_start
        0    False
        1    False
        2    True
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_start
        array([False, False,  True])
        """,
    )
    is_year_end = _field_accessor(
        "is_year_end",
        "is_year_end",
        """
        Indicate whether the date is the last day of the year.

        Returns
        -------
        Series or DatetimeIndex
            The same type as the original data with boolean values. Series will
            have the same name and index. DatetimeIndex will have the same
            name.

        See Also
        --------
        is_year_start : Similar property indicating the start of the year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
        >>> dates
        0   2017-12-30
        1   2017-12-31
        2   2018-01-01
        dtype: datetime64[ns]

        >>> dates.dt.is_year_end
        0    False
        1     True
        2    False
        dtype: bool

        >>> idx = pd.date_range("2017-12-30", periods=3)
        >>> idx
        DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                      dtype='datetime64[ns]', freq='D')

        >>> idx.is_year_end
        array([False,  True, False])
        """,
    )
    is_leap_year = _field_accessor(
        "is_leap_year",
        "is_leap_year",
        """
        Boolean indicator if the date belongs to a leap year.

        A leap year is a year, which has 366 days (instead of 365) including
        29th of February as an intercalary day.
        Leap years are years which are multiples of four with the exception
        of years divisible by 100 but not by 400.

        Returns
        -------
        Series or ndarray
             Booleans indicating if dates belong to a leap year.

        Examples
        --------
        This method is available on Series with datetime values under
        the ``.dt`` accessor, and directly on DatetimeIndex.

        >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
        >>> idx
        DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                      dtype='datetime64[ns]', freq='A-DEC')
        >>> idx.is_leap_year
        array([ True, False, False])

        >>> dates_series = pd.Series(idx)
        >>> dates_series
        0   2012-12-31
        1   2013-12-31
        2   2014-12-31
        dtype: datetime64[ns]
        >>> dates_series.dt.is_leap_year
        0     True
        1    False
        2    False
        dtype: bool
        """,
    )
    def to_julian_date(self) -> npt.NDArray[np.float64]:
        """
        Convert Datetime Array to float64 ndarray of Julian Dates.
        0 Julian date is noon January 1, 4713 BC.
        https://en.wikipedia.org/wiki/Julian_day
        """

        # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
        year = np.asarray(self.year)
        month = np.asarray(self.month)
        day = np.asarray(self.day)
        # Treat Jan/Feb as months 13/14 of the previous year so the
        # day-count polynomial below is continuous across the Feb/Mar
        # boundary (standard Julian-day trick).
        testarr = month < 3
        year[testarr] -= 1
        month[testarr] += 12
        # Integer day count for the date portion, plus 1_721_118.5 to shift
        # the epoch to noon Jan 1, 4713 BC, plus the time-of-day fraction.
        return (
            day
            + np.fix((153 * month - 457) / 5)
            + 365 * year
            + np.floor(year / 4)
            - np.floor(year / 100)
            + np.floor(year / 400)
            + 1_721_118.5
            + (
                self.hour
                + self.minute / 60
                + self.second / 3600
                + self.microsecond / 3600 / 10**6
                + self.nanosecond / 3600 / 10**9
            )
            / 24
        )
1914 # -----------------------------------------------------------------
1915 # Reductions
1917 def std(
1918 self,
1919 axis=None,
1920 dtype=None,
1921 out=None,
1922 ddof: int = 1,
1923 keepdims: bool = False,
1924 skipna: bool = True,
1925 ):
1926 """
1927 Return sample standard deviation over requested axis.
1929 Normalized by N-1 by default. This can be changed using the ddof argument
1931 Parameters
1932 ----------
1933 axis : int optional, default None
1934 Axis for the function to be applied on.
1935 For `Series` this parameter is unused and defaults to `None`.
1936 ddof : int, default 1
1937 Degrees of Freedom. The divisor used in calculations is N - ddof,
1938 where N represents the number of elements.
1939 skipna : bool, default True
1940 Exclude NA/null values. If an entire row/column is NA, the result will be
1941 NA.
1943 Returns
1944 -------
1945 Timedelta
1946 """
1947 # Because std is translation-invariant, we can get self.std
1948 # by calculating (self - Timestamp(0)).std, and we can do it
1949 # without creating a copy by using a view on self._ndarray
1950 from pandas.core.arrays import TimedeltaArray
1952 # Find the td64 dtype with the same resolution as our dt64 dtype
1953 dtype_str = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
1954 dtype = np.dtype(dtype_str)
1956 tda = TimedeltaArray._simple_new(self._ndarray.view(dtype), dtype=dtype)
1958 return tda.std(axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna)
1961# -------------------------------------------------------------------
1962# Constructor Helpers
def sequence_to_datetimes(data, require_iso8601: bool = False) -> DatetimeArray:
    """
    Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
    """
    # Delegate the heavy lifting; mixed integer/datetime input is allowed here.
    values, tz, freq = _sequence_to_dt64ns(
        data,
        allow_mixed=True,
        require_iso8601=require_iso8601,
    )

    return DatetimeArray._simple_new(values, freq=freq, dtype=tz_to_dtype(tz))
def _sequence_to_dt64ns(
    data,
    dtype=None,
    copy: bool = False,
    tz=None,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous="raise",
    *,
    allow_mixed: bool = False,
    require_iso8601: bool = False,
):
    """
    Convert a sequence to an M8[ns] ndarray plus a (possibly inferred) tz.

    Parameters
    ----------
    data : list-like
    dtype : dtype, str, or None, default None
    copy : bool, default False
    tz : tzinfo, str, or None, default None
    dayfirst : bool, default False
    yearfirst : bool, default False
    ambiguous : str, bool, or arraylike, default 'raise'
        See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
    allow_mixed : bool, default False
        Interpret integers as timestamps when datetime objects are also present.
    require_iso8601 : bool, default False
        Only consider ISO-8601 formats when parsing strings.

    Returns
    -------
    result : numpy.ndarray
        The sequence converted to a numpy array with dtype ``datetime64[ns]``.
    tz : tzinfo or None
        Either the user-provided tzinfo or one inferred from the data.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    inferred_freq = None

    dtype = _validate_dt64_dtype(dtype)
    tz = timezones.maybe_get_tz(tz)

    # if dtype has an embedded tz, capture it
    tz = validate_tz_from_dtype(dtype, tz)

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="DatetimeArray"
    )

    if isinstance(data, DatetimeArray):
        # Preserve the input's freq; conversion below won't change spacing.
        inferred_freq = data.freq

    # By this point we are assured to have either a numpy array or Index
    data, copy = maybe_convert_dtype(data, copy, tz=tz)
    data_dtype = getattr(data, "dtype", None)

    if (
        is_object_dtype(data_dtype)
        or is_string_dtype(data_dtype)
        or is_sparse(data_dtype)
    ):
        # TODO: We do not have tests specific to string-dtypes,
        #  also complex or categorical or other extension
        copy = False
        if lib.infer_dtype(data, skipna=False) == "integer":
            # All-integer object array: treat as epoch values below.
            data = data.astype(np.int64)
        else:
            # data comes back here as either i8 to denote UTC timestamps
            #  or M8[ns] to denote wall times
            data, inferred_tz = objects_to_datetime64ns(
                data,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                allow_object=False,
                allow_mixed=allow_mixed,
                require_iso8601=require_iso8601,
            )
            if tz and inferred_tz:
                #  two timezones: convert to intended from base UTC repr
                if data.dtype == "i8":
                    # GH#42505
                    # by convention, these are _already_ UTC, e.g
                    return data.view(DT64NS_DTYPE), tz, None

                if timezones.is_utc(tz):
                    # Fastpath, avoid copy made in tzconversion
                    utc_vals = data.view("i8")
                else:
                    utc_vals = tz_convert_from_utc(data.view("i8"), tz)
                data = utc_vals.view(DT64NS_DTYPE)
            elif inferred_tz:
                tz = inferred_tz

        data_dtype = data.dtype

    # `data` may have originally been a Categorical[datetime64[ns, tz]],
    # so we need to handle these types.
    if is_datetime64tz_dtype(data_dtype):
        # DatetimeArray -> ndarray
        tz = _maybe_infer_tz(tz, data.tz)
        result = data._ndarray

    elif is_datetime64_dtype(data_dtype):
        # tz-naive DatetimeArray or ndarray[datetime64]
        data = getattr(data, "_ndarray", data)
        if data.dtype != DT64NS_DTYPE:
            data = astype_overflowsafe(data, dtype=DT64NS_DTYPE)
            # astype_overflowsafe already produced a new array; no copy needed.
            copy = False

        if tz is not None:
            # Convert tz-naive to UTC
            tz = timezones.maybe_get_tz(tz)
            # TODO: if tz is UTC, are there situations where we *don't* want a
            #  copy?  tz_localize_to_utc always makes one.
            data = tzconversion.tz_localize_to_utc(
                data.view("i8"), tz, ambiguous=ambiguous
            )
            data = data.view(DT64NS_DTYPE)

        assert data.dtype == DT64NS_DTYPE, data.dtype
        result = data

    else:
        # must be integer dtype otherwise
        # assume this data are epoch timestamps
        if tz:
            tz = timezones.maybe_get_tz(tz)

        if data.dtype != INT64_DTYPE:
            data = data.astype(np.int64, copy=False)
        # i8 epoch values reinterpret directly as UTC datetime64[ns].
        result = data.view(DT64NS_DTYPE)

    if copy:
        result = result.copy()

    assert isinstance(result, np.ndarray), type(result)
    assert result.dtype == "M8[ns]", result.dtype

    # We have to call this again after possibly inferring a tz above
    validate_tz_from_dtype(dtype, tz)

    return result, tz, inferred_freq
def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc=False,
    errors="raise",
    require_iso8601: bool = False,
    allow_object: bool = False,
    allow_mixed: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert timezone-aware timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    require_iso8601 : bool, default False
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.
    allow_mixed : bool, default False
        Interpret integers as timestamps when datetime objects are also present.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    """
    assert errors in ["raise", "ignore", "coerce"]

    # if str-dtype, convert
    data = np.array(data, copy=False, dtype=np.object_)

    # Remember the input's memory order so the reshape after parsing can
    # restore it; ravel("K") flattens in memory-layout order.
    flags = data.flags
    order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
    try:
        result, tz_parsed = tslib.array_to_datetime(
            data.ravel("K"),
            errors=errors,
            utc=utc,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            require_iso8601=require_iso8601,
            allow_mixed=allow_mixed,
        )
        result = result.reshape(data.shape, order=order)
    except OverflowError as err:
        # Exception is raised when a part of date is greater than 32 bit signed int
        raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        #  is in UTC
        # Return i8 values to denote unix timestamps
        return result.view("i8"), tz_parsed
    elif is_datetime64_dtype(result):
        # returning M8[ns] denotes wall-times; since tz is None
        #  the distinction is a thin one
        return result, tz_parsed
    elif is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        #  array is allowed.  When called via `pd.DatetimeIndex`, we can
        #  only accept datetime64 dtype, so raise TypeError if object-dtype
        #  is returned, as that indicates the values can be recognized as
        #  datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)
    else:  # pragma: no cover
        # GH#23675 this TypeError should never be hit, whereas the TypeError
        #  in the object-dtype branch above is reachable.
        raise TypeError(result)
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Apply dtype-based conversion conventions to raw input data.

    Float data is reinterpreted as wall-time datetime64[ns] (with a
    deprecation warning when a non-UTC tz is supplied), disallowed dtypes
    raise, and non-datetime extension arrays are coerced to object ndarray.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool

    Raises
    ------
    TypeError : PeriodDType data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque
        return data, copy

    if is_float_dtype(data.dtype):
        # Note: we must cast to datetime64[ns] here in order to treat these
        # as wall-times instead of UTC timestamps.
        data = data.astype(DT64NS_DTYPE)
        copy = False
        should_warn = (
            tz is not None
            and len(data) > 0
            and not timezones.is_utc(timezones.maybe_get_tz(tz))
        )
        if should_warn:
            # GH#23675, GH#45573 deprecate to treat symmetrically with integer dtypes
            warnings.warn(
                "The behavior of DatetimeArray._from_sequence with a timezone-aware "
                "dtype and floating-dtype data is deprecated. In a future version, "
                "this data will be interpreted as nanosecond UTC timestamps "
                "instead of wall-times, matching the behavior with integer dtypes. "
                "To retain the old behavior, explicitly cast to 'datetime64[ns]' "
                "before passing the data to pandas. To get the future behavior, "
                "first cast to 'int64'.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return data, copy

    if is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")

    if is_period_dtype(data.dtype):
        # Note: without explicitly raising here, PeriodIndex
        # test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )

    if is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype):
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy
2280# -------------------------------------------------------------------
2281# Validation and Inference
2284def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
2285 """
2286 If a timezone is inferred from data, check that it is compatible with
2287 the user-provided timezone, if any.
2289 Parameters
2290 ----------
2291 tz : tzinfo or None
2292 inferred_tz : tzinfo or None
2294 Returns
2295 -------
2296 tz : tzinfo or None
2298 Raises
2299 ------
2300 TypeError : if both timezones are present but do not match
2301 """
2302 if tz is None:
2303 tz = inferred_tz
2304 elif inferred_tz is None:
2305 pass
2306 elif not timezones.tz_compare(tz, inferred_tz):
2307 raise TypeError(
2308 f"data is already tz-aware {inferred_tz}, unable to "
2309 f"set specified tz: {tz}"
2310 )
2311 return tz
2314def _validate_dt64_dtype(dtype):
2315 """
2316 Check that a dtype, if passed, represents either a numpy datetime64[ns]
2317 dtype or a pandas DatetimeTZDtype.
2319 Parameters
2320 ----------
2321 dtype : object
2323 Returns
2324 -------
2325 dtype : None, numpy.dtype, or DatetimeTZDtype
2327 Raises
2328 ------
2329 ValueError : invalid dtype
2331 Notes
2332 -----
2333 Unlike validate_tz_from_dtype, this does _not_ allow non-existent
2334 tz errors to go through
2335 """
2336 if dtype is not None:
2337 dtype = pandas_dtype(dtype)
2338 if is_dtype_equal(dtype, np.dtype("M8")):
2339 # no precision, disallowed GH#24806
2340 msg = (
2341 "Passing in 'datetime64' dtype with no precision is not allowed. "
2342 "Please pass in 'datetime64[ns]' instead."
2343 )
2344 raise ValueError(msg)
2346 if (isinstance(dtype, np.dtype) and dtype != DT64NS_DTYPE) or not isinstance(
2347 dtype, (np.dtype, DatetimeTZDtype)
2348 ):
2349 raise ValueError(
2350 f"Unexpected value for 'dtype': '{dtype}'. "
2351 "Must be 'datetime64[ns]' or DatetimeTZDtype'."
2352 )
2354 if getattr(dtype, "tz", None):
2355 # https://github.com/pandas-dev/pandas/issues/18595
2356 # Ensure that we have a standard timezone for pytz objects.
2357 # Without this, things like adding an array of timedeltas and
2358 # a tz-aware Timestamp (with a tz specific to its datetime) will
2359 # be incorrect(ish?) for the array as a whole
2360 dtype = cast(DatetimeTZDtype, dtype)
2361 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
2363 return dtype
def validate_tz_from_dtype(dtype, tz: tzinfo | None) -> tzinfo | None:
    """
    If the given dtype is a DatetimeTZDtype, extract the implied
    tzinfo object from it and check that it does not conflict with the given
    tz.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is None:
        return tz

    if isinstance(dtype, str):
        try:
            dtype = DatetimeTZDtype.construct_from_string(dtype)
        except TypeError:
            # Things like `datetime64[ns]`, which is OK for the
            # constructors, but also nonsense, which should be validated
            # but not by us. We *do* allow non-existent tz errors to
            # go through
            pass

    dtz = getattr(dtype, "tz", None)
    if dtz is not None:
        if tz is not None and not timezones.tz_compare(tz, dtz):
            raise ValueError("cannot supply both a tz and a dtype with a tz")
        tz = dtz

    if tz is not None and is_datetime64_dtype(dtype):
        # The user passed a tz along with a tz-naive dtype (i.e.
        # datetime64[ns]); dtz is None here, so the comparison fails
        # and we reject the combination.
        if not timezones.tz_compare(tz, dtz):
            raise ValueError(
                "cannot supply both a tz and a "
                "timezone-naive dtype (i.e. datetime64[ns])"
            )

    return tz
2413def _infer_tz_from_endpoints(
2414 start: Timestamp, end: Timestamp, tz: tzinfo | None
2415) -> tzinfo | None:
2416 """
2417 If a timezone is not explicitly given via `tz`, see if one can
2418 be inferred from the `start` and `end` endpoints. If more than one
2419 of these inputs provides a timezone, require that they all agree.
2421 Parameters
2422 ----------
2423 start : Timestamp
2424 end : Timestamp
2425 tz : tzinfo or None
2427 Returns
2428 -------
2429 tz : tzinfo or None
2431 Raises
2432 ------
2433 TypeError : if start and end timezones do not agree
2434 """
2435 try:
2436 inferred_tz = timezones.infer_tzinfo(start, end)
2437 except AssertionError as err:
2438 # infer_tzinfo raises AssertionError if passed mismatched timezones
2439 raise TypeError(
2440 "Start and end cannot both be tz-aware with different timezones"
2441 ) from err
2443 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2444 tz = timezones.maybe_get_tz(tz)
2446 if tz is not None and inferred_tz is not None:
2447 if not timezones.tz_compare(inferred_tz, tz):
2448 raise AssertionError("Inferred time zone not equal to passed time zone")
2450 elif inferred_tz is not None:
2451 tz = inferred_tz
2453 return tz
2456def _maybe_normalize_endpoints(
2457 start: Timestamp | None, end: Timestamp | None, normalize: bool
2458):
2459 _normalized = True
2461 if start is not None:
2462 if normalize:
2463 start = start.normalize()
2464 _normalized = True
2465 else:
2466 _normalized = _normalized and start.time() == _midnight
2468 if end is not None:
2469 if normalize:
2470 end = end.normalize()
2471 _normalized = True
2472 else:
2473 _normalized = _normalized and end.time() == _midnight
2475 return start, end, _normalized
2478def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
2479 """
2480 Localize a start or end Timestamp to the timezone of the corresponding
2481 start or end Timestamp
2483 Parameters
2484 ----------
2485 ts : start or end Timestamp to potentially localize
2486 is_none : argument that should be None
2487 is_not_none : argument that should not be None
2488 freq : Tick, DateOffset, or None
2489 tz : str, timezone object or None
2490 ambiguous: str, localization behavior for ambiguous times
2491 nonexistent: str, localization behavior for nonexistent times
2493 Returns
2494 -------
2495 ts : Timestamp
2496 """
2497 # Make sure start and end are timezone localized if:
2498 # 1) freq = a Timedelta-like frequency (Tick)
2499 # 2) freq = None i.e. generating a linspaced range
2500 if is_none is None and is_not_none is not None:
2501 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2502 # we have historically defaulted ambiguous=False
2503 ambiguous = ambiguous if ambiguous != "infer" else False
2504 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2505 if isinstance(freq, Tick) or freq is None:
2506 localize_args["tz"] = tz
2507 ts = ts.tz_localize(**localize_args)
2508 return ts
def generate_range(start=None, end=None, periods=None, offset=BDay()):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : datetime, (default None)
    end : datetime, (default None)
    periods : int, (default None)
    offset : DateOffset, (default BDay())

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object
    """
    offset = to_offset(offset)

    # Timestamp(None) gives NaT; treat NaT endpoints as absent.
    start = Timestamp(start)
    start = None if start is NaT else start
    end = Timestamp(end)
    end = None if end is NaT else end

    if start and not offset.is_on_offset(start):
        start = offset.rollforward(start)
    elif end and not offset.is_on_offset(end):
        end = offset.rollback(end)

    if periods is None and end < start and offset.n >= 0:
        # forward offset with end before start yields an empty range
        end = None
        periods = 0

    # fill in whichever endpoint is missing from periods
    if end is None:
        end = start + (periods - 1) * offset
    if start is None:
        start = end - (periods - 1) * offset

    current = start
    if offset.n >= 0:
        while current <= end:
            yield current
            if current == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break
            # faster than current + offset
            candidate = offset._apply(current)
            if candidate <= current:
                raise ValueError(f"Offset {offset} did not increment date")
            current = candidate
    else:
        while current >= end:
            yield current
            if current == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break
            # faster than current + offset
            candidate = offset._apply(current)
            if candidate >= current:
                raise ValueError(f"Offset {offset} did not decrement date")
            current = candidate