Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/period.py: 28%
200 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3from datetime import (
4 datetime,
5 timedelta,
6)
7from typing import Hashable
8import warnings
10import numpy as np
12from pandas._libs import (
13 index as libindex,
14 lib,
15)
16from pandas._libs.tslibs import (
17 BaseOffset,
18 NaT,
19 Period,
20 Resolution,
21 Tick,
22)
23from pandas._typing import (
24 Dtype,
25 DtypeObj,
26 npt,
27)
28from pandas.util._decorators import (
29 cache_readonly,
30 doc,
31)
32from pandas.util._exceptions import find_stack_level
34from pandas.core.dtypes.common import (
35 is_datetime64_any_dtype,
36 is_integer,
37 pandas_dtype,
38)
39from pandas.core.dtypes.dtypes import PeriodDtype
40from pandas.core.dtypes.missing import is_valid_na_for_dtype
42from pandas.core.arrays.period import (
43 PeriodArray,
44 period_array,
45 raise_on_incompatible,
46 validate_dtype_freq,
47)
48import pandas.core.common as com
49import pandas.core.indexes.base as ibase
50from pandas.core.indexes.base import maybe_extract_name
51from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
52from pandas.core.indexes.datetimes import (
53 DatetimeIndex,
54 Index,
55)
56from pandas.core.indexes.extension import inherit_names
57from pandas.core.indexes.numeric import Int64Index
59_index_doc_kwargs = dict(ibase._index_doc_kwargs)
60_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
61_shared_doc_kwargs = {
62 "klass": "PeriodArray",
63}
65# --- Period index sketch
68def _new_PeriodIndex(cls, **d):
69 # GH13277 for unpickling
70 values = d.pop("data")
71 if values.dtype == "int64":
72 freq = d.pop("freq", None)
73 values = PeriodArray(values, freq=freq)
74 return cls._simple_new(values, **d)
75 else:
76 return cls(values, **d)
@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        """Lookup-engine class used for this index type."""
        return libindex.PeriodEngine

    @cache_readonly
    # Signature of "_resolution_obj" incompatible with supertype "DatetimeIndexOpsMixin"
    def _resolution_obj(self) -> Resolution:  # type: ignore[override]
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    #
    # NOTE: functions decorated with ``@doc`` deliberately carry no docstring
    # of their own; their documentation is taken from the decorator arguments.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert on the backing array, then re-wrap in the same index type,
        # keeping the name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Convert on the backing array and wrap the result in a DatetimeIndex
        # that keeps this index's name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Int64Index:
        # Array-level field wrapped in an Int64Index carrying this index's name.
        return Int64Index(self._data.hour, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Int64Index:
        # Array-level field wrapped in an Int64Index carrying this index's name.
        return Int64Index(self._data.minute, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.second.fget)
    def second(self) -> Int64Index:
        # Array-level field wrapped in an Int64Index carrying this index's name.
        return Int64Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        """
        Construct a PeriodIndex from data, ordinals, or date-part fields.

        Parameters
        ----------
        data : array-like, optional
            Period-like data. When given, ``ordinal`` is ignored.
        ordinal : array-like of int, optional
            Converted to an int64 ndarray and used only when ``data`` is None.
        freq : str or period object, optional
        dtype : str or PeriodDtype, optional
            Validated together with ``freq`` via ``validate_dtype_freq``.
        copy : bool, default False
            If True, the constructed array is copied before wrapping.
        name : Hashable, optional
        **fields
            Any of ``year``, ``month``, ``day``, ``quarter``, ``hour``,
            ``minute``, ``second``. Used only when both ``data`` and
            ``ordinal`` are None.

        Raises
        ------
        TypeError
            If ``fields`` contains a keyword outside the set listed above, or
            (via ``cls._scalar_data_error``) if neither data, ordinal nor
            fields are supplied.
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # Walk ``fields`` in call order so that, when several unexpected
        # keywords are passed, the one reported is deterministic (picking an
        # arbitrary element of a set difference is not).
        unexpected = [key for key in fields if key not in valid_field_set]
        if unexpected:
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                raise cls._scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        """Return the index contents as an object-dtype ndarray."""
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # Non-Tick freq: fall through to the final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?

        Only a PeriodDtype whose frequency has the same period dtype code and
        the same multiple ``n`` as ``self.freq`` is considered comparable.
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        A DatetimeIndex ``where`` is first converted to a PeriodIndex with
        this index's frequency; anything that is neither a DatetimeIndex nor
        a PeriodIndex raises TypeError.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @doc(Index.astype)
    def astype(self, dtype, copy: bool = True, how=lib.no_default):
        dtype = pandas_dtype(dtype)

        if how is not lib.no_default:
            # GH#37982
            warnings.warn(
                "The 'how' keyword in PeriodIndex.astype is deprecated and "
                "will be removed in a future version. "
                "Use index.to_timestamp(how=how) instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        else:
            how = "start"

        if is_datetime64_any_dtype(dtype):
            # 'how' is index-specific, isn't part of the EA interface.
            # GH#45038 implement this for PeriodArray (but without "how")
            #  once the "how" deprecation is enforced we can just dispatch
            #  directly to PeriodArray.
            tz = getattr(dtype, "tz", None)
            return self.to_timestamp(how=how).tz_localize(tz)

        return super().astype(dtype, copy=copy)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Consecutive ordinals may differ by at most one step.
        return ((values[1:] - values[:-1]) < 2).all()

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.
        method : optional
            Forwarded to ``Index.get_loc``. For string keys whose resolution
            differs from the index's, a value of None results in KeyError.
        tolerance : optional
            Forwarded to ``Index.get_loc``.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Remember the caller's key so the final KeyError reports it rather
        # than whatever it was cast to.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):

            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    # TODO: pass if method is not None, like DTI does?
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(key)
                loc = self.get_loc(key, method=method, tolerance=tolerance)
                # Recursing instead of falling through matters for the exception
                #  message in test_get_loc3 (though not clear if that really matters)
                return loc
            elif method is None:
                raise KeyError(key)
            else:
                key = self._cast_partial_indexing_scalar(parsed)

        elif isinstance(key, Period):
            key = self._maybe_cast_for_get_loc(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _maybe_cast_for_get_loc(self, key: Period) -> Period:
        """
        Return ``key`` unchanged if its freq matches ours, else raise KeyError.
        """
        # name is a misnomer, chosen for compat with DatetimeIndex
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)
        return key

    def _cast_partial_indexing_scalar(self, label):
        """
        Build a Period with this index's freq from ``label``.

        A ValueError from the Period constructor is re-raised as
        ``KeyError(label)``.
        """
        try:
            key = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return key

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        # datetime bounds are first turned into a Period of our freq; all
        # other labels are handled by the parent implementation as-is.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side, kind=kind)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """
        Return the (start, end) Periods, at this index's freq, of the Period
        built from ``parsed`` at resolution ``reso``.
        """
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))
def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. When omitted and neither ``start`` nor ``end`` is a
        Period object, ``"D"`` (daily) is used; otherwise the value is passed
        through as None and resolved from the bounds.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None values among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    n_specified = com.count_not_none(start, end, periods)
    if n_specified != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    # Fall back to daily frequency only when no bound can supply one.
    bound_is_period = isinstance(start, Period) or isinstance(end, Period)
    if freq is None and not bound_is_period:
        freq = "D"

    ordinals, resolved_freq = PeriodArray._generate_range(
        start, end, periods, freq, fields={}
    )
    return PeriodIndex(PeriodArray(ordinals, freq=resolved_freq), name=name)