Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/period.py: 28%

1from __future__ import annotations

3from datetime import (

4 datetime,

5 timedelta,

7from typing import Hashable

8import warnings

10import numpy as np

12from pandas._libs import (

13 index as libindex,

14 lib,

15)

16from pandas._libs.tslibs import (

17 BaseOffset,

18 NaT,

19 Period,

20 Resolution,

21 Tick,

22)

23from pandas._typing import (

24 Dtype,

25 DtypeObj,

26 npt,

27)

28from pandas.util._decorators import (

29 cache_readonly,

30 doc,

31)

32from pandas.util._exceptions import find_stack_level

34from pandas.core.dtypes.common import (

35 is_datetime64_any_dtype,

36 is_integer,

37 pandas_dtype,

38)

39from pandas.core.dtypes.dtypes import PeriodDtype

40from pandas.core.dtypes.missing import is_valid_na_for_dtype

42from pandas.core.arrays.period import (

43 PeriodArray,

44 period_array,

45 raise_on_incompatible,

46 validate_dtype_freq,

47)

48import pandas.core.common as com

49import pandas.core.indexes.base as ibase

50from pandas.core.indexes.base import maybe_extract_name

51from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin

52from pandas.core.indexes.datetimes import (

53 DatetimeIndex,

54 Index,

55)

56from pandas.core.indexes.extension import inherit_names

57from pandas.core.indexes.numeric import Int64Index

# Keyword substitutions for shared docstring templates: a copy of the kwargs
# from the base index module with the "target_klass" entry overridden for
# PeriodIndex.
_index_doc_kwargs = {
    **ibase._index_doc_kwargs,
    "target_klass": "PeriodIndex or list of Periods",
}
# Substitutions used when reusing PeriodArray docstrings via ``@doc``.
_shared_doc_kwargs = dict(klass="PeriodArray")

65# --- Period index sketch

def _new_PeriodIndex(cls, **d):
    """
    Rebuild a PeriodIndex from its pickled keyword state (GH13277).

    ``d`` must contain a ``"data"`` entry. When that data has dtype
    ``int64`` it is wrapped in a PeriodArray using the optional ``"freq"``
    entry and handed to ``cls._simple_new``; otherwise it is passed to the
    regular ``cls`` constructor. Remaining entries of ``d`` are forwarded
    as keyword arguments in both cases.
    """
    raw = d.pop("data")
    holds_ordinals = raw.dtype == "int64"

    if not holds_ordinals:
        return cls(raw, **d)

    period_freq = d.pop("freq", None)
    boxed = PeriodArray(raw, freq=period_freq)
    return cls._simple_new(boxed, **d)

@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Period ordinals to construct the index from. Only used when
        ``data`` is None.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    name : Hashable, optional
        Name of the resulting index.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        return libindex.PeriodEngine

    @cache_readonly
    # Signature of "_resolution_obj" incompatible with supertype "DatetimeIndexOpsMixin"
    def _resolution_obj(self) -> Resolution:  # type: ignore[override]
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    # NOTE: these take their docstrings from PeriodArray through ``@doc``,
    # so they are documented with comments rather than docstrings here.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert the underlying array, then re-wrap keeping the index name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Convert the underlying array and wrap it in a DatetimeIndex that
        # keeps the index name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Int64Index:
        return Int64Index(self._data.hour, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Int64Index:
        return Int64Index(self._data.minute, name=self.name)

    # https://github.com/python/mypy/issues/1362
    # error: Decorated property not supported
    @property  # type: ignore[misc]
    @doc(PeriodArray.second.fget)
    def second(self) -> Int64Index:
        return Int64Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:

        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # Report the first offending keyword in the order the caller passed
        # it, so the error is deterministic when several are invalid.
        unexpected = [key for key in fields if key not in valid_field_set]
        if unexpected:
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                raise cls._scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        """
        Return the index converted to an object-dtype ndarray.
        """
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # timedelta-like input with a non-Tick freq falls through to the
            # final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Delegate to the parent ``asof_locs`` after coercing `where`.

        A DatetimeIndex `where` is first converted to a PeriodIndex with this
        index's freq.

        Parameters
        ----------
        where : array of timestamps
            Must be a DatetimeIndex or PeriodIndex.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Returns
        -------
        np.ndarray

        Raises
        ------
        TypeError
            If `where` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @doc(Index.astype)
    def astype(self, dtype, copy: bool = True, how=lib.no_default):
        dtype = pandas_dtype(dtype)

        if how is not lib.no_default:
            # GH#37982
            warnings.warn(
                "The 'how' keyword in PeriodIndex.astype is deprecated and "
                "will be removed in a future version. "
                "Use index.to_timestamp(how=how) instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        else:
            how = "start"

        if is_datetime64_any_dtype(dtype):
            # 'how' is index-specific, isn't part of the EA interface.
            # GH#45038 implement this for PeriodArray (but without "how")
            #  once the "how" deprecation is enforced we can just dispatch
            #  directly to PeriodArray.
            tz = getattr(dtype, "tz", None)
            return self.to_timestamp(how=how).tz_localize(tz)

        return super().astype(dtype, copy=copy)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # ndarray.all() yields numpy.bool_; convert so the result matches the
        # annotated builtin bool.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.
        method : optional
            Forwarded to ``Index.get_loc`` once the key has been converted.
        tolerance : optional
            Forwarded to ``Index.get_loc`` once the key has been converted.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Kept so the final KeyError reports what the caller actually passed.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):

            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    # TODO: pass if method is not None, like DTI does?
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(key)
                loc = self.get_loc(key, method=method, tolerance=tolerance)
                # Recursing instead of falling through matters for the exception
                #  message in test_get_loc3 (though not clear if that really matters)
                return loc
            elif method is None:
                raise KeyError(key)
            else:
                key = self._cast_partial_indexing_scalar(parsed)

        elif isinstance(key, Period):
            key = self._maybe_cast_for_get_loc(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _maybe_cast_for_get_loc(self, key: Period) -> Period:
        # name is a misnomer, chosen for compat with DatetimeIndex
        # No cast happens: the key is returned unchanged when its freq
        # matches ours, otherwise KeyError is raised.
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)
        return key

    def _cast_partial_indexing_scalar(self, label):
        # Build a Period with our freq from `label`; a label the Period
        # constructor rejects is reported as a missing key.
        try:
            key = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return key

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side, kind=kind)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        # Build a Period at the parsed string's resolution, then return its
        # (start, end) converted to our own freq.
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

524

525

def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Build a PeriodIndex with a fixed frequency.

    Calendar-day periods are produced when no frequency can be determined.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound of the generated periods.
    end : str or period-like, default None
        Right bound of the generated periods.
    periods : int, default None
        How many periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. When omitted, the freq comes from `start` or `end`
        if either is a Period object; otherwise ``"D"`` (daily) is used.
    name : str, default None
        Name given to the returned PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None values among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Exactly two of the three parameters ``start``, ``end``, and ``periods``
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    # Fall back to daily periods only when neither bound carries its own freq.
    has_period_bound = isinstance(start, Period) or isinstance(end, Period)
    if freq is None and not has_period_bound:
        freq = "D"

    ordinals, resolved_freq = PeriodArray._generate_range(
        start, end, periods, freq, fields={}
    )
    return PeriodIndex(PeriodArray(ordinals, freq=resolved_freq), name=name)