Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/period.py: 28%

200 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from datetime import ( 

4 datetime, 

5 timedelta, 

6) 

7from typing import Hashable 

8import warnings 

9 

10import numpy as np 

11 

12from pandas._libs import ( 

13 index as libindex, 

14 lib, 

15) 

16from pandas._libs.tslibs import ( 

17 BaseOffset, 

18 NaT, 

19 Period, 

20 Resolution, 

21 Tick, 

22) 

23from pandas._typing import ( 

24 Dtype, 

25 DtypeObj, 

26 npt, 

27) 

28from pandas.util._decorators import ( 

29 cache_readonly, 

30 doc, 

31) 

32from pandas.util._exceptions import find_stack_level 

33 

34from pandas.core.dtypes.common import ( 

35 is_datetime64_any_dtype, 

36 is_integer, 

37 pandas_dtype, 

38) 

39from pandas.core.dtypes.dtypes import PeriodDtype 

40from pandas.core.dtypes.missing import is_valid_na_for_dtype 

41 

42from pandas.core.arrays.period import ( 

43 PeriodArray, 

44 period_array, 

45 raise_on_incompatible, 

46 validate_dtype_freq, 

47) 

48import pandas.core.common as com 

49import pandas.core.indexes.base as ibase 

50from pandas.core.indexes.base import maybe_extract_name 

51from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin 

52from pandas.core.indexes.datetimes import ( 

53 DatetimeIndex, 

54 Index, 

55) 

56from pandas.core.indexes.extension import inherit_names 

57from pandas.core.indexes.numeric import Int64Index 

58 

59_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

60_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"}) 

61_shared_doc_kwargs = { 

62 "klass": "PeriodArray", 

63} 

64 

65# --- Period index sketch 

66 

67 

68def _new_PeriodIndex(cls, **d): 

69 # GH13277 for unpickling 

70 values = d.pop("data") 

71 if values.dtype == "int64": 

72 freq = d.pop("freq", None) 

73 values = PeriodArray(values, freq=freq) 

74 return cls._simple_new(values, **d) 

75 else: 

76 return cls(values, **d) 

77 

78 

79@inherit_names( 

80 ["strftime", "start_time", "end_time"] + PeriodArray._field_ops, 

81 PeriodArray, 

82 wrap=True, 

83) 

84@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray) 

85class PeriodIndex(DatetimeIndexOpsMixin): 

86 """ 

87 Immutable ndarray holding ordinal values indicating regular periods in time. 

88 

89 Index keys are boxed to Period objects, which carry the metadata (e.g., 

90 frequency information). 

91 

92 Parameters 

93 ---------- 

94 data : array-like (1d int np.ndarray or PeriodArray), optional 

95 Optional period-like data to construct index with. 

96 copy : bool 

97 Make a copy of input ndarray. 

98 freq : str or period object, optional 

99 One of pandas period strings or corresponding objects. 

100 year : int, array, or Series, default None 

101 month : int, array, or Series, default None 

102 quarter : int, array, or Series, default None 

103 day : int, array, or Series, default None 

104 hour : int, array, or Series, default None 

105 minute : int, array, or Series, default None 

106 second : int, array, or Series, default None 

107 dtype : str or PeriodDtype, default None 

108 

109 Attributes 

110 ---------- 

111 day 

112 dayofweek 

113 day_of_week 

114 dayofyear 

115 day_of_year 

116 days_in_month 

117 daysinmonth 

118 end_time 

119 freq 

120 freqstr 

121 hour 

122 is_leap_year 

123 minute 

124 month 

125 quarter 

126 qyear 

127 second 

128 start_time 

129 week 

130 weekday 

131 weekofyear 

132 year 

133 

134 Methods 

135 ------- 

136 asfreq 

137 strftime 

138 to_timestamp 

139 

140 See Also 

141 -------- 

142 Index : The base pandas Index type. 

143 Period : Represents a period of time. 

144 DatetimeIndex : Index with datetime64 data. 

145 TimedeltaIndex : Index of timedelta64 data. 

146 period_range : Create a fixed-frequency PeriodIndex. 

147 

148 Examples 

149 -------- 

150 >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]) 

151 >>> idx 

152 PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') 

153 """ 

154 

155 _typ = "periodindex" 

156 

157 _data: PeriodArray 

158 freq: BaseOffset 

159 dtype: PeriodDtype 

160 

161 _data_cls = PeriodArray 

162 _supports_partial_string_indexing = True 

163 

164 @property 

165 def _engine_type(self) -> type[libindex.PeriodEngine]: 

166 return libindex.PeriodEngine 

167 

168 @cache_readonly 

169 # Signature of "_resolution_obj" incompatible with supertype "DatetimeIndexOpsMixin" 

170 def _resolution_obj(self) -> Resolution: # type: ignore[override] 

171 # for compat with DatetimeIndex 

172 return self.dtype._resolution_obj 

173 

174 # -------------------------------------------------------------------- 

175 # methods that dispatch to array and wrap result in Index 

176 # These are defined here instead of via inherit_names for mypy 

177 

178 @doc( 

179 PeriodArray.asfreq, 

180 other="pandas.arrays.PeriodArray", 

181 other_name="PeriodArray", 

182 **_shared_doc_kwargs, 

183 ) 

184 def asfreq(self, freq=None, how: str = "E") -> PeriodIndex: 

185 arr = self._data.asfreq(freq, how) 

186 return type(self)._simple_new(arr, name=self.name) 

187 

188 @doc(PeriodArray.to_timestamp) 

189 def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex: 

190 arr = self._data.to_timestamp(freq, how) 

191 return DatetimeIndex._simple_new(arr, name=self.name) 

192 

193 # https://github.com/python/mypy/issues/1362 

194 # error: Decorated property not supported 

195 @property # type: ignore[misc] 

196 @doc(PeriodArray.hour.fget) 

197 def hour(self) -> Int64Index: 

198 return Int64Index(self._data.hour, name=self.name) 

199 

200 # https://github.com/python/mypy/issues/1362 

201 # error: Decorated property not supported 

202 @property # type: ignore[misc] 

203 @doc(PeriodArray.minute.fget) 

204 def minute(self) -> Int64Index: 

205 return Int64Index(self._data.minute, name=self.name) 

206 

207 # https://github.com/python/mypy/issues/1362 

208 # error: Decorated property not supported 

209 @property # type: ignore[misc] 

210 @doc(PeriodArray.second.fget) 

211 def second(self) -> Int64Index: 

212 return Int64Index(self._data.second, name=self.name) 

213 

214 # ------------------------------------------------------------------------ 

215 # Index Constructors 

216 

217 def __new__( 

218 cls, 

219 data=None, 

220 ordinal=None, 

221 freq=None, 

222 dtype: Dtype | None = None, 

223 copy: bool = False, 

224 name: Hashable = None, 

225 **fields, 

226 ) -> PeriodIndex: 

227 

228 valid_field_set = { 

229 "year", 

230 "month", 

231 "day", 

232 "quarter", 

233 "hour", 

234 "minute", 

235 "second", 

236 } 

237 

238 if not set(fields).issubset(valid_field_set): 

239 argument = list(set(fields) - valid_field_set)[0] 

240 raise TypeError(f"__new__() got an unexpected keyword argument {argument}") 

241 

242 name = maybe_extract_name(name, data, cls) 

243 

244 if data is None and ordinal is None: 

245 # range-based. 

246 if not fields: 

247 # test_pickle_compat_construction 

248 raise cls._scalar_data_error(None) 

249 

250 data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields) 

251 # PeriodArray._generate_range does validation that fields is 

252 # empty when really using the range-based constructor. 

253 freq = freq2 

254 

255 data = PeriodArray(data, freq=freq) 

256 else: 

257 freq = validate_dtype_freq(dtype, freq) 

258 

259 # PeriodIndex allows PeriodIndex(period_index, freq=different) 

260 # Let's not encourage that kind of behavior in PeriodArray. 

261 

262 if freq and isinstance(data, cls) and data.freq != freq: 

263 # TODO: We can do some of these with no-copy / coercion? 

264 # e.g. D -> 2D seems to be OK 

265 data = data.asfreq(freq) 

266 

267 if data is None and ordinal is not None: 

268 # we strangely ignore `ordinal` if data is passed. 

269 ordinal = np.asarray(ordinal, dtype=np.int64) 

270 data = PeriodArray(ordinal, freq=freq) 

271 else: 

272 # don't pass copy here, since we copy later. 

273 data = period_array(data=data, freq=freq) 

274 

275 if copy: 

276 data = data.copy() 

277 

278 return cls._simple_new(data, name=name) 

279 

280 # ------------------------------------------------------------------------ 

281 # Data 

282 

283 @property 

284 def values(self) -> np.ndarray: 

285 return np.asarray(self, dtype=object) 

286 

287 def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: 

288 """ 

289 Convert timedelta-like input to an integer multiple of self.freq 

290 

291 Parameters 

292 ---------- 

293 other : timedelta, np.timedelta64, DateOffset, int, np.ndarray 

294 

295 Returns 

296 ------- 

297 converted : int, np.ndarray[int64] 

298 

299 Raises 

300 ------ 

301 IncompatibleFrequency : if the input cannot be written as a multiple 

302 of self.freq. Note IncompatibleFrequency subclasses ValueError. 

303 """ 

304 if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): 

305 if isinstance(self.freq, Tick): 

306 # _check_timedeltalike_freq_compat will raise if incompatible 

307 delta = self._data._check_timedeltalike_freq_compat(other) 

308 return delta 

309 elif isinstance(other, BaseOffset): 

310 if other.base == self.freq.base: 

311 return other.n 

312 

313 raise raise_on_incompatible(self, other) 

314 elif is_integer(other): 

315 # integer is passed to .shift via 

316 # _add_datetimelike_methods basically 

317 # but ufunc may pass integer to _add_delta 

318 return other 

319 

320 # raise when input doesn't have freq 

321 raise raise_on_incompatible(self, None) 

322 

323 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

324 """ 

325 Can we compare values of the given dtype to our own? 

326 """ 

327 if not isinstance(dtype, PeriodDtype): 

328 return False 

329 # For the subset of DateOffsets that can be a dtype.freq, it 

330 # suffices (and is much faster) to compare the dtype_code rather than 

331 # the freq itself. 

332 # See also: PeriodDtype.__eq__ 

333 freq = dtype.freq 

334 own_freq = self.freq 

335 return ( 

336 freq._period_dtype_code 

337 # error: "BaseOffset" has no attribute "_period_dtype_code" 

338 == own_freq._period_dtype_code # type: ignore[attr-defined] 

339 and freq.n == own_freq.n 

340 ) 

341 

342 # ------------------------------------------------------------------------ 

343 # Index Methods 

344 

345 def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray: 

346 """ 

347 where : array of timestamps 

348 mask : np.ndarray[bool] 

349 Array of booleans where data is not NA. 

350 """ 

351 if isinstance(where, DatetimeIndex): 

352 where = PeriodIndex(where._values, freq=self.freq) 

353 elif not isinstance(where, PeriodIndex): 

354 raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex") 

355 

356 return super().asof_locs(where, mask) 

357 

358 @doc(Index.astype) 

359 def astype(self, dtype, copy: bool = True, how=lib.no_default): 

360 dtype = pandas_dtype(dtype) 

361 

362 if how is not lib.no_default: 

363 # GH#37982 

364 warnings.warn( 

365 "The 'how' keyword in PeriodIndex.astype is deprecated and " 

366 "will be removed in a future version. " 

367 "Use index.to_timestamp(how=how) instead.", 

368 FutureWarning, 

369 stacklevel=find_stack_level(), 

370 ) 

371 else: 

372 how = "start" 

373 

374 if is_datetime64_any_dtype(dtype): 

375 # 'how' is index-specific, isn't part of the EA interface. 

376 # GH#45038 implement this for PeriodArray (but without "how") 

377 # once the "how" deprecation is enforced we can just dispatch 

378 # directly to PeriodArray. 

379 tz = getattr(dtype, "tz", None) 

380 return self.to_timestamp(how=how).tz_localize(tz) 

381 

382 return super().astype(dtype, copy=copy) 

383 

384 @property 

385 def is_full(self) -> bool: 

386 """ 

387 Returns True if this PeriodIndex is range-like in that all Periods 

388 between start and end are present, in order. 

389 """ 

390 if len(self) == 0: 

391 return True 

392 if not self.is_monotonic_increasing: 

393 raise ValueError("Index is not monotonic") 

394 values = self.asi8 

395 return ((values[1:] - values[:-1]) < 2).all() 

396 

397 @property 

398 def inferred_type(self) -> str: 

399 # because data is represented as ints, make sure we can't have ambiguous 

400 # indexing 

401 return "period" 

402 

403 # ------------------------------------------------------------------------ 

404 # Indexing Methods 

405 

406 def _convert_tolerance(self, tolerance, target): 

407 # Returned tolerance must be in dtype/units so that 

408 # `|self._get_engine_target() - target._engine_target()| <= tolerance` 

409 # is meaningful. Since PeriodIndex returns int64 for engine_target, 

410 # we may need to convert timedelta64 tolerance to int64. 

411 tolerance = super()._convert_tolerance(tolerance, target) 

412 

413 if self.dtype == target.dtype: 

414 # convert tolerance to i8 

415 tolerance = self._maybe_convert_timedelta(tolerance) 

416 

417 return tolerance 

418 

419 def get_loc(self, key, method=None, tolerance=None): 

420 """ 

421 Get integer location for requested label. 

422 

423 Parameters 

424 ---------- 

425 key : Period, NaT, str, or datetime 

426 String or datetime key must be parsable as Period. 

427 

428 Returns 

429 ------- 

430 loc : int or ndarray[int64] 

431 

432 Raises 

433 ------ 

434 KeyError 

435 Key is not present in the index. 

436 TypeError 

437 If key is listlike or otherwise not hashable. 

438 """ 

439 orig_key = key 

440 

441 self._check_indexing_error(key) 

442 

443 if is_valid_na_for_dtype(key, self.dtype): 

444 key = NaT 

445 

446 elif isinstance(key, str): 

447 

448 try: 

449 parsed, reso = self._parse_with_reso(key) 

450 except ValueError as err: 

451 # A string with invalid format 

452 raise KeyError(f"Cannot interpret '{key}' as period") from err 

453 

454 if self._can_partial_date_slice(reso): 

455 try: 

456 return self._partial_date_slice(reso, parsed) 

457 except KeyError as err: 

458 # TODO: pass if method is not None, like DTI does? 

459 raise KeyError(key) from err 

460 

461 if reso == self._resolution_obj: 

462 # the reso < self._resolution_obj case goes 

463 # through _get_string_slice 

464 key = self._cast_partial_indexing_scalar(key) 

465 loc = self.get_loc(key, method=method, tolerance=tolerance) 

466 # Recursing instead of falling through matters for the exception 

467 # message in test_get_loc3 (though not clear if that really matters) 

468 return loc 

469 elif method is None: 

470 raise KeyError(key) 

471 else: 

472 key = self._cast_partial_indexing_scalar(parsed) 

473 

474 elif isinstance(key, Period): 

475 key = self._maybe_cast_for_get_loc(key) 

476 

477 elif isinstance(key, datetime): 

478 key = self._cast_partial_indexing_scalar(key) 

479 

480 else: 

481 # in particular integer, which Period constructor would cast to string 

482 raise KeyError(key) 

483 

484 try: 

485 return Index.get_loc(self, key, method, tolerance) 

486 except KeyError as err: 

487 raise KeyError(orig_key) from err 

488 

489 def _maybe_cast_for_get_loc(self, key: Period) -> Period: 

490 # name is a misnomer, chosen for compat with DatetimeIndex 

491 sfreq = self.freq 

492 kfreq = key.freq 

493 if not ( 

494 sfreq.n == kfreq.n 

495 # error: "BaseOffset" has no attribute "_period_dtype_code" 

496 and sfreq._period_dtype_code # type: ignore[attr-defined] 

497 # error: "BaseOffset" has no attribute "_period_dtype_code" 

498 == kfreq._period_dtype_code # type: ignore[attr-defined] 

499 ): 

500 # GH#42247 For the subset of DateOffsets that can be Period freqs, 

501 # checking these two attributes is sufficient to check equality, 

502 # and much more performant than `self.freq == key.freq` 

503 raise KeyError(key) 

504 return key 

505 

506 def _cast_partial_indexing_scalar(self, label): 

507 try: 

508 key = Period(label, freq=self.freq) 

509 except ValueError as err: 

510 # we cannot construct the Period 

511 raise KeyError(label) from err 

512 return key 

513 

514 @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound) 

515 def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): 

516 if isinstance(label, datetime): 

517 label = self._cast_partial_indexing_scalar(label) 

518 

519 return super()._maybe_cast_slice_bound(label, side, kind=kind) 

520 

521 def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): 

522 iv = Period(parsed, freq=reso.attr_abbrev) 

523 return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) 

524 

525 

526def period_range( 

527 start=None, end=None, periods: int | None = None, freq=None, name=None 

528) -> PeriodIndex: 

529 """ 

530 Return a fixed frequency PeriodIndex. 

531 

532 The day (calendar) is the default frequency. 

533 

534 Parameters 

535 ---------- 

536 start : str or period-like, default None 

537 Left bound for generating periods. 

538 end : str or period-like, default None 

539 Right bound for generating periods. 

540 periods : int, default None 

541 Number of periods to generate. 

542 freq : str or DateOffset, optional 

543 Frequency alias. By default the freq is taken from `start` or `end` 

544 if those are Period objects. Otherwise, the default is ``"D"`` for 

545 daily frequency. 

546 name : str, default None 

547 Name of the resulting PeriodIndex. 

548 

549 Returns 

550 ------- 

551 PeriodIndex 

552 

553 Notes 

554 ----- 

555 Of the three parameters: ``start``, ``end``, and ``periods``, exactly two 

556 must be specified. 

557 

558 To learn more about the frequency strings, please see `this link 

559 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

560 

561 Examples 

562 -------- 

563 >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') 

564 PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', 

565 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', 

566 '2018-01'], 

567 dtype='period[M]') 

568 

569 If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor 

570 endpoints for a ``PeriodIndex`` with frequency matching that of the 

571 ``period_range`` constructor. 

572 

573 >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), 

574 ... end=pd.Period('2017Q2', freq='Q'), freq='M') 

575 PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], 

576 dtype='period[M]') 

577 """ 

578 if com.count_not_none(start, end, periods) != 2: 

579 raise ValueError( 

580 "Of the three parameters: start, end, and periods, " 

581 "exactly two must be specified" 

582 ) 

583 if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)): 

584 freq = "D" 

585 

586 data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) 

587 data = PeriodArray(data, freq=freq) 

588 return PeriodIndex(data, name=name)