Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/groupby/grouper.py: 15%

350 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Provide user facing operators for doing the split part of the 

3split-apply-combine paradigm. 

4""" 

5from __future__ import annotations 

6 

7from typing import ( 

8 TYPE_CHECKING, 

9 Any, 

10 Hashable, 

11 final, 

12) 

13import warnings 

14 

15import numpy as np 

16 

17from pandas._typing import ( 

18 ArrayLike, 

19 NDFrameT, 

20 npt, 

21) 

22from pandas.errors import InvalidIndexError 

23from pandas.util._decorators import cache_readonly 

24from pandas.util._exceptions import find_stack_level 

25 

26from pandas.core.dtypes.cast import sanitize_to_nanoseconds 

27from pandas.core.dtypes.common import ( 

28 is_categorical_dtype, 

29 is_list_like, 

30 is_scalar, 

31) 

32 

33import pandas.core.algorithms as algorithms 

34from pandas.core.arrays import ( 

35 Categorical, 

36 ExtensionArray, 

37) 

38import pandas.core.common as com 

39from pandas.core.frame import DataFrame 

40from pandas.core.groupby import ops 

41from pandas.core.groupby.categorical import ( 

42 recode_for_groupby, 

43 recode_from_groupby, 

44) 

45from pandas.core.indexes.api import ( 

46 CategoricalIndex, 

47 Index, 

48 MultiIndex, 

49) 

50from pandas.core.series import Series 

51 

52from pandas.io.formats.printing import pprint_thing 

53 

54if TYPE_CHECKING: 54 ↛ 55line 54 didn't jump to line 55, because the condition on line 54 was never true

55 from pandas.core.generic import NDFrame 

56 

57 

58class Grouper: 

59 """ 

60 A Grouper allows the user to specify a groupby instruction for an object. 

61 

62 This specification will select a column via the key parameter, or if the 

63 level and/or axis parameters are given, a level of the index of the target 

64 object. 

65 

66 If `axis` and/or `level` are passed as keywords to both `Grouper` and 

67 `groupby`, the values passed to `Grouper` take precedence. 

68 

69 Parameters 

70 ---------- 

71 key : str, defaults to None 

72 Groupby key, which selects the grouping column of the target. 

73 level : name/number, defaults to None 

74 The level for the target index. 

75 freq : str / frequency object, defaults to None 

76 This will groupby the specified frequency if the target selection 

77 (via key or level) is a datetime-like object. For full specification 

78 of available frequencies, please see `here 

79 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_. 

80 axis : str, int, defaults to 0 

81 Number/name of the axis. 

82 sort : bool, default to False 

83 Whether to sort the resulting labels. 

84 closed : {'left' or 'right'} 

85 Closed end of interval. Only when `freq` parameter is passed. 

86 label : {'left' or 'right'} 

87 Interval boundary to use for labeling. 

88 Only when `freq` parameter is passed. 

89 convention : {'start', 'end', 'e', 's'} 

90 If grouper is PeriodIndex and `freq` parameter is passed. 

91 base : int, default 0 

92 Only when `freq` parameter is passed. 

93 For frequencies that evenly subdivide 1 day, the "origin" of the 

94 aggregated intervals. For example, for '5min' frequency, base could 

95 range from 0 through 4. Defaults to 0. 

96 

97 .. deprecated:: 1.1.0 

98 The new arguments that you should use are 'offset' or 'origin'. 

99 

100 loffset : str, DateOffset, timedelta object 

101 Only when `freq` parameter is passed. 

102 

103 .. deprecated:: 1.1.0 

104 loffset is only working for ``.resample(...)`` and not for 

105 Grouper (:issue:`28302`). 

106 However, loffset is also deprecated for ``.resample(...)`` 

107 See: :class:`DataFrame.resample` 

108 

109 origin : Timestamp or str, default 'start_day' 

110 The timestamp on which to adjust the grouping. The timezone of origin must 

111 match the timezone of the index. 

112 If string, must be one of the following: 

113 

114 - 'epoch': `origin` is 1970-01-01 

115 - 'start': `origin` is the first value of the timeseries 

116 - 'start_day': `origin` is the first day at midnight of the timeseries 

117 

118 .. versionadded:: 1.1.0 

119 

120 - 'end': `origin` is the last value of the timeseries 

121 - 'end_day': `origin` is the ceiling midnight of the last day 

122 

123 .. versionadded:: 1.3.0 

124 

125 offset : Timedelta or str, default is None 

126 An offset timedelta added to the origin. 

127 

128 .. versionadded:: 1.1.0 

129 

130 dropna : bool, default True 

131 If True, and if group keys contain NA values, NA values together with 

132 row/column will be dropped. If False, NA values will also be treated as 

133 the key in groups. 

134 

135 .. versionadded:: 1.2.0 

136 

137 Returns 

138 ------- 

139 A specification for a groupby instruction 

140 

141 Examples 

142 -------- 

143 Syntactic sugar for ``df.groupby('A')`` 

144 

145 >>> df = pd.DataFrame( 

146 ... { 

147 ... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"], 

148 ... "Speed": [100, 5, 200, 300, 15], 

149 ... } 

150 ... ) 

151 >>> df 

152 Animal Speed 

153 0 Falcon 100 

154 1 Parrot 5 

155 2 Falcon 200 

156 3 Falcon 300 

157 4 Parrot 15 

158 >>> df.groupby(pd.Grouper(key="Animal")).mean() 

159 Speed 

160 Animal 

161 Falcon 200.0 

162 Parrot 10.0 

163 

164 Specify a resample operation on the column 'Publish date' 

165 

166 >>> df = pd.DataFrame( 

167 ... { 

168 ... "Publish date": [ 

169 ... pd.Timestamp("2000-01-02"), 

170 ... pd.Timestamp("2000-01-02"), 

171 ... pd.Timestamp("2000-01-09"), 

172 ... pd.Timestamp("2000-01-16") 

173 ... ], 

174 ... "ID": [0, 1, 2, 3], 

175 ... "Price": [10, 20, 30, 40] 

176 ... } 

177 ... ) 

178 >>> df 

179 Publish date ID Price 

180 0 2000-01-02 0 10 

181 1 2000-01-02 1 20 

182 2 2000-01-09 2 30 

183 3 2000-01-16 3 40 

184 >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean() 

185 ID Price 

186 Publish date 

187 2000-01-02 0.5 15.0 

188 2000-01-09 2.0 30.0 

189 2000-01-16 3.0 40.0 

190 

191 If you want to adjust the start of the bins based on a fixed timestamp: 

192 

193 >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' 

194 >>> rng = pd.date_range(start, end, freq='7min') 

195 >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) 

196 >>> ts 

197 2000-10-01 23:30:00 0 

198 2000-10-01 23:37:00 3 

199 2000-10-01 23:44:00 6 

200 2000-10-01 23:51:00 9 

201 2000-10-01 23:58:00 12 

202 2000-10-02 00:05:00 15 

203 2000-10-02 00:12:00 18 

204 2000-10-02 00:19:00 21 

205 2000-10-02 00:26:00 24 

206 Freq: 7T, dtype: int64 

207 

208 >>> ts.groupby(pd.Grouper(freq='17min')).sum() 

209 2000-10-01 23:14:00 0 

210 2000-10-01 23:31:00 9 

211 2000-10-01 23:48:00 21 

212 2000-10-02 00:05:00 54 

213 2000-10-02 00:22:00 24 

214 Freq: 17T, dtype: int64 

215 

216 >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum() 

217 2000-10-01 23:18:00 0 

218 2000-10-01 23:35:00 18 

219 2000-10-01 23:52:00 27 

220 2000-10-02 00:09:00 39 

221 2000-10-02 00:26:00 24 

222 Freq: 17T, dtype: int64 

223 

224 >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum() 

225 2000-10-01 23:24:00 3 

226 2000-10-01 23:41:00 15 

227 2000-10-01 23:58:00 45 

228 2000-10-02 00:15:00 45 

229 Freq: 17T, dtype: int64 

230 

231 If you want to adjust the start of the bins with an `offset` Timedelta, the two 

232 following lines are equivalent: 

233 

234 >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum() 

235 2000-10-01 23:30:00 9 

236 2000-10-01 23:47:00 21 

237 2000-10-02 00:04:00 54 

238 2000-10-02 00:21:00 24 

239 Freq: 17T, dtype: int64 

240 

241 >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum() 

242 2000-10-01 23:30:00 9 

243 2000-10-01 23:47:00 21 

244 2000-10-02 00:04:00 54 

245 2000-10-02 00:21:00 24 

246 Freq: 17T, dtype: int64 

247 

248 To replace the use of the deprecated `base` argument, you can now use `offset`, 

249 in this example it is equivalent to have `base=2`: 

250 

251 >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum() 

252 2000-10-01 23:16:00 0 

253 2000-10-01 23:33:00 9 

254 2000-10-01 23:50:00 36 

255 2000-10-02 00:07:00 39 

256 2000-10-02 00:24:00 24 

257 Freq: 17T, dtype: int64 

258 """ 

259 

260 axis: int 

261 sort: bool 

262 dropna: bool 

263 _gpr_index: Index | None 

264 _grouper: Index | None 

265 

266 _attributes: tuple[str, ...] = ("key", "level", "freq", "axis", "sort", "dropna") 

267 

268 def __new__(cls, *args, **kwargs): 

269 if kwargs.get("freq") is not None: 

270 from pandas.core.resample import TimeGrouper 

271 

272 _check_deprecated_resample_kwargs(kwargs, origin=cls) 

273 cls = TimeGrouper 

274 return super().__new__(cls) 
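    # Illustrative sketch (not part of the original source): because of the
    # dispatch in ``__new__`` above, passing ``freq`` makes ``Grouper(...)``
    # construct a ``TimeGrouper`` instance, while omitting it yields a plain
    # ``Grouper``.
    #
    # >>> import pandas as pd
    # >>> type(pd.Grouper(key="date", freq="1D")).__name__
    # 'TimeGrouper'
    # >>> type(pd.Grouper(key="date")).__name__
    # 'Grouper'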

    def __init__(
        self,
        key=None,
        level=None,
        freq=None,
        axis: int = 0,
        sort: bool = False,
        dropna: bool = True,
    ) -> None:
        self.key = key
        self.level = level
        self.freq = freq
        self.axis = axis
        self.sort = sort
        self.dropna = dropna

        self.grouper = None
        self._gpr_index = None
        self.obj = None
        self.indexer = None
        self.binner = None
        self._grouper = None
        self._indexer = None

    @final
    @property
    def ax(self) -> Index:
        index = self._gpr_index
        if index is None:
            raise ValueError("_set_grouper must be called before ax is accessed")
        return index
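    # Illustrative sketch (not part of the original source): ``ax`` is only
    # populated once ``_set_grouper`` has run, so touching it on a fresh
    # Grouper raises. These are internal APIs; behavior may differ across
    # pandas versions.
    #
    # >>> import pandas as pd
    # >>> pd.Grouper(key="A").ax
    # Traceback (most recent call last):
    #     ...
    # ValueError: _set_grouper must be called before ax is accessed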

    def _get_grouper(
        self, obj: NDFrameT, validate: bool = True
    ) -> tuple[Any, ops.BaseGrouper, NDFrameT]:
        """
        Parameters
        ----------
        obj : Series or DataFrame
        validate : bool, default True
            if True, validate the grouper

        Returns
        -------
        a tuple of binner, grouper, obj (possibly sorted)
        """
        self._set_grouper(obj)
        # error: Value of type variable "NDFrameT" of "get_grouper" cannot be
        # "Optional[Any]"
        # error: Incompatible types in assignment (expression has type "BaseGrouper",
        # variable has type "None")
        self.grouper, _, self.obj = get_grouper(  # type: ignore[type-var,assignment]
            self.obj,
            [self.key],
            axis=self.axis,
            level=self.level,
            sort=self.sort,
            validate=validate,
            dropna=self.dropna,
        )

        # error: Incompatible return value type (got "Tuple[None, None, None]",
        # expected "Tuple[Any, BaseGrouper, NDFrameT]")
        return self.binner, self.grouper, self.obj  # type: ignore[return-value]

    @final
    def _set_grouper(self, obj: NDFrame, sort: bool = False) -> None:
        """
        Given an object and the specifications, set up the internal grouper
        for this particular specification.

        Parameters
        ----------
        obj : Series or DataFrame
        sort : bool, default False
            whether the resulting grouper should be sorted
        """
        assert obj is not None

        if self.key is not None and self.level is not None:
            raise ValueError("The Grouper cannot specify both a key and a level!")

        # Keep self.grouper value before overriding
        if self._grouper is None:
            # TODO: What are we assuming about subsequent calls?
            self._grouper = self._gpr_index
            self._indexer = self.indexer

        # the key must be a valid info item
        if self.key is not None:
            key = self.key
            # The 'on' is already defined
            if getattr(self._gpr_index, "name", None) == key and isinstance(
                obj, Series
            ):
                # Sometimes self._grouper will have been resorted while
                # obj has not. In this case there is a mismatch when we
                # call self._grouper.take(obj.index) so we need to undo the sorting
                # before we call _grouper.take.
                assert self._grouper is not None
                if self._indexer is not None:
                    reverse_indexer = self._indexer.argsort()
                    unsorted_ax = self._grouper.take(reverse_indexer)
                    ax = unsorted_ax.take(obj.index)
                else:
                    ax = self._grouper.take(obj.index)
            else:
                if key not in obj._info_axis:
                    raise KeyError(f"The grouper name {key} is not found")
                ax = Index(obj[key], name=key)

        else:
            ax = obj._get_axis(self.axis)
            if self.level is not None:
                level = self.level

                # if a level is given it must be a MultiIndex level or
                # equivalent to the axis name
                if isinstance(ax, MultiIndex):
                    level = ax._get_level_number(level)
                    ax = Index(ax._get_level_values(level), name=ax.names[level])

                else:
                    if level not in (0, ax.name):
                        raise ValueError(f"The level {level} is not valid")

        # possibly sort
        if (self.sort or sort) and not ax.is_monotonic_increasing:
            # use stable sort to support first, last, nth
            # TODO: why does putting na_position="first" fix datetimelike cases?
            indexer = self.indexer = ax.array.argsort(
                kind="mergesort", na_position="first"
            )
            ax = ax.take(indexer)
            obj = obj.take(indexer, axis=self.axis)

        # error: Incompatible types in assignment (expression has type
        # "NDFrameT", variable has type "None")
        self.obj = obj  # type: ignore[assignment]
        self._gpr_index = ax

    @final
    @property
    def groups(self):
        # error: "None" has no attribute "groups"
        return self.grouper.groups  # type: ignore[attr-defined]

    @final
    def __repr__(self) -> str:
        attrs_list = (
            f"{attr_name}={repr(getattr(self, attr_name))}"
            for attr_name in self._attributes
            if getattr(self, attr_name) is not None
        )
        attrs = ", ".join(attrs_list)
        cls_name = type(self).__name__
        return f"{cls_name}({attrs})"
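    # Illustrative sketch (not part of the original source): ``__repr__``
    # emits every attribute in ``_attributes`` that is not None, so the
    # non-None defaults like ``axis=0``, ``sort=False``, and ``dropna=True``
    # still show up.
    #
    # >>> import pandas as pd
    # >>> pd.Grouper(key="A")
    # Grouper(key='A', axis=0, sort=False, dropna=True)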



@final
class Grouping:
    """
    Holds the grouping information for a single key.

    Parameters
    ----------
    index : Index
    grouper :
    obj : DataFrame or Series
    name : Label
    level :
    observed : bool, default False
        If we are a Categorical, use the observed values
    in_axis : if the Grouping is a column in self.obj and hence among the
        Groupby.exclusions list

    Returns
    -------
    **Attributes**:
      * indices : dict of {group -> index_list}
      * codes : ndarray, group codes
      * group_index : unique groups
      * groups : dict of {group -> label_list}
    """

    _codes: npt.NDArray[np.signedinteger] | None = None
    _group_index: Index | None = None
    _passed_categorical: bool
    _all_grouper: Categorical | None
    _index: Index

    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: NDFrame | None = None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ) -> None:
        self.level = level
        self._orig_grouper = grouper
        self.grouping_vector = _convert_grouper(index, grouper)
        self._all_grouper = None
        self._index = index
        self._sort = sort
        self.obj = obj
        self._observed = observed
        self.in_axis = in_axis
        self._dropna = dropna

        self._passed_categorical = False

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passed-in level

        ilevel = self._ilevel
        if ilevel is not None:
            mapper = self.grouping_vector
            # In extant tests, the new self.grouping_vector matches
            # `index.get_level_values(ilevel)` whenever
            # mapper is None and isinstance(index, MultiIndex)
            (
                self.grouping_vector,  # Index
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(mapper, level=ilevel, dropna=dropna)

        # a passed Grouper-like; directly get the grouper in the same way
        # as a single-grouper groupby, using the group_info to get codes
        elif isinstance(self.grouping_vector, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            assert self.obj is not None  # for mypy
            _, newgrouper, newobj = self.grouping_vector._get_grouper(
                self.obj, validate=False
            )
            self.obj = newobj

            ng = newgrouper._get_grouper()
            if isinstance(newgrouper, ops.BinGrouper):
                # in this case we have `ng is newgrouper`
                self.grouping_vector = ng
            else:
                # ops.BaseGrouper
                # use Index instead of ndarray so we can recover the name
                self.grouping_vector = Index(ng, name=newgrouper.result_index.name)

        elif is_categorical_dtype(self.grouping_vector):
            # a passed Categorical
            self._passed_categorical = True

            self.grouping_vector, self._all_grouper = recode_for_groupby(
                self.grouping_vector, sort, observed
            )

        elif not isinstance(
            self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
        ):
            # no level passed
            if getattr(self.grouping_vector, "ndim", 1) != 1:
                t = self.name or str(type(self.grouping_vector))
                raise ValueError(f"Grouper for '{t}' not 1-dimensional")

            self.grouping_vector = index.map(self.grouping_vector)

            if not (
                hasattr(self.grouping_vector, "__len__")
                and len(self.grouping_vector) == len(index)
            ):
                grper = pprint_thing(self.grouping_vector)
                errmsg = (
                    "Grouper result violates len(labels) == "
                    f"len(data)\nresult: {grper}"
                )
                self.grouping_vector = None  # Try for sanity
                raise AssertionError(errmsg)

        if isinstance(self.grouping_vector, np.ndarray):
            # if we have a date/time-like grouper, make sure that we have
            # Timestamp-likes
            self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector)

    def __repr__(self) -> str:
        return f"Grouping({self.name})"

    def __iter__(self):
        return iter(self.indices)

    @cache_readonly
    def name(self) -> Hashable:
        ilevel = self._ilevel
        if ilevel is not None:
            return self._index.names[ilevel]

        if isinstance(self._orig_grouper, (Index, Series)):
            return self._orig_grouper.name

        elif isinstance(self.grouping_vector, ops.BaseGrouper):
            return self.grouping_vector.result_index.name

        elif isinstance(self.grouping_vector, Index):
            return self.grouping_vector.name

        # otherwise we have ndarray or ExtensionArray -> no name
        return None
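    # Illustrative sketch (not part of the original source): ``name`` falls
    # back through the level name, the original grouper's name, and finally
    # None. Reaching the Grouping through ``.grouper.groupings`` is internal
    # API and may differ across pandas versions.
    #
    # >>> import pandas as pd
    # >>> df = pd.DataFrame({"A": ["x", "y"], "B": [1, 2]})
    # >>> df.groupby("A").grouper.groupings[0].name
    # 'A'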

    @cache_readonly
    def _ilevel(self) -> int | None:
        """
        If necessary, convert an index level name to an index level position.
        """
        level = self.level
        if level is None:
            return None
        if not isinstance(level, int):
            index = self._index
            if level not in index.names:
                raise AssertionError(f"Level {level} not in index")
            return index.names.index(level)
        return level

    @property
    def ngroups(self) -> int:
        return len(self.group_index)

    @cache_readonly
    def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
        # we have a list of groupers
        if isinstance(self.grouping_vector, ops.BaseGrouper):
            return self.grouping_vector.indices

        values = Categorical(self.grouping_vector)
        return values._reverse_indexer()
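    # Illustrative sketch (not part of the original source): the public
    # ``GroupBy.indices`` is backed by this property for single-key groupbys
    # and maps each group label to the integer positions of its rows.
    #
    # >>> import pandas as pd
    # >>> df = pd.DataFrame({"A": ["x", "y", "x"], "B": [1, 2, 3]})
    # >>> df.groupby("A").indices
    # {'x': array([0, 2]), 'y': array([1])}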

    @property
    def codes(self) -> npt.NDArray[np.signedinteger]:
        if self._codes is not None:
            # _codes is set in __init__ for MultiIndex cases
            return self._codes

        return self._codes_and_uniques[0]

    @cache_readonly
    def group_arraylike(self) -> ArrayLike:
        """
        Analogous to result_index, but holding an ArrayLike to ensure
        we can retain ExtensionDtypes.
        """
        if self._group_index is not None:
            # _group_index is set in __init__ for MultiIndex cases
            return self._group_index._values

        elif self._all_grouper is not None:
            # retain dtype for categories, including unobserved ones
            return self.result_index._values

        return self._codes_and_uniques[1]

    @cache_readonly
    def result_index(self) -> Index:
        # result_index retains dtype for categories, including unobserved ones,
        # which group_index does not
        if self._all_grouper is not None:
            group_idx = self.group_index
            assert isinstance(group_idx, CategoricalIndex)
            return recode_from_groupby(self._all_grouper, self._sort, group_idx)
        return self.group_index

    @cache_readonly
    def group_index(self) -> Index:
        if self._group_index is not None:
            # _group_index is set in __init__ for MultiIndex cases
            return self._group_index

        uniques = self._codes_and_uniques[1]
        return Index._with_infer(uniques, name=self.name)

    @cache_readonly
    def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
        if self._passed_categorical:
            # we make a CategoricalIndex out of the cat grouper
            # preserving the categories / ordered attributes;
            # doesn't (yet - GH#46909) handle dropna=False
            cat = self.grouping_vector
            categories = cat.categories

            if self._observed:
                ucodes = algorithms.unique1d(cat.codes)
                ucodes = ucodes[ucodes != -1]
                if self._sort or cat.ordered:
                    ucodes = np.sort(ucodes)
            else:
                ucodes = np.arange(len(categories))

            uniques = Categorical.from_codes(
                codes=ucodes, categories=categories, ordered=cat.ordered
            )
            return cat.codes, uniques

        elif isinstance(self.grouping_vector, ops.BaseGrouper):
            # we have a list of groupers
            codes = self.grouping_vector.codes_info
            # error: Incompatible types in assignment (expression has type "Union
            # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical")
            uniques = (
                self.grouping_vector.result_index._values  # type: ignore[assignment]
            )
        else:
            # GH35667, replace dropna=False with use_na_sentinel=False
            # error: Incompatible types in assignment (expression has type "Union[
            # ndarray[Any, Any], Index]", variable has type "Categorical")
            codes, uniques = algorithms.factorize(  # type: ignore[assignment]
                self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna
            )
        return codes, uniques
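    # Illustrative sketch (not part of the original source): the
    # non-categorical path above is essentially a factorization, which the
    # public ``pd.factorize`` also exposes.
    #
    # >>> import pandas as pd
    # >>> codes, uniques = pd.factorize(["b", "a", "b"], sort=True)
    # >>> codes
    # array([1, 0, 1])
    # >>> uniques
    # array(['a', 'b'], dtype=object)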

    @cache_readonly
    def groups(self) -> dict[Hashable, np.ndarray]:
        return self._index.groupby(Categorical.from_codes(self.codes, self.group_index))
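    # Illustrative sketch (not part of the original source): the public
    # ``GroupBy.groups`` view is built the same way, mapping each group label
    # to the index labels of its members. The exact repr of the values (plain
    # lists vs. Index objects) varies by pandas version.
    #
    # >>> import pandas as pd
    # >>> df = pd.DataFrame({"A": ["x", "y", "x"], "B": [1, 2, 3]})
    # >>> df.groupby("A").groups  # doctest: +SKIP
    # {'x': [0, 2], 'y': [1]}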



def get_grouper(
    obj: NDFrameT,
    key=None,
    axis: int = 0,
    level=None,
    sort: bool = True,
    observed: bool = False,
    mutated: bool = False,
    validate: bool = True,
    dropna: bool = True,
) -> tuple[ops.BaseGrouper, frozenset[Hashable], NDFrameT]:
    """
    Create and return a BaseGrouper, which is an internal
    mapping of how to create the grouper indexers.
    This may be composed of multiple Grouping objects, indicating
    multiple groupers.

    Groupers are ultimately index mappings. They can originate as:
    index mappings, keys to columns, functions, or Groupers.

    Groupers enable local references to axis, level, and sort, while
    the passed-in axis, level, and sort are 'global'.

    This routine tries to figure out what the passed-in references
    are and then creates a Grouping for each one, combined into
    a BaseGrouper.

    If observed and we have a categorical grouper, only show the observed
    values.

    If validate, then check for key/level overlaps.
    """
    group_axis = obj._get_axis(axis)

    # validate that the passed single level is compatible with the passed
    # axis of the object
    if level is not None:
        # TODO: This if-block and the else-block are almost the same.
        # The MultiIndex instance check is removable, but it seems that there
        # are some processes only for non-MultiIndex in the else-block,
        # eg. `obj.index.name != level`. We have to consider carefully whether
        # these are applicable for MultiIndex. Even if they are applicable,
        # we need to check that they have no side effects on subsequent
        # processes outside of this condition.
        # (GH 17621)
        if isinstance(group_axis, MultiIndex):
            if is_list_like(level) and len(level) == 1:
                level = level[0]

            if key is None and is_scalar(level):
                # Get the level values from group_axis
                key = group_axis.get_level_values(level)
                level = None

        else:
            # allow level to be a length-one list-like object
            # (e.g., level=[0])
            # GH 13901
            if is_list_like(level):
                nlevels = len(level)
                if nlevels == 1:
                    level = level[0]
                elif nlevels == 0:
                    raise ValueError("No group keys passed!")
                else:
                    raise ValueError("multiple levels only valid with MultiIndex")

            if isinstance(level, str):
                if obj._get_axis(axis).name != level:
                    raise ValueError(
                        f"level name {level} is not the name "
                        f"of the {obj._get_axis_name(axis)}"
                    )
            elif level > 0 or level < -1:
                raise ValueError("level > 0 or level < -1 only valid with MultiIndex")

            # NOTE: `group_axis` and `group_axis.get_level_values(level)`
            # are the same in this section.
            level = None
            key = group_axis

    # a passed-in Grouper, directly convert
    if isinstance(key, Grouper):
        binner, grouper, obj = key._get_grouper(obj, validate=False)
        if key.key is None:
            return grouper, frozenset(), obj
        else:
            return grouper, frozenset({key.key}), obj

    # already have a BaseGrouper, just return it
    elif isinstance(key, ops.BaseGrouper):
        return key, frozenset(), obj

    if not isinstance(key, list):
        keys = [key]
        match_axis_length = False
    else:
        keys = key
        match_axis_length = len(keys) == len(group_axis)

    # what are we after, exactly?
    any_callable = any(callable(g) or isinstance(g, dict) for g in keys)
    any_groupers = any(isinstance(g, (Grouper, Grouping)) for g in keys)
    any_arraylike = any(
        isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys
    )

    # is this an index replacement?
    if (
        not any_callable
        and not any_arraylike
        and not any_groupers
        and match_axis_length
        and level is None
    ):
        if isinstance(obj, DataFrame):
            all_in_columns_index = all(
                g in obj.columns or g in obj.index.names for g in keys
            )
        else:
            assert isinstance(obj, Series)
            all_in_columns_index = all(g in obj.index.names for g in keys)

        if not all_in_columns_index:
            keys = [com.asarray_tuplesafe(keys)]

    if isinstance(level, (tuple, list)):
        if key is None:
            keys = [None] * len(level)
        levels = level
    else:
        levels = [level] * len(keys)

    groupings: list[Grouping] = []
    exclusions: set[Hashable] = set()

    # if the actual grouper should be obj[key]
    def is_in_axis(key) -> bool:

        if not _is_label_like(key):
            if obj.ndim == 1:
                return False

            # items -> .columns for DataFrame, .index for Series
            items = obj.axes[-1]
            try:
                items.get_loc(key)
            except (KeyError, TypeError, InvalidIndexError):
                # TypeError shows up here if we pass e.g. an Int64Index
                return False

        return True

    # if the grouper is obj[name]
    def is_in_obj(gpr) -> bool:
        if not hasattr(gpr, "name"):
            return False
        try:
            return gpr is obj[gpr.name]
        except (KeyError, IndexError, InvalidIndexError):
            # IndexError reached in e.g. test_skip_group_keys when we pass
            # a lambda here
            # InvalidIndexError raised on key-types inappropriate for index,
            # e.g. DatetimeIndex.get_loc(tuple())
            return False

    for gpr, level in zip(keys, levels):

        if is_in_obj(gpr):  # df.groupby(df['name'])
            in_axis = True
            exclusions.add(gpr.name)

        elif is_in_axis(gpr):  # df.groupby('name')
            if gpr in obj:
                if validate:
                    obj._check_label_or_level_ambiguity(gpr, axis=axis)
                in_axis, name, gpr = True, gpr, obj[gpr]
                if gpr.ndim != 1:
                    # non-unique columns; raise here to get the name in the
                    # exception message
                    raise ValueError(f"Grouper for '{name}' not 1-dimensional")
                exclusions.add(name)
            elif obj._is_level_reference(gpr, axis=axis):
                in_axis, level, gpr = False, gpr, None
            else:
                raise KeyError(gpr)
        elif isinstance(gpr, Grouper) and gpr.key is not None:
            # Add key to exclusions
            exclusions.add(gpr.key)
            in_axis = False
        else:
            in_axis = False

        # create the Grouping
        # allow passing the actual Grouping as the gpr
        ping = (
            Grouping(
                group_axis,
                gpr,
                obj=obj,
                level=level,
                sort=sort,
                observed=observed,
                in_axis=in_axis,
                dropna=dropna,
            )
            if not isinstance(gpr, Grouping)
            else gpr
        )

        groupings.append(ping)

    if len(groupings) == 0 and len(obj):
        raise ValueError("No group keys passed!")
    elif len(groupings) == 0:
        groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))

    # create the internals grouper
    grouper = ops.BaseGrouper(
        group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna
    )
    return grouper, frozenset(exclusions), obj
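# Illustrative sketch (not part of the original source): get_grouper is
# internal API, but its return shape can be inspected directly. Behavior
# may differ across pandas versions.
#
# >>> import pandas as pd
# >>> from pandas.core.groupby.grouper import get_grouper
# >>> df = pd.DataFrame({"A": ["x", "y", "x"], "B": [1, 2, 3]})
# >>> grouper, exclusions, obj = get_grouper(df, key="A")
# >>> type(grouper).__name__, sorted(exclusions)
# ('BaseGrouper', ['A'])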



def _is_label_like(val) -> bool:
    return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))
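# Illustrative sketch (not part of the original source): strings, tuples,
# and non-None scalars count as labels; None and list-likes do not.
#
# >>> _is_label_like("A"), _is_label_like(("A", 1)), _is_label_like(3.5)
# (True, True, True)
# >>> _is_label_like(None), _is_label_like([1, 2])
# (False, False)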



def _convert_grouper(axis: Index, grouper):
    if isinstance(grouper, dict):
        return grouper.get
    elif isinstance(grouper, Series):
        if grouper.index.equals(axis):
            return grouper._values
        else:
            return grouper.reindex(axis)._values
    elif isinstance(grouper, MultiIndex):
        return grouper._values
    elif isinstance(grouper, (list, tuple, Index, Categorical, np.ndarray)):
        if len(grouper) != len(axis):
            raise ValueError("Grouper and axis must be same length")

        if isinstance(grouper, (list, tuple)):
            grouper = com.asarray_tuplesafe(grouper)
        return grouper
    else:
        return grouper
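# Illustrative sketch (not part of the original source): the dict branch
# above (returning ``grouper.get``) is what makes grouping by a mapping of
# index labels work.
#
# >>> import pandas as pd
# >>> s = pd.Series([1, 2, 3], index=["a", "b", "c"])
# >>> s.groupby({"a": "g1", "b": "g1", "c": "g2"}).sum()
# g1    3
# g2    3
# dtype: int64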



def _check_deprecated_resample_kwargs(kwargs, origin):
    """
    Check for use of deprecated parameters in ``resample`` and related functions.

    Raises the appropriate warnings if these parameters are detected.
    Only sets an approximate ``stacklevel`` for the warnings (see #37603, #36629).

    Parameters
    ----------
    kwargs : dict
        Dictionary of keyword arguments to check for deprecated parameters.
    origin : object
        From where this function is being called; either Grouper or TimeGrouper. Used
        to determine an approximate stacklevel.
    """
    # Deprecation warning of `base` and `loffset` since v1.1.0:
    # we are raising the warning here to be able to set the `stacklevel`
    # properly since we need to raise the `base` and `loffset` deprecation
    # warning from three different cases:
    #   core/generic.py::NDFrame.resample
    #   core/groupby/groupby.py::GroupBy.resample
    #   core/groupby/grouper.py::Grouper
    # raising these warnings from TimeGrouper directly would fail the test:
    #   tests/resample/test_deprecated.py::test_deprecating_on_loffset_and_base

    if kwargs.get("base", None) is not None:
        warnings.warn(
            "'base' in .resample() and in Grouper() is deprecated.\n"
            "The new arguments that you should use are 'offset' or 'origin'.\n"
            '\n>>> df.resample(freq="3s", base=2)\n'
            "\nbecomes:\n"
            '\n>>> df.resample(freq="3s", offset="2s")\n',
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    if kwargs.get("loffset", None) is not None:
        warnings.warn(
            "'loffset' in .resample() and in Grouper() is deprecated.\n"
            '\n>>> df.resample(freq="3s", loffset="8H")\n'
            "\nbecomes:\n"
            "\n>>> from pandas.tseries.frequencies import to_offset"
            '\n>>> df = df.resample(freq="3s").mean()'
            '\n>>> df.index = df.index.to_timestamp() + to_offset("8H")\n',
            FutureWarning,
            stacklevel=find_stack_level(),
        )

997 )