Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/generic.py: 19%
2323 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1# pyright: reportPropertyTypeMismatch=false
2from __future__ import annotations
4import collections
5from datetime import timedelta
6import functools
7import gc
8import json
9import operator
10import pickle
11import re
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 ClassVar,
17 Hashable,
18 Literal,
19 Mapping,
20 NoReturn,
21 Sequence,
22 Type,
23 cast,
24 final,
25 overload,
26)
27import warnings
28import weakref
30import numpy as np
32from pandas._config import config
34from pandas._libs import lib
35from pandas._libs.tslibs import (
36 Period,
37 Tick,
38 Timestamp,
39 to_offset,
40)
41from pandas._typing import (
42 AnyArrayLike,
43 ArrayLike,
44 Axis,
45 ColspaceArgType,
46 CompressionOptions,
47 Dtype,
48 DtypeArg,
49 DtypeObj,
50 FilePath,
51 FillnaOptions,
52 FloatFormatType,
53 FormattersType,
54 Frequency,
55 IgnoreRaise,
56 IndexKeyFunc,
57 IndexLabel,
58 IntervalClosedType,
59 JSONSerializable,
60 Level,
61 Manager,
62 NaPosition,
63 NDFrameT,
64 RandomState,
65 Renamer,
66 SortKind,
67 StorageOptions,
68 Suffixes,
69 T,
70 TimedeltaConvertibleTypes,
71 TimestampConvertibleTypes,
72 ValueKeyFunc,
73 WriteBuffer,
74 npt,
75)
76from pandas.compat._optional import import_optional_dependency
77from pandas.compat.numpy import function as nv
78from pandas.errors import (
79 AbstractMethodError,
80 InvalidIndexError,
81 SettingWithCopyError,
82 SettingWithCopyWarning,
83)
84from pandas.util._decorators import (
85 deprecate_kwarg,
86 deprecate_nonkeyword_arguments,
87 doc,
88 rewrite_axis_style_signature,
89)
90from pandas.util._exceptions import find_stack_level
91from pandas.util._validators import (
92 validate_ascending,
93 validate_bool_kwarg,
94 validate_fillna_kwargs,
95 validate_inclusive,
96)
98from pandas.core.dtypes.common import (
99 ensure_object,
100 ensure_platform_int,
101 ensure_str,
102 is_bool,
103 is_bool_dtype,
104 is_datetime64_any_dtype,
105 is_datetime64tz_dtype,
106 is_dict_like,
107 is_dtype_equal,
108 is_extension_array_dtype,
109 is_float,
110 is_list_like,
111 is_number,
112 is_numeric_dtype,
113 is_re_compilable,
114 is_scalar,
115 is_timedelta64_dtype,
116 pandas_dtype,
117)
118from pandas.core.dtypes.generic import (
119 ABCDataFrame,
120 ABCSeries,
121)
122from pandas.core.dtypes.inference import (
123 is_hashable,
124 is_nested_list_like,
125)
126from pandas.core.dtypes.missing import (
127 isna,
128 notna,
129)
131from pandas.core import (
132 algorithms as algos,
133 arraylike,
134 common as com,
135 indexing,
136 missing,
137 nanops,
138 sample,
139)
140from pandas.core.array_algos.replace import should_use_regex
141from pandas.core.arrays import ExtensionArray
142from pandas.core.base import PandasObject
143from pandas.core.construction import (
144 create_series_with_explicit_dtype,
145 extract_array,
146)
147from pandas.core.describe import describe_ndframe
148from pandas.core.flags import Flags
149from pandas.core.indexes.api import (
150 DatetimeIndex,
151 Index,
152 MultiIndex,
153 PeriodIndex,
154 RangeIndex,
155 default_index,
156 ensure_index,
157)
158from pandas.core.internals import (
159 ArrayManager,
160 BlockManager,
161 SingleArrayManager,
162)
163from pandas.core.internals.construction import mgr_to_mgr
164from pandas.core.missing import find_valid_index
165from pandas.core.ops import align_method_FRAME
166from pandas.core.reshape.concat import concat
167from pandas.core.shared_docs import _shared_docs
168from pandas.core.sorting import get_indexer_indexer
169from pandas.core.window import (
170 Expanding,
171 ExponentialMovingWindow,
172 Rolling,
173 Window,
174)
176from pandas.io.formats import format as fmt
177from pandas.io.formats.format import (
178 DataFrameFormatter,
179 DataFrameRenderer,
180)
181from pandas.io.formats.printing import pprint_thing
if TYPE_CHECKING:
185 from pandas._libs.tslibs import BaseOffset
187 from pandas.core.frame import DataFrame
188 from pandas.core.indexers.objects import BaseIndexer
189 from pandas.core.resample import Resampler
190 from pandas.core.series import Series
192 from pandas.io.pytables import HDFStore
# goal is to be able to define the docs close to function, while still being
# able to share
# NOTE: rebinding to a shallow copy decouples this module's additions from
# the shared dict imported from pandas.core.shared_docs.
_shared_docs = {**_shared_docs}
# Substitution values interpolated into docstrings via the @doc decorator.
_shared_doc_kwargs = {
    "axes": "keywords for axes",
    "klass": "Series/DataFrame",
    "axes_single_arg": "int or labels for object",
    "args_transpose": "axes to permute (int or label for object)",
    "inplace": """
    inplace : bool, default False
        If True, performs operation inplace and returns None.""",
    "optional_by": """
        by : str or list of str
            Name or list of names to sort by""",
    "replace_iloc": """
    This differs from updating with ``.loc`` or ``.iloc``, which require
    you to specify a location to update with some value.""",
}

bool_t = bool  # Need alias because NDFrame has def bool:
class NDFrame(PandasObject, indexing.IndexingMixin):
    """
    N-dimensional analogue of DataFrame. Store multi-dimensional in a
    size-mutable, labeled data structure

    Parameters
    ----------
    data : BlockManager
    axes : list
    copy : bool, default False
    """

    # Attribute names stored directly on the instance; __getattr__/__setattr__
    # treat anything else as a potential column/index label.
    _internal_names: list[str] = [
        "_mgr",
        "_cacher",
        "_item_cache",
        "_cache",
        "_is_copy",
        "_subtyp",
        "_name",
        "_default_kind",
        "_default_fill_value",
        "_metadata",
        "__array_struct__",
        "__array_interface__",
        "_flags",
    ]
    # Set form of the above for O(1) membership tests on the attribute path.
    _internal_names_set: set[str] = set(_internal_names)
    _accessors: set[str] = set()
    # Attributes hidden from dir()/tab-completion (deprecated or internal).
    _hidden_attrs: frozenset[str] = frozenset(
        ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"]
    )
    _metadata: list[str] = []
    # Weakref back to the object this one was (chained-assignment) copied from.
    _is_copy: weakref.ReferenceType[NDFrame] | None = None
    _mgr: Manager
    _attrs: dict[Hashable, Any]
    _typ: str
256 # ----------------------------------------------------------------------
257 # Constructors
259 def __init__(
260 self,
261 data: Manager,
262 copy: bool_t = False,
263 attrs: Mapping[Hashable, Any] | None = None,
264 ) -> None:
265 # copy kwarg is retained for mypy compat, is not used
267 object.__setattr__(self, "_is_copy", None)
268 object.__setattr__(self, "_mgr", data)
269 object.__setattr__(self, "_item_cache", {})
270 if attrs is None:
271 attrs = {}
272 else:
273 attrs = dict(attrs)
274 object.__setattr__(self, "_attrs", attrs)
275 object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))
    @classmethod
    def _init_mgr(
        cls,
        mgr: Manager,
        axes,
        dtype: Dtype | None = None,
        copy: bool_t = False,
    ) -> Manager:
        """passed a manager and a axes dict"""
        # Reindex the manager along every axis for which labels were supplied.
        for a, axe in axes.items():
            if axe is not None:
                axe = ensure_index(axe)
                bm_axis = cls._get_block_manager_axis(a)
                mgr = mgr.reindex_axis(axe, axis=bm_axis)

        # make a copy if explicitly requested
        if copy:
            mgr = mgr.copy()
        if dtype is not None:
            # avoid further copies if we can
            if (
                isinstance(mgr, BlockManager)
                and len(mgr.blocks) == 1
                and is_dtype_equal(mgr.blocks[0].values.dtype, dtype)
            ):
                # Single block already of the requested dtype: astype would
                # only produce a needless copy, so skip it.
                pass
            else:
                mgr = mgr.astype(dtype=dtype)
        return mgr
    def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT:
        """
        Private helper function to create a DataFrame with specific manager.

        Parameters
        ----------
        typ : {"block", "array"}
        copy : bool, default True
            Only controls whether the conversion from Block->ArrayManager
            copies the 1D arrays (to ensure proper/contiguous memory layout).

        Returns
        -------
        DataFrame
            New DataFrame using specified manager type. Is not guaranteed
            to be a copy or not.
        """
        new_mgr: Manager
        new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy)
        # fastpath of passing a manager doesn't check the option/manager class
        return self._constructor(new_mgr).__finalize__(self)
    # ----------------------------------------------------------------------
    # attrs and flags

    @property
    def attrs(self) -> dict[Hashable, Any]:
        """
        Dictionary of global attributes of this dataset.

        .. warning::

           attrs is experimental and may change without warning.

        See Also
        --------
        DataFrame.flags : Global flags applying to this object.
        """
        # Lazily materialize so objects constructed without attrs stay cheap.
        if self._attrs is None:
            self._attrs = {}
        return self._attrs
    @attrs.setter
    def attrs(self, value: Mapping[Hashable, Any]) -> None:
        # Copy into a plain dict so later mutation of ``value`` by the caller
        # cannot affect this object.
        self._attrs = dict(value)
    @final
    @property
    def flags(self) -> Flags:
        """
        Get the properties associated with this pandas object.

        The available flags are

        * :attr:`Flags.allows_duplicate_labels`

        See Also
        --------
        Flags : Flags that apply to pandas objects.
        DataFrame.attrs : Global metadata applying to this dataset.

        Notes
        -----
        "Flags" differ from "metadata". Flags reflect properties of the
        pandas object (the Series or DataFrame). Metadata refer to properties
        of the dataset, and should be stored in :attr:`DataFrame.attrs`.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]})
        >>> df.flags
        <Flags(allows_duplicate_labels=True)>

        Flags can be get or set using ``.``

        >>> df.flags.allows_duplicate_labels
        True
        >>> df.flags.allows_duplicate_labels = False

        Or by slicing with a key

        >>> df.flags["allows_duplicate_labels"]
        False
        >>> df.flags["allows_duplicate_labels"] = True
        """
        # _flags is assigned in __init__ via object.__setattr__.
        return self._flags
    @final
    def set_flags(
        self: NDFrameT,
        *,
        copy: bool_t = False,
        allows_duplicate_labels: bool_t | None = None,
    ) -> NDFrameT:
        """
        Return a new object with updated flags.

        Parameters
        ----------
        allows_duplicate_labels : bool, optional
            Whether the returned object allows duplicate labels.

        Returns
        -------
        Series or DataFrame
            The same type as the caller.

        See Also
        --------
        DataFrame.attrs : Global metadata applying to this dataset.
        DataFrame.flags : Global flags applying to this object.

        Notes
        -----
        This method returns a new object that's a view on the same data
        as the input. Mutating the input or the output values will be reflected
        in the other.

        This method is intended to be used in method chains.

        "Flags" differ from "metadata". Flags reflect properties of the
        pandas object (the Series or DataFrame). Metadata refer to properties
        of the dataset, and should be stored in :attr:`DataFrame.attrs`.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]})
        >>> df.flags.allows_duplicate_labels
        True
        >>> df2 = df.set_flags(allows_duplicate_labels=False)
        >>> df2.flags.allows_duplicate_labels
        False
        """
        # copy(deep=False) returns a new object viewing the same data.
        df = self.copy(deep=copy)
        if allows_duplicate_labels is not None:
            df.flags["allows_duplicate_labels"] = allows_duplicate_labels
        return df
445 @final
446 @classmethod
447 def _validate_dtype(cls, dtype) -> DtypeObj | None:
448 """validate the passed dtype"""
449 if dtype is not None:
450 dtype = pandas_dtype(dtype)
452 # a compound dtype
453 if dtype.kind == "V":
454 raise NotImplementedError(
455 "compound dtypes are not implemented "
456 f"in the {cls.__name__} constructor"
457 )
459 return dtype
    # ----------------------------------------------------------------------
    # Construction

    @property
    def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]:
        """
        Used when a manipulation result has the same dimensions as the
        original.
        """
        # Subclasses (Series, DataFrame) must override this.
        raise AbstractMethodError(self)
    # ----------------------------------------------------------------------
    # Internals

    @final
    @property
    def _data(self):
        # GH#33054 retained because some downstream packages uses this,
        # e.g. fastparquet
        return self._mgr
    # ----------------------------------------------------------------------
    # Axis
    # Axis 0 ("index") is the statistics axis for reductions.
    _stat_axis_number = 0
    _stat_axis_name = "index"
    _AXIS_ORDERS: list[str]
    # Maps every accepted axis spelling (int or name) to its axis number;
    # DataFrame extends this with the column aliases.
    _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0}
    _info_axis_number: int
    _info_axis_name: str
    _AXIS_LEN: int
    @property
    def _AXIS_NUMBERS(self) -> dict[str, int]:
        """.. deprecated:: 1.1.0"""
        warnings.warn(
            "_AXIS_NUMBERS has been deprecated.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return {"index": 0}
502 @property
503 def _AXIS_NAMES(self) -> dict[int, str]:
504 """.. deprecated:: 1.1.0"""
505 level = self.ndim + 1
506 warnings.warn(
507 "_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=level
508 )
509 return {0: "index"}
511 @final
512 def _construct_axes_dict(self, axes=None, **kwargs):
513 """Return an axes dictionary for myself."""
514 d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
515 d.update(kwargs)
516 return d
    @final
    @classmethod
    def _construct_axes_from_arguments(
        cls, args, kwargs, require_all: bool_t = False, sentinel=None
    ):
        """
        Construct and returns axes if supplied in args/kwargs.

        If require_all, raise if all axis arguments are not supplied
        return a tuple of (axes, kwargs).

        sentinel specifies the default parameter when an axis is not
        supplied; useful to distinguish when a user explicitly passes None
        in scenarios where None has special meaning.
        """
        # construct the args
        args = list(args)
        for a in cls._AXIS_ORDERS:

            # look for a argument by position
            if a not in kwargs:
                try:
                    # Consume positionals in axis order until exhausted.
                    kwargs[a] = args.pop(0)
                except IndexError as err:
                    if require_all:
                        raise TypeError(
                            "not enough/duplicate arguments specified!"
                        ) from err

        # Split axis entries out of kwargs, filling missing ones with sentinel.
        axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS}
        return axes, kwargs
    @final
    @classmethod
    def _get_axis_number(cls, axis: Axis) -> int:
        """Translate any accepted axis spec (int or name) to its axis number."""
        try:
            return cls._AXIS_TO_AXIS_NUMBER[axis]
        except KeyError:
            # Re-raise with a user-facing message naming the class.
            raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
558 @final
559 @classmethod
560 def _get_axis_name(cls, axis: Axis) -> str:
561 axis_number = cls._get_axis_number(axis)
562 return cls._AXIS_ORDERS[axis_number]
564 @final
565 def _get_axis(self, axis: Axis) -> Index:
566 axis_number = self._get_axis_number(axis)
567 assert axis_number in {0, 1}
568 return self.index if axis_number == 0 else self.columns
570 @final
571 @classmethod
572 def _get_block_manager_axis(cls, axis: Axis) -> int:
573 """Map the axis to the block_manager axis."""
574 axis = cls._get_axis_number(axis)
575 ndim = cls._AXIS_LEN
576 if ndim == 2:
577 # i.e. DataFrame
578 return 1 - axis
579 return axis
    @final
    def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]:
        """Build name -> values resolvers for one axis, for query()/eval()."""
        # index or columns
        axis_index = getattr(self, axis)
        d = {}
        prefix = axis[0]

        for i, name in enumerate(axis_index.names):
            if name is not None:
                key = level = name
            else:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # multiiindex
                key = f"{prefix}level_{i}"
                level = i

            level_values = axis_index.get_level_values(level)
            s = level_values.to_series()
            # Re-anchor the level values on the full axis so they align.
            s.index = axis_index
            d[key] = s

        # put the index/columns itself in the dict
        if isinstance(axis_index, MultiIndex):
            dindex = axis_index
        else:
            dindex = axis_index.to_series()

        d[axis] = dindex
        return d
    @final
    def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]:
        """Collect resolvers for all axes, with backtick-safe cleaned names."""
        from pandas.core.computation.parsing import clean_column_name

        d: dict[str, Series | MultiIndex] = {}
        for axis_name in self._AXIS_ORDERS:
            d.update(self._get_axis_resolvers(axis_name))

        # int keys cannot be referenced by name in query()/eval(), drop them.
        return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}
    @final
    def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
        """
        Return the special character free column resolvers of a dataframe.

        Column names with special characters are 'cleaned up' so that they can
        be referred to by backtick quoting.
        Used in :meth:`DataFrame.eval`.
        """
        from pandas.core.computation.parsing import clean_column_name

        if isinstance(self, ABCSeries):
            # A Series resolves only its own name.
            return {clean_column_name(self.name): self}

        return {
            clean_column_name(k): v for k, v in self.items() if not isinstance(k, int)
        }
    @property
    def _info_axis(self) -> Index:
        # The axis holding the "items" (columns for DataFrame, index for Series).
        return getattr(self, self._info_axis_name)
    @property
    def _stat_axis(self) -> Index:
        # The axis reductions/statistics are computed along (always "index").
        return getattr(self, self._stat_axis_name)
    @property
    def shape(self) -> tuple[int, ...]:
        """
        Return a tuple of axis dimensions
        """
        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)
    @property
    def axes(self) -> list[Index]:
        """
        Return index label(s) of the internal NDFrame
        """
        # we do it this way because if we have reversed axes, then
        # the block manager shows then reversed
        return [self._get_axis(a) for a in self._AXIS_ORDERS]
    @property
    def ndim(self) -> int:
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        # Delegated to the manager, which knows its own dimensionality.
        return self._mgr.ndim
687 @property
688 def size(self) -> int:
689 """
690 Return an int representing the number of elements in this object.
692 Return the number of rows if Series. Otherwise return the number of
693 rows times number of columns if DataFrame.
695 See Also
696 --------
697 ndarray.size : Number of elements in the array.
699 Examples
700 --------
701 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
702 >>> s.size
703 3
705 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
706 >>> df.size
707 4
708 """
709 # error: Incompatible return value type (got "signedinteger[_64Bit]",
710 # expected "int") [return-value]
711 return np.prod(self.shape) # type: ignore[return-value]
    # Typing overloads: inplace=False (or defaulted) returns a new object,
    # inplace=True returns None, unknown-at-type-check-time returns either.
    @overload
    def set_axis(
        self: NDFrameT,
        labels,
        *,
        axis: Axis = ...,
        inplace: Literal[False] | lib.NoDefault = ...,
        copy: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT:
        ...

    @overload
    def set_axis(
        self,
        labels,
        *,
        axis: Axis = ...,
        inplace: Literal[True],
        copy: bool_t | lib.NoDefault = ...,
    ) -> None:
        ...

    @overload
    def set_axis(
        self: NDFrameT,
        labels,
        *,
        axis: Axis = ...,
        inplace: bool_t | lib.NoDefault = ...,
        copy: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT | None:
        ...
    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
    def set_axis(
        self: NDFrameT,
        labels,
        axis: Axis = 0,
        inplace: bool_t | lib.NoDefault = lib.no_default,
        *,
        copy: bool_t | lib.NoDefault = lib.no_default,
    ) -> NDFrameT | None:
        """
        Assign desired index to given axis.

        Indexes for%(extended_summary_sub)s row labels can be changed by assigning
        a list-like or Index.

        Parameters
        ----------
        labels : list-like, Index
            The values for the new index.

        axis : %(axes_single_arg)s, default 0
            The axis to update. The value 0 identifies the rows. For `Series`
            this parameter is unused and defaults to 0.

        inplace : bool, default False
            Whether to return a new %(klass)s instance.

            .. deprecated:: 1.5.0

        copy : bool, default True
            Whether to make a copy of the underlying data.

            .. versionadded:: 1.5.0

        Returns
        -------
        renamed : %(klass)s or None
            An object of type %(klass)s or None if ``inplace=True``.

        See Also
        --------
        %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
        """
        # Any explicit ``inplace`` (True or False) triggers the deprecation.
        if inplace is not lib.no_default:
            warnings.warn(
                f"{type(self).__name__}.set_axis 'inplace' keyword is deprecated "
                "and will be removed in a future version. Use "
                "`obj = obj.set_axis(..., copy=False)` instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        else:
            inplace = False

        if inplace:
            if copy is True:
                raise ValueError("Cannot specify both inplace=True and copy=True")
            # In-place assignment never copies the underlying data.
            copy = False
        elif copy is lib.no_default:
            # Default for the non-inplace path is to copy.
            copy = True

        self._check_inplace_and_allows_duplicate_labels(inplace)
        return self._set_axis_nocheck(labels, axis, inplace, copy=copy)
    @final
    def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t):
        """Assign ``labels`` to ``axis`` without duplicate-label validation."""
        if inplace:
            # setattr routes through the index/columns property setter.
            setattr(self, self._get_axis_name(axis), labels)
        else:
            # With copy=False, we create a new object but don't copy the
            # underlying data.
            obj = self.copy(deep=copy)
            setattr(obj, obj._get_axis_name(axis), labels)
            return obj
    def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
        """Replace the labels of ``axis`` on the manager, invalidating caches."""
        labels = ensure_index(labels)
        self._mgr.set_axis(axis, labels)
        # Cached column Series reference the old axis; drop them.
        self._clear_item_cache()
    @final
    def swapaxes(
        self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True
    ) -> NDFrameT:
        """
        Interchange axes and swap values axes appropriately.

        Returns
        -------
        y : same as input
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            # Swapping an axis with itself is a no-op; honor ``copy`` only.
            if copy:
                return self.copy()
            return self

        mapping = {i: j, j: i}

        # Axes in swapped order; self.values materializes a 2-D ndarray.
        new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN))
        new_values = self.values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(
            new_values,
            *new_axes,
        ).__finalize__(self, method="swapaxes")
    @final
    @doc(klass=_shared_doc_kwargs["klass"])
    def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT:
        """
        Return {klass} with requested index / column level(s) removed.

        Parameters
        ----------
        level : int, str, or list-like
            If a string is given, must be the name of a level
            If list-like, elements must be names or positional indexes
            of levels.

        axis : {{0 or 'index', 1 or 'columns'}}, default 0
            Axis along which the level(s) is removed:

            * 0 or 'index': remove level(s) in column.
            * 1 or 'columns': remove level(s) in row.

            For `Series` this parameter is unused and defaults to 0.

        Returns
        -------
        {klass}
            {klass} with requested index / column level(s) removed.

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     [1, 2, 3, 4],
        ...     [5, 6, 7, 8],
        ...     [9, 10, 11, 12]
        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

        >>> df.columns = pd.MultiIndex.from_tuples([
        ...     ('c', 'e'), ('d', 'f')
        ... ], names=['level_1', 'level_2'])

        >>> df
        level_1   c   d
        level_2   e   f
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12

        >>> df.droplevel('a')
        level_1   c   d
        level_2   e   f
        b
        2        3   4
        6        7   8
        10      11  12

        >>> df.droplevel('level_2', axis=1)
        level_1   c   d
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12
        """
        # Index.droplevel does the level validation and removal.
        labels = self._get_axis(axis)
        new_labels = labels.droplevel(level)
        return self.set_axis(new_labels, axis=axis)
922 def pop(self, item: Hashable) -> Series | Any:
923 result = self[item]
924 del self[item]
926 return result
    @final
    def squeeze(self, axis=None):
        """
        Squeeze 1 dimensional axis objects into scalars.

        Series or DataFrames with a single element are squeezed to a scalar.
        DataFrames with a single column or a single row are squeezed to a
        Series. Otherwise the object is unchanged.

        This method is most useful when you don't know if your
        object is a Series or DataFrame, but you do know it has just a single
        column. In that case you can safely call `squeeze` to ensure you have a
        Series.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns', None}, default None
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed. For `Series` this parameter is unused and defaults to `None`.

        Returns
        -------
        DataFrame, Series, or scalar
            The projection after squeezing `axis` or all the axes.

        See Also
        --------
        Series.iloc : Integer-location based indexing for selecting scalars.
        DataFrame.iloc : Integer-location based indexing for selecting Series.
        Series.to_frame : Inverse of DataFrame.squeeze for a
            single-column DataFrame.

        Examples
        --------
        >>> primes = pd.Series([2, 3, 5, 7])

        Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: int64

        >>> even_primes.squeeze()
        2

        Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: int64

        Squeezing is even more effective when used with DataFrames.

        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4

        Slicing a single column will produce a DataFrame with the columns
        having only one value:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3

        So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: int64

        Slicing a single row from a single column will produce a single
        scalar DataFrame:

        >>> df_0a = df.loc[df.index < 1, ['a']]
        >>> df_0a
           a
        0  1

        Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: int64

        Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        1
        """
        # Normalize to a collection of axis numbers to consider.
        axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),)
        # For each candidate length-1 axis select position 0 (dropping that
        # dimension); every other axis keeps a full slice.
        return self.iloc[
            tuple(
                0 if i in axis and len(a) == 1 else slice(None)
                for i, a in enumerate(self.axes)
            )
        ]
    # ----------------------------------------------------------------------
    # Rename

    def _rename(
        self: NDFrameT,
        mapper: Renamer | None = None,
        *,
        index: Renamer | None = None,
        columns: Renamer | None = None,
        axis: Axis | None = None,
        copy: bool_t | None = None,
        inplace: bool_t = False,
        level: Level | None = None,
        errors: str = "ignore",
    ) -> NDFrameT | None:
        """Shared implementation backing Series.rename and DataFrame.rename."""
        # called by Series.rename and DataFrame.rename

        if mapper is None and index is None and columns is None:
            raise TypeError("must pass an index to rename")

        # mapper+axis and index/columns are mutually exclusive spellings.
        if index is not None or columns is not None:
            if axis is not None:
                raise TypeError(
                    "Cannot specify both 'axis' and any of 'index' or 'columns'"
                )
            elif mapper is not None:
                raise TypeError(
                    "Cannot specify both 'mapper' and any of 'index' or 'columns'"
                )
        else:
            # use the mapper argument
            if axis and self._get_axis_number(axis) == 1:
                columns = mapper
            else:
                index = mapper

        self._check_inplace_and_allows_duplicate_labels(inplace)
        result = self if inplace else self.copy(deep=copy)

        # axis_no 0 -> index replacements, axis_no 1 -> column replacements.
        for axis_no, replacements in enumerate((index, columns)):
            if replacements is None:
                continue

            ax = self._get_axis(axis_no)
            f = com.get_rename_function(replacements)

            if level is not None:
                # Translate a level name to its number once per axis.
                level = ax._get_level_number(level)

            # GH 13473
            if not callable(replacements):
                if ax._is_multi and level is not None:
                    indexer = ax.get_level_values(level).get_indexer_for(replacements)
                else:
                    indexer = ax.get_indexer_for(replacements)

                # -1 in the indexer marks labels not present on the axis.
                if errors == "raise" and len(indexer[indexer == -1]):
                    missing_labels = [
                        label
                        for index, label in enumerate(replacements)
                        if indexer[index] == -1
                    ]
                    raise KeyError(f"{missing_labels} not found in axis")

            new_index = ax._transform_index(f, level=level)
            result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False)
            result._clear_item_cache()

        if inplace:
            self._update_inplace(result)
            return None
        else:
            return result.__finalize__(self, method="rename")
    # Typing overloads: inplace=False returns a new object, inplace=True
    # returns None, statically-unknown inplace returns either.
    @overload
    def rename_axis(
        self: NDFrameT,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        inplace: Literal[False] = ...,
        **kwargs,
    ) -> NDFrameT:
        ...

    @overload
    def rename_axis(
        self,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        inplace: Literal[True],
        **kwargs,
    ) -> None:
        ...

    @overload
    def rename_axis(
        self: NDFrameT,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        inplace: bool_t = ...,
        **kwargs,
    ) -> NDFrameT | None:
        ...
1144 @rewrite_axis_style_signature("mapper", [("copy", True)])
1145 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "mapper"])
1146 def rename_axis(
1147 self: NDFrameT,
1148 mapper: IndexLabel | lib.NoDefault = lib.no_default,
1149 inplace: bool_t = False,
1150 **kwargs,
1151 ) -> NDFrameT | None:
1152 """
1153 Set the name of the axis for the index or columns.
1155 Parameters
1156 ----------
1157 mapper : scalar, list-like, optional
1158 Value to set the axis name attribute.
1159 index, columns : scalar, list-like, dict-like or function, optional
1160 A scalar, list-like, dict-like or functions transformations to
1161 apply to that axis' values.
1162 Note that the ``columns`` parameter is not allowed if the
1163 object is a Series. This parameter only apply for DataFrame
1164 type objects.
1166 Use either ``mapper`` and ``axis`` to
1167 specify the axis to target with ``mapper``, or ``index``
1168 and/or ``columns``.
1169 axis : {0 or 'index', 1 or 'columns'}, default 0
1170 The axis to rename. For `Series` this parameter is unused and defaults to 0.
1171 copy : bool, default True
1172 Also copy underlying data.
1173 inplace : bool, default False
1174 Modifies the object directly, instead of creating a new Series
1175 or DataFrame.
1177 Returns
1178 -------
1179 Series, DataFrame, or None
1180 The same type as the caller or None if ``inplace=True``.
1182 See Also
1183 --------
1184 Series.rename : Alter Series index labels or name.
1185 DataFrame.rename : Alter DataFrame index labels or name.
1186 Index.rename : Set new names on index.
1188 Notes
1189 -----
1190 ``DataFrame.rename_axis`` supports two calling conventions
1192 * ``(index=index_mapper, columns=columns_mapper, ...)``
1193 * ``(mapper, axis={'index', 'columns'}, ...)``
1195 The first calling convention will only modify the names of
1196 the index and/or the names of the Index object that is the columns.
1197 In this case, the parameter ``copy`` is ignored.
1199 The second calling convention will modify the names of the
1200 corresponding index if mapper is a list or a scalar.
1201 However, if mapper is dict-like or a function, it will use the
1202 deprecated behavior of modifying the axis *labels*.
1204 We *highly* recommend using keyword arguments to clarify your
1205 intent.
1207 Examples
1208 --------
1209 **Series**
1211 >>> s = pd.Series(["dog", "cat", "monkey"])
1212 >>> s
1213 0 dog
1214 1 cat
1215 2 monkey
1216 dtype: object
1217 >>> s.rename_axis("animal")
1218 animal
1219 0 dog
1220 1 cat
1221 2 monkey
1222 dtype: object
1224 **DataFrame**
1226 >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
1227 ... "num_arms": [0, 0, 2]},
1228 ... ["dog", "cat", "monkey"])
1229 >>> df
1230 num_legs num_arms
1231 dog 4 0
1232 cat 4 0
1233 monkey 2 2
1234 >>> df = df.rename_axis("animal")
1235 >>> df
1236 num_legs num_arms
1237 animal
1238 dog 4 0
1239 cat 4 0
1240 monkey 2 2
1241 >>> df = df.rename_axis("limbs", axis="columns")
1242 >>> df
1243 limbs num_legs num_arms
1244 animal
1245 dog 4 0
1246 cat 4 0
1247 monkey 2 2
1249 **MultiIndex**
1251 >>> df.index = pd.MultiIndex.from_product([['mammal'],
1252 ... ['dog', 'cat', 'monkey']],
1253 ... names=['type', 'name'])
1254 >>> df
1255 limbs num_legs num_arms
1256 type name
1257 mammal dog 4 0
1258 cat 4 0
1259 monkey 2 2
1261 >>> df.rename_axis(index={'type': 'class'})
1262 limbs num_legs num_arms
1263 class name
1264 mammal dog 4 0
1265 cat 4 0
1266 monkey 2 2
1268 >>> df.rename_axis(columns=str.upper)
1269 LIMBS num_legs num_arms
1270 type name
1271 mammal dog 4 0
1272 cat 4 0
1273 monkey 2 2
1274 """
1275 kwargs["inplace"] = inplace
1276 axes, kwargs = self._construct_axes_from_arguments(
1277 (), kwargs, sentinel=lib.no_default
1278 )
1279 copy = kwargs.pop("copy", True)
1280 inplace = kwargs.pop("inplace", False)
1281 axis = kwargs.pop("axis", 0)
1282 if axis is not None:
1283 axis = self._get_axis_number(axis)
1285 if kwargs:
1286 raise TypeError(
1287 "rename_axis() got an unexpected keyword "
1288 f'argument "{list(kwargs.keys())[0]}"'
1289 )
1291 inplace = validate_bool_kwarg(inplace, "inplace")
1293 if mapper is not lib.no_default:
1294 # Use v0.23 behavior if a scalar or list
1295 non_mapper = is_scalar(mapper) or (
1296 is_list_like(mapper) and not is_dict_like(mapper)
1297 )
1298 if non_mapper:
1299 return self._set_axis_name(mapper, axis=axis, inplace=inplace)
1300 else:
1301 raise ValueError("Use `.rename` to alter labels with a mapper.")
1302 else:
1303 # Use new behavior. Means that index and/or columns
1304 # is specified
1305 result = self if inplace else self.copy(deep=copy)
1307 for axis in range(self._AXIS_LEN):
1308 v = axes.get(self._get_axis_name(axis))
1309 if v is lib.no_default:
1310 continue
1311 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
1312 if non_mapper:
1313 newnames = v
1314 else:
1315 f = com.get_rename_function(v)
1316 curnames = self._get_axis(axis).names
1317 newnames = [f(name) for name in curnames]
1318 result._set_axis_name(newnames, axis=axis, inplace=True)
1319 if not inplace:
1320 return result
1321 return None
1323 @final
1324 def _set_axis_name(self, name, axis=0, inplace=False):
1325 """
1326 Set the name(s) of the axis.
1328 Parameters
1329 ----------
1330 name : str or list of str
1331 Name(s) to set.
1332 axis : {0 or 'index', 1 or 'columns'}, default 0
1333 The axis to set the label. The value 0 or 'index' specifies index,
1334 and the value 1 or 'columns' specifies columns.
1335 inplace : bool, default False
1336 If `True`, do operation inplace and return None.
1338 Returns
1339 -------
1340 Series, DataFrame, or None
1341 The same type as the caller or `None` if `inplace` is `True`.
1343 See Also
1344 --------
1345 DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
1346 Series.rename : Alter the index labels or set the index name
1347 of :class:`Series`.
1348 Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.
1350 Examples
1351 --------
1352 >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
1353 ... ["dog", "cat", "monkey"])
1354 >>> df
1355 num_legs
1356 dog 4
1357 cat 4
1358 monkey 2
1359 >>> df._set_axis_name("animal")
1360 num_legs
1361 animal
1362 dog 4
1363 cat 4
1364 monkey 2
1365 >>> df.index = pd.MultiIndex.from_product(
1366 ... [["mammal"], ['dog', 'cat', 'monkey']])
1367 >>> df._set_axis_name(["type", "name"])
1368 num_legs
1369 type name
1370 mammal dog 4
1371 cat 4
1372 monkey 2
1373 """
1374 axis = self._get_axis_number(axis)
1375 idx = self._get_axis(axis).set_names(name)
1377 inplace = validate_bool_kwarg(inplace, "inplace")
1378 renamed = self if inplace else self.copy()
1379 if axis == 0:
1380 renamed.index = idx
1381 else:
1382 renamed.columns = idx
1384 if not inplace:
1385 return renamed
1387 # ----------------------------------------------------------------------
1388 # Comparison Methods
1390 @final
1391 def _indexed_same(self, other) -> bool_t:
1392 return all(
1393 self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS
1394 )
1396 @final
1397 def equals(self, other: object) -> bool_t:
1398 """
1399 Test whether two objects contain the same elements.
1401 This function allows two Series or DataFrames to be compared against
1402 each other to see if they have the same shape and elements. NaNs in
1403 the same location are considered equal.
1405 The row/column index do not need to have the same type, as long
1406 as the values are considered equal. Corresponding columns must be of
1407 the same dtype.
1409 Parameters
1410 ----------
1411 other : Series or DataFrame
1412 The other Series or DataFrame to be compared with the first.
1414 Returns
1415 -------
1416 bool
1417 True if all elements are the same in both objects, False
1418 otherwise.
1420 See Also
1421 --------
1422 Series.eq : Compare two Series objects of the same length
1423 and return a Series where each element is True if the element
1424 in each Series is equal, False otherwise.
1425 DataFrame.eq : Compare two DataFrame objects of the same shape and
1426 return a DataFrame where each element is True if the respective
1427 element in each DataFrame is equal, False otherwise.
1428 testing.assert_series_equal : Raises an AssertionError if left and
1429 right are not equal. Provides an easy interface to ignore
1430 inequality in dtypes, indexes and precision among others.
1431 testing.assert_frame_equal : Like assert_series_equal, but targets
1432 DataFrames.
1433 numpy.array_equal : Return True if two arrays have the same shape
1434 and elements, False otherwise.
1436 Examples
1437 --------
1438 >>> df = pd.DataFrame({1: [10], 2: [20]})
1439 >>> df
1440 1 2
1441 0 10 20
1443 DataFrames df and exactly_equal have the same types and values for
1444 their elements and column labels, which will return True.
1446 >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
1447 >>> exactly_equal
1448 1 2
1449 0 10 20
1450 >>> df.equals(exactly_equal)
1451 True
1453 DataFrames df and different_column_type have the same element
1454 types and values, but have different types for the column labels,
1455 which will still return True.
1457 >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
1458 >>> different_column_type
1459 1.0 2.0
1460 0 10 20
1461 >>> df.equals(different_column_type)
1462 True
1464 DataFrames df and different_data_type have different types for the
1465 same values for their elements, and will return False even though
1466 their column labels are the same values and types.
1468 >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
1469 >>> different_data_type
1470 1 2
1471 0 10.0 20.0
1472 >>> df.equals(different_data_type)
1473 False
1474 """
1475 if not (isinstance(other, type(self)) or isinstance(self, type(other))):
1476 return False
1477 other = cast(NDFrame, other)
1478 return self._mgr.equals(other._mgr)
1480 # -------------------------------------------------------------------------
1481 # Unary Methods
1483 @final
1484 def __neg__(self: NDFrameT) -> NDFrameT:
1485 def blk_func(values: ArrayLike):
1486 if is_bool_dtype(values.dtype):
1487 # error: Argument 1 to "inv" has incompatible type "Union
1488 # [ExtensionArray, ndarray[Any, Any]]"; expected
1489 # "_SupportsInversion[ndarray[Any, dtype[bool_]]]"
1490 return operator.inv(values) # type: ignore[arg-type]
1491 else:
1492 # error: Argument 1 to "neg" has incompatible type "Union
1493 # [ExtensionArray, ndarray[Any, Any]]"; expected
1494 # "_SupportsNeg[ndarray[Any, dtype[Any]]]"
1495 return operator.neg(values) # type: ignore[arg-type]
1497 new_data = self._mgr.apply(blk_func)
1498 res = self._constructor(new_data)
1499 return res.__finalize__(self, method="__neg__")
1501 @final
1502 def __pos__(self: NDFrameT) -> NDFrameT:
1503 def blk_func(values: ArrayLike):
1504 if is_bool_dtype(values.dtype):
1505 return values.copy()
1506 else:
1507 # error: Argument 1 to "pos" has incompatible type "Union
1508 # [ExtensionArray, ndarray[Any, Any]]"; expected
1509 # "_SupportsPos[ndarray[Any, dtype[Any]]]"
1510 return operator.pos(values) # type: ignore[arg-type]
1512 new_data = self._mgr.apply(blk_func)
1513 res = self._constructor(new_data)
1514 return res.__finalize__(self, method="__pos__")
1516 @final
1517 def __invert__(self: NDFrameT) -> NDFrameT:
1518 if not self.size:
1519 # inv fails with 0 len
1520 return self
1522 new_data = self._mgr.apply(operator.invert)
1523 return self._constructor(new_data).__finalize__(self, method="__invert__")
1525 @final
1526 def __nonzero__(self) -> NoReturn:
1527 raise ValueError(
1528 f"The truth value of a {type(self).__name__} is ambiguous. "
1529 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1530 )
1532 __bool__ = __nonzero__
1534 @final
1535 def bool(self) -> bool_t:
1536 """
1537 Return the bool of a single element Series or DataFrame.
1539 This must be a boolean scalar value, either True or False. It will raise a
1540 ValueError if the Series or DataFrame does not have exactly 1 element, or that
1541 element is not boolean (integer values 0 and 1 will also raise an exception).
1543 Returns
1544 -------
1545 bool
1546 The value in the Series or DataFrame.
1548 See Also
1549 --------
1550 Series.astype : Change the data type of a Series, including to boolean.
1551 DataFrame.astype : Change the data type of a DataFrame, including to boolean.
1552 numpy.bool_ : NumPy boolean data type, used by pandas for boolean values.
1554 Examples
1555 --------
1556 The method will only work for single element objects with a boolean value:
1558 >>> pd.Series([True]).bool()
1559 True
1560 >>> pd.Series([False]).bool()
1561 False
1563 >>> pd.DataFrame({'col': [True]}).bool()
1564 True
1565 >>> pd.DataFrame({'col': [False]}).bool()
1566 False
1567 """
1568 v = self.squeeze()
1569 if isinstance(v, (bool, np.bool_)):
1570 return bool(v)
1571 elif is_scalar(v):
1572 raise ValueError(
1573 "bool cannot act on a non-boolean single element "
1574 f"{type(self).__name__}"
1575 )
1577 self.__nonzero__()
1578 # for mypy (__nonzero__ raises)
1579 return True
1581 @final
1582 def abs(self: NDFrameT) -> NDFrameT:
1583 """
1584 Return a Series/DataFrame with absolute numeric value of each element.
1586 This function only applies to elements that are all numeric.
1588 Returns
1589 -------
1590 abs
1591 Series/DataFrame containing the absolute value of each element.
1593 See Also
1594 --------
1595 numpy.absolute : Calculate the absolute value element-wise.
1597 Notes
1598 -----
1599 For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
1600 :math:`\\sqrt{ a^2 + b^2 }`.
1602 Examples
1603 --------
1604 Absolute numeric values in a Series.
1606 >>> s = pd.Series([-1.10, 2, -3.33, 4])
1607 >>> s.abs()
1608 0 1.10
1609 1 2.00
1610 2 3.33
1611 3 4.00
1612 dtype: float64
1614 Absolute numeric values in a Series with complex numbers.
1616 >>> s = pd.Series([1.2 + 1j])
1617 >>> s.abs()
1618 0 1.56205
1619 dtype: float64
1621 Absolute numeric values in a Series with a Timedelta element.
1623 >>> s = pd.Series([pd.Timedelta('1 days')])
1624 >>> s.abs()
1625 0 1 days
1626 dtype: timedelta64[ns]
1628 Select rows with data closest to certain value using argsort (from
1629 `StackOverflow <https://stackoverflow.com/a/17758115>`__).
1631 >>> df = pd.DataFrame({
1632 ... 'a': [4, 5, 6, 7],
1633 ... 'b': [10, 20, 30, 40],
1634 ... 'c': [100, 50, -30, -50]
1635 ... })
1636 >>> df
1637 a b c
1638 0 4 10 100
1639 1 5 20 50
1640 2 6 30 -30
1641 3 7 40 -50
1642 >>> df.loc[(df.c - 43).abs().argsort()]
1643 a b c
1644 1 5 20 50
1645 0 4 10 100
1646 2 6 30 -30
1647 3 7 40 -50
1648 """
1649 res_mgr = self._mgr.apply(np.abs)
1650 return self._constructor(res_mgr).__finalize__(self, name="abs")
1652 @final
1653 def __abs__(self: NDFrameT) -> NDFrameT:
1654 return self.abs()
1656 @final
1657 def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT:
1658 return self.round(decimals).__finalize__(self, method="__round__")
1660 # -------------------------------------------------------------------------
1661 # Label or Level Combination Helpers
1662 #
1663 # A collection of helper methods for DataFrame/Series operations that
1664 # accept a combination of column/index labels and levels. All such
1665 # operations should utilize/extend these methods when possible so that we
1666 # have consistent precedence and validation logic throughout the library.
1668 @final
1669 def _is_level_reference(self, key: Level, axis=0) -> bool_t:
1670 """
1671 Test whether a key is a level reference for a given axis.
1673 To be considered a level reference, `key` must be a string that:
1674 - (axis=0): Matches the name of an index level and does NOT match
1675 a column label.
1676 - (axis=1): Matches the name of a column level and does NOT match
1677 an index label.
1679 Parameters
1680 ----------
1681 key : Hashable
1682 Potential level name for the given axis
1683 axis : int, default 0
1684 Axis that levels are associated with (0 for index, 1 for columns)
1686 Returns
1687 -------
1688 is_level : bool
1689 """
1690 axis = self._get_axis_number(axis)
1692 return (
1693 key is not None
1694 and is_hashable(key)
1695 and key in self.axes[axis].names
1696 and not self._is_label_reference(key, axis=axis)
1697 )
1699 @final
1700 def _is_label_reference(self, key: Level, axis=0) -> bool_t:
1701 """
1702 Test whether a key is a label reference for a given axis.
1704 To be considered a label reference, `key` must be a string that:
1705 - (axis=0): Matches a column label
1706 - (axis=1): Matches an index label
1708 Parameters
1709 ----------
1710 key : Hashable
1711 Potential label name, i.e. Index entry.
1712 axis : int, default 0
1713 Axis perpendicular to the axis that labels are associated with
1714 (0 means search for column labels, 1 means search for index labels)
1716 Returns
1717 -------
1718 is_label: bool
1719 """
1720 axis = self._get_axis_number(axis)
1721 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)
1723 return (
1724 key is not None
1725 and is_hashable(key)
1726 and any(key in self.axes[ax] for ax in other_axes)
1727 )
1729 @final
1730 def _is_label_or_level_reference(self, key: Level, axis: int = 0) -> bool_t:
1731 """
1732 Test whether a key is a label or level reference for a given axis.
1734 To be considered either a label or a level reference, `key` must be a
1735 string that:
1736 - (axis=0): Matches a column label or an index level
1737 - (axis=1): Matches an index label or a column level
1739 Parameters
1740 ----------
1741 key : Hashable
1742 Potential label or level name
1743 axis : int, default 0
1744 Axis that levels are associated with (0 for index, 1 for columns)
1746 Returns
1747 -------
1748 bool
1749 """
1750 return self._is_level_reference(key, axis=axis) or self._is_label_reference(
1751 key, axis=axis
1752 )
1754 @final
1755 def _check_label_or_level_ambiguity(self, key: Level, axis: int = 0) -> None:
1756 """
1757 Check whether `key` is ambiguous.
1759 By ambiguous, we mean that it matches both a level of the input
1760 `axis` and a label of the other axis.
1762 Parameters
1763 ----------
1764 key : Hashable
1765 Label or level name.
1766 axis : int, default 0
1767 Axis that levels are associated with (0 for index, 1 for columns).
1769 Raises
1770 ------
1771 ValueError: `key` is ambiguous
1772 """
1774 axis = self._get_axis_number(axis)
1775 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis)
1777 if (
1778 key is not None
1779 and is_hashable(key)
1780 and key in self.axes[axis].names
1781 and any(key in self.axes[ax] for ax in other_axes)
1782 ):
1784 # Build an informative and grammatical warning
1785 level_article, level_type = (
1786 ("an", "index") if axis == 0 else ("a", "column")
1787 )
1789 label_article, label_type = (
1790 ("a", "column") if axis == 0 else ("an", "index")
1791 )
1793 msg = (
1794 f"'{key}' is both {level_article} {level_type} level and "
1795 f"{label_article} {label_type} label, which is ambiguous."
1796 )
1797 raise ValueError(msg)
1799 @final
1800 def _get_label_or_level_values(self, key: Level, axis: int = 0) -> ArrayLike:
1801 """
1802 Return a 1-D array of values associated with `key`, a label or level
1803 from the given `axis`.
1805 Retrieval logic:
1806 - (axis=0): Return column values if `key` matches a column label.
1807 Otherwise return index level values if `key` matches an index
1808 level.
1809 - (axis=1): Return row values if `key` matches an index label.
1810 Otherwise return column level values if 'key' matches a column
1811 level
1813 Parameters
1814 ----------
1815 key : Hashable
1816 Label or level name.
1817 axis : int, default 0
1818 Axis that levels are associated with (0 for index, 1 for columns)
1820 Returns
1821 -------
1822 np.ndarray or ExtensionArray
1824 Raises
1825 ------
1826 KeyError
1827 if `key` matches neither a label nor a level
1828 ValueError
1829 if `key` matches multiple labels
1830 FutureWarning
1831 if `key` is ambiguous. This will become an ambiguity error in a
1832 future version
1833 """
1834 axis = self._get_axis_number(axis)
1835 other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
1837 if self._is_label_reference(key, axis=axis):
1838 self._check_label_or_level_ambiguity(key, axis=axis)
1839 values = self.xs(key, axis=other_axes[0])._values
1840 elif self._is_level_reference(key, axis=axis):
1841 # error: Incompatible types in assignment (expression has type "Union[
1842 # ExtensionArray, ndarray[Any, Any]]", variable has type "ndarray[Any,
1843 # Any]")
1844 values = (
1845 self.axes[axis]
1846 .get_level_values(key) # type: ignore[assignment]
1847 ._values
1848 )
1849 else:
1850 raise KeyError(key)
1852 # Check for duplicates
1853 if values.ndim > 1:
1855 if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
1856 multi_message = (
1857 "\n"
1858 "For a multi-index, the label must be a "
1859 "tuple with elements corresponding to each level."
1860 )
1861 else:
1862 multi_message = ""
1864 label_axis_name = "column" if axis == 0 else "index"
1865 raise ValueError(
1866 f"The {label_axis_name} label '{key}' is not unique.{multi_message}"
1867 )
1869 return values
    @final
    def _drop_labels_or_levels(self, keys, axis: int = 0):
        """
        Drop labels and/or levels for the given `axis`.

        For each key in `keys`:
          - (axis=0): If key matches a column label then drop the column.
            Otherwise if key matches an index level then drop the level.
          - (axis=1): If key matches an index label then drop the row.
            Otherwise if key matches a column level then drop the level.

        Parameters
        ----------
        keys : str or list of str
            labels or levels to drop
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        dropped: DataFrame

        Raises
        ------
        ValueError
            if any `keys` match neither a label nor a level
        """
        axis = self._get_axis_number(axis)

        # Validate keys: each one must resolve to a label or a level, and
        # all offenders are reported together in a single error.
        keys = com.maybe_make_list(keys)
        invalid_keys = [
            k for k in keys if not self._is_label_or_level_reference(k, axis=axis)
        ]

        if invalid_keys:
            raise ValueError(
                "The following keys are not valid labels or "
                f"levels for axis {axis}: {invalid_keys}"
            )

        # Partition keys: level references are dropped as levels; everything
        # else (already validated above) is dropped as a label.
        levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)]

        labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)]

        # Perform copy upfront and then use inplace operations below.
        # This ensures that we always perform exactly one copy.
        # ``copy`` and/or ``inplace`` options could be added in the future.
        dropped = self.copy()

        if axis == 0:
            # Handle dropping index levels
            if levels_to_drop:
                dropped.reset_index(levels_to_drop, drop=True, inplace=True)

            # Handle dropping columns labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=1, inplace=True)
        else:
            # Handle dropping column levels
            if levels_to_drop:
                if isinstance(dropped.columns, MultiIndex):
                    # Drop the specified levels from the MultiIndex
                    dropped.columns = dropped.columns.droplevel(levels_to_drop)
                else:
                    # Drop the last level of Index by replacing with
                    # a RangeIndex
                    dropped.columns = RangeIndex(dropped.columns.size)

            # Handle dropping index labels
            if labels_to_drop:
                dropped.drop(labels_to_drop, axis=0, inplace=True)

        return dropped
    # ----------------------------------------------------------------------
    # Iteration

    # Setting __hash__ to None marks NDFrame as unhashable: ``hash(obj)``
    # raises TypeError. The ignore is for typeshed's base declaration:
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    __hash__: ClassVar[None]  # type: ignore[assignment]
1955 def __iter__(self):
1956 """
1957 Iterate over info axis.
1959 Returns
1960 -------
1961 iterator
1962 Info axis as iterator.
1963 """
1964 return iter(self._info_axis)
1966 # can we get a better explanation of this?
1967 def keys(self) -> Index:
1968 """
1969 Get the 'info axis' (see Indexing for more).
1971 This is index for Series, columns for DataFrame.
1973 Returns
1974 -------
1975 Index
1976 Info axis.
1977 """
1978 return self._info_axis
1980 def items(self):
1981 """
1982 Iterate over (label, values) on info axis
1984 This is index for Series and columns for DataFrame.
1986 Returns
1987 -------
1988 Generator
1989 """
1990 for h in self._info_axis:
1991 yield h, self[h]
1993 def __len__(self) -> int:
1994 """Returns length of info axis"""
1995 return len(self._info_axis)
1997 @final
1998 def __contains__(self, key) -> bool_t:
1999 """True if the key is in the info axis"""
2000 return key in self._info_axis
2002 @property
2003 def empty(self) -> bool_t:
2004 """
2005 Indicator whether Series/DataFrame is empty.
2007 True if Series/DataFrame is entirely empty (no items), meaning any of the
2008 axes are of length 0.
2010 Returns
2011 -------
2012 bool
2013 If Series/DataFrame is empty, return True, if not return False.
2015 See Also
2016 --------
2017 Series.dropna : Return series without null values.
2018 DataFrame.dropna : Return DataFrame with labels on given axis omitted
2019 where (all or any) data are missing.
2021 Notes
2022 -----
2023 If Series/DataFrame contains only NaNs, it is still not considered empty. See
2024 the example below.
2026 Examples
2027 --------
2028 An example of an actual empty DataFrame. Notice the index is empty:
2030 >>> df_empty = pd.DataFrame({'A' : []})
2031 >>> df_empty
2032 Empty DataFrame
2033 Columns: [A]
2034 Index: []
2035 >>> df_empty.empty
2036 True
2038 If we only have NaNs in our DataFrame, it is not considered empty! We
2039 will need to drop the NaNs to make the DataFrame empty:
2041 >>> df = pd.DataFrame({'A' : [np.nan]})
2042 >>> df
2043 A
2044 0 NaN
2045 >>> df.empty
2046 False
2047 >>> df.dropna().empty
2048 True
2050 >>> ser_empty = pd.Series({'A' : []})
2051 >>> ser_empty
2052 A []
2053 dtype: object
2054 >>> ser_empty.empty
2055 False
2056 >>> ser_empty = pd.Series()
2057 >>> ser_empty.empty
2058 True
2059 """
2060 return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS)
    # ----------------------------------------------------------------------
    # Array Interface

    # This is also set in IndexOpsMixin
    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
    # (a high priority makes numpy defer binary ops to pandas).
    __array_priority__: int = 1000
2069 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
2070 return np.asarray(self._values, dtype=dtype)
    def __array_wrap__(
        self,
        result: np.ndarray,
        context: tuple[Callable, tuple[Any, ...], int] | None = None,
    ):
        """
        Gets called after a ufunc and other functions.

        Parameters
        ----------
        result: np.ndarray
            The result of the ufunc or other function called on the NumPy array
            returned by __array__
        context: tuple of (func, tuple, int)
            This parameter is returned by ufuncs as a 3-element tuple: (name of the
            ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
            other numpy functions.

        Notes
        -----
        Series implements __array_ufunc__ so this is not called for ufunc on Series.
        """
        # Note: at time of dask 2022.01.0, this is still used by dask
        warnings.warn(
            "The __array_wrap__ method of DataFrame and Series will be removed in "
            "a future version",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        res = lib.item_from_zerodim(result)
        if is_scalar(res):
            # e.g. we get here with np.ptp(series)
            # ptp also requires the item_from_zerodim
            return res
        # Rewrap the raw array with this object's axes and propagate metadata.
        d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
        return self._constructor(res, **d).__finalize__(self, method="__array_wrap__")
    @final
    def __array_ufunc__(
        self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
    ):
        # NumPy ufunc protocol hook; all dispatch logic is shared in
        # pandas.core.arraylike.array_ufunc.
        return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
2115 # ----------------------------------------------------------------------
2116 # Picklability
2118 @final
2119 def __getstate__(self) -> dict[str, Any]:
2120 meta = {k: getattr(self, k, None) for k in self._metadata}
2121 return {
2122 "_mgr": self._mgr,
2123 "_typ": self._typ,
2124 "_metadata": self._metadata,
2125 "attrs": self.attrs,
2126 "_flags": {k: self.flags[k] for k in self.flags._keys},
2127 **meta,
2128 }
    @final
    def __setstate__(self, state) -> None:
        """
        Restore pickled state.

        Accepts several historical pickle layouts: a raw BlockManager, a
        dict keyed by "_mgr" (or the legacy "_data" key), or legacy
        2-element states, which are rejected as pre-0.12.
        """
        if isinstance(state, BlockManager):
            self._mgr = state
        elif isinstance(state, dict):
            if "_data" in state and "_mgr" not in state:
                # compat for older pickles
                state["_mgr"] = state.pop("_data")
            typ = state.get("_typ")
            if typ is not None:
                attrs = state.get("_attrs", {})
                object.__setattr__(self, "_attrs", attrs)
                flags = state.get("_flags", {"allows_duplicate_labels": True})
                object.__setattr__(self, "_flags", Flags(self, **flags))

                # set in the order of internal names
                # to avoid definitional recursion
                # e.g. say fill_value needing _mgr to be
                # defined
                meta = set(self._internal_names + self._metadata)
                for k in list(meta):
                    if k in state and k != "_flags":
                        v = state[k]
                        object.__setattr__(self, k, v)

                # Any remaining (non-internal) entries are restored verbatim.
                for k, v in state.items():
                    if k not in meta:
                        object.__setattr__(self, k, v)

            else:
                raise NotImplementedError("Pre-0.12 pickles are no longer supported")
        elif len(state) == 2:
            raise NotImplementedError("Pre-0.12 pickles are no longer supported")

        # The per-item cache is never pickled; start from an empty one.
        self._item_cache: dict[Hashable, Series] = {}
2166 # ----------------------------------------------------------------------
2167 # Rendering Methods
2169 def __repr__(self) -> str:
2170 # string representation based upon iterating over self
2171 # (since, by definition, `PandasContainers` are iterable)
2172 prepr = f"[{','.join(map(pprint_thing, self))}]"
2173 return f"{type(self).__name__}({prepr})"
    @final
    def _repr_latex_(self):
        """
        Returns a LaTeX representation for a particular object.
        Mainly for use with nbconvert (jupyter notebook conversion to pdf).
        """
        # Only emit LaTeX when the user opted in via the
        # ``display.latex.repr`` option; returning None lets the frontend
        # fall back to the other repr methods.
        if config.get_option("display.latex.repr"):
            return self.to_latex()
        else:
            return None
2186 @final
2187 def _repr_data_resource_(self):
2188 """
2189 Not a real Jupyter special repr method, but we use the same
2190 naming convention.
2191 """
2192 if config.get_option("display.html.table_schema"):
2193 data = self.head(config.get_option("display.max_rows"))
2195 as_json = data.to_json(orient="table")
2196 as_json = cast(str, as_json)
2197 return json.loads(as_json, object_pairs_hook=collections.OrderedDict)
2199 # ----------------------------------------------------------------------
2200 # I/O Methods
2202 @final
2203 @deprecate_kwarg(old_arg_name="verbose", new_arg_name=None)
2204 @deprecate_kwarg(old_arg_name="encoding", new_arg_name=None)
2205 @doc(
2206 klass="object",
2207 storage_options=_shared_docs["storage_options"],
2208 storage_options_versionadded="1.2.0",
2209 )
2210 def to_excel(
2211 self,
2212 excel_writer,
2213 sheet_name: str = "Sheet1",
2214 na_rep: str = "",
2215 float_format: str | None = None,
2216 columns: Sequence[Hashable] | None = None,
2217 header: Sequence[Hashable] | bool_t = True,
2218 index: bool_t = True,
2219 index_label: IndexLabel = None,
2220 startrow: int = 0,
2221 startcol: int = 0,
2222 engine: str | None = None,
2223 merge_cells: bool_t = True,
2224 encoding: lib.NoDefault = lib.no_default,
2225 inf_rep: str = "inf",
2226 verbose: lib.NoDefault = lib.no_default,
2227 freeze_panes: tuple[int, int] | None = None,
2228 storage_options: StorageOptions = None,
2229 ) -> None:
2230 """
2231 Write {klass} to an Excel sheet.
2233 To write a single {klass} to an Excel .xlsx file it is only necessary to
2234 specify a target file name. To write to multiple sheets it is necessary to
2235 create an `ExcelWriter` object with a target file name, and specify a sheet
2236 in the file to write to.
2238 Multiple sheets may be written to by specifying unique `sheet_name`.
2239 With all data written to the file it is necessary to save the changes.
2240 Note that creating an `ExcelWriter` object with a file name that already
2241 exists will result in the contents of the existing file being erased.
2243 Parameters
2244 ----------
2245 excel_writer : path-like, file-like, or ExcelWriter object
2246 File path or existing ExcelWriter.
2247 sheet_name : str, default 'Sheet1'
2248 Name of sheet which will contain DataFrame.
2249 na_rep : str, default ''
2250 Missing data representation.
2251 float_format : str, optional
2252 Format string for floating point numbers. For example
2253 ``float_format="%.2f"`` will format 0.1234 to 0.12.
2254 columns : sequence or list of str, optional
2255 Columns to write.
2256 header : bool or list of str, default True
2257 Write out the column names. If a list of string is given it is
2258 assumed to be aliases for the column names.
2259 index : bool, default True
2260 Write row names (index).
2261 index_label : str or sequence, optional
2262 Column label for index column(s) if desired. If not specified, and
2263 `header` and `index` are True, then the index names are used. A
2264 sequence should be given if the DataFrame uses MultiIndex.
2265 startrow : int, default 0
2266 Upper left cell row to dump data frame.
2267 startcol : int, default 0
2268 Upper left cell column to dump data frame.
2269 engine : str, optional
2270 Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
2271 via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
2272 ``io.excel.xlsm.writer``.
2274 .. deprecated:: 1.2.0
2276 As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
2277 maintained, the ``xlwt`` engine will be removed in a future version
2278 of pandas.
2280 merge_cells : bool, default True
2281 Write MultiIndex and Hierarchical Rows as merged cells.
2282 encoding : str, optional
2283 Encoding of the resulting excel file. Only necessary for xlwt,
2284 other writers support unicode natively.
2286 .. deprecated:: 1.5.0
2288 This keyword was not used.
2290 inf_rep : str, default 'inf'
2291 Representation for infinity (there is no native representation for
2292 infinity in Excel).
2293 verbose : bool, default True
2294 Display more information in the error logs.
2296 .. deprecated:: 1.5.0
2298 This keyword was not used.
2300 freeze_panes : tuple of int (length 2), optional
2301 Specifies the one-based bottommost row and rightmost column that
2302 is to be frozen.
2303 {storage_options}
2305 .. versionadded:: {storage_options_versionadded}
2307 See Also
2308 --------
2309 to_csv : Write DataFrame to a comma-separated values (csv) file.
2310 ExcelWriter : Class for writing DataFrame objects into excel sheets.
2311 read_excel : Read an Excel file into a pandas DataFrame.
2312 read_csv : Read a comma-separated values (csv) file into DataFrame.
2313 io.formats.style.Styler.to_excel : Add styles to Excel sheet.
2315 Notes
2316 -----
2317 For compatibility with :meth:`~DataFrame.to_csv`,
2318 to_excel serializes lists and dicts to strings before writing.
2320 Once a workbook has been saved it is not possible to write further
2321 data without rewriting the whole workbook.
2323 Examples
2324 --------
2326 Create, write to and save a workbook:
2328 >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']],
2329 ... index=['row 1', 'row 2'],
2330 ... columns=['col 1', 'col 2'])
2331 >>> df1.to_excel("output.xlsx") # doctest: +SKIP
2333 To specify the sheet name:
2335 >>> df1.to_excel("output.xlsx",
2336 ... sheet_name='Sheet_name_1') # doctest: +SKIP
2338 If you wish to write to more than one sheet in the workbook, it is
2339 necessary to specify an ExcelWriter object:
2341 >>> df2 = df1.copy()
2342 >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP
2343 ... df1.to_excel(writer, sheet_name='Sheet_name_1')
2344 ... df2.to_excel(writer, sheet_name='Sheet_name_2')
2346 ExcelWriter can also be used to append to an existing Excel file:
2348 >>> with pd.ExcelWriter('output.xlsx',
2349 ... mode='a') as writer: # doctest: +SKIP
2350 ... df.to_excel(writer, sheet_name='Sheet_name_3')
2352 To set the library that is used to write the Excel file,
2353 you can pass the `engine` keyword (the default engine is
2354 automatically chosen depending on the file extension):
2356 >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP
2357 """
2359 df = self if isinstance(self, ABCDataFrame) else self.to_frame()
2361 from pandas.io.formats.excel import ExcelFormatter
2363 formatter = ExcelFormatter(
2364 df,
2365 na_rep=na_rep,
2366 cols=columns,
2367 header=header,
2368 float_format=float_format,
2369 index=index,
2370 index_label=index_label,
2371 merge_cells=merge_cells,
2372 inf_rep=inf_rep,
2373 )
2374 formatter.write(
2375 excel_writer,
2376 sheet_name=sheet_name,
2377 startrow=startrow,
2378 startcol=startcol,
2379 freeze_panes=freeze_panes,
2380 engine=engine,
2381 storage_options=storage_options,
2382 )
2384 @final
2385 @doc(
2386 storage_options=_shared_docs["storage_options"],
2387 compression_options=_shared_docs["compression_options"] % "path_or_buf",
2388 )
2389 def to_json(
2390 self,
2391 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
2392 orient: str | None = None,
2393 date_format: str | None = None,
2394 double_precision: int = 10,
2395 force_ascii: bool_t = True,
2396 date_unit: str = "ms",
2397 default_handler: Callable[[Any], JSONSerializable] | None = None,
2398 lines: bool_t = False,
2399 compression: CompressionOptions = "infer",
2400 index: bool_t = True,
2401 indent: int | None = None,
2402 storage_options: StorageOptions = None,
2403 ) -> str | None:
2404 """
2405 Convert the object to a JSON string.
2407 Note NaN's and None will be converted to null and datetime objects
2408 will be converted to UNIX timestamps.
2410 Parameters
2411 ----------
2412 path_or_buf : str, path object, file-like object, or None, default None
2413 String, path object (implementing os.PathLike[str]), or file-like
2414 object implementing a write() function. If None, the result is
2415 returned as a string.
2416 orient : str
2417 Indication of expected JSON string format.
2419 * Series:
2421 - default is 'index'
2422 - allowed values are: {{'split', 'records', 'index', 'table'}}.
2424 * DataFrame:
2426 - default is 'columns'
2427 - allowed values are: {{'split', 'records', 'index', 'columns',
2428 'values', 'table'}}.
2430 * The format of the JSON string:
2432 - 'split' : dict like {{'index' -> [index], 'columns' -> [columns],
2433 'data' -> [values]}}
2434 - 'records' : list like [{{column -> value}}, ... , {{column -> value}}]
2435 - 'index' : dict like {{index -> {{column -> value}}}}
2436 - 'columns' : dict like {{column -> {{index -> value}}}}
2437 - 'values' : just the values array
2438 - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}
2440 Describing the data, where data component is like ``orient='records'``.
2442 date_format : {{None, 'epoch', 'iso'}}
2443 Type of date conversion. 'epoch' = epoch milliseconds,
2444 'iso' = ISO8601. The default depends on the `orient`. For
2445 ``orient='table'``, the default is 'iso'. For all other orients,
2446 the default is 'epoch'.
2447 double_precision : int, default 10
2448 The number of decimal places to use when encoding
2449 floating point values.
2450 force_ascii : bool, default True
2451 Force encoded string to be ASCII.
2452 date_unit : str, default 'ms' (milliseconds)
2453 The time unit to encode to, governs timestamp and ISO8601
2454 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
2455 microsecond, and nanosecond respectively.
2456 default_handler : callable, default None
2457 Handler to call if object cannot otherwise be converted to a
2458 suitable format for JSON. Should receive a single argument which is
2459 the object to convert and return a serialisable object.
2460 lines : bool, default False
2461 If 'orient' is 'records' write out line-delimited json format. Will
2462 throw ValueError if incorrect 'orient' since others are not
2463 list-like.
2464 {compression_options}
2466 .. versionchanged:: 1.4.0 Zstandard support.
2468 index : bool, default True
2469 Whether to include the index values in the JSON string. Not
2470 including the index (``index=False``) is only supported when
2471 orient is 'split' or 'table'.
2472 indent : int, optional
2473 Length of whitespace used to indent each record.
2475 .. versionadded:: 1.0.0
2477 {storage_options}
2479 .. versionadded:: 1.2.0
2481 Returns
2482 -------
2483 None or str
2484 If path_or_buf is None, returns the resulting json format as a
2485 string. Otherwise returns None.
2487 See Also
2488 --------
2489 read_json : Convert a JSON string to pandas object.
2491 Notes
2492 -----
2493 The behavior of ``indent=0`` varies from the stdlib, which does not
2494 indent the output but does insert newlines. Currently, ``indent=0``
2495 and the default ``indent=None`` are equivalent in pandas, though this
2496 may change in a future release.
2498 ``orient='table'`` contains a 'pandas_version' field under 'schema'.
2499 This stores the version of `pandas` used in the latest revision of the
2500 schema.
2502 Examples
2503 --------
2504 >>> import json
2505 >>> df = pd.DataFrame(
2506 ... [["a", "b"], ["c", "d"]],
2507 ... index=["row 1", "row 2"],
2508 ... columns=["col 1", "col 2"],
2509 ... )
2511 >>> result = df.to_json(orient="split")
2512 >>> parsed = json.loads(result)
2513 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2514 {{
2515 "columns": [
2516 "col 1",
2517 "col 2"
2518 ],
2519 "index": [
2520 "row 1",
2521 "row 2"
2522 ],
2523 "data": [
2524 [
2525 "a",
2526 "b"
2527 ],
2528 [
2529 "c",
2530 "d"
2531 ]
2532 ]
2533 }}
2535 Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
2536 Note that index labels are not preserved with this encoding.
2538 >>> result = df.to_json(orient="records")
2539 >>> parsed = json.loads(result)
2540 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2541 [
2542 {{
2543 "col 1": "a",
2544 "col 2": "b"
2545 }},
2546 {{
2547 "col 1": "c",
2548 "col 2": "d"
2549 }}
2550 ]
2552 Encoding/decoding a Dataframe using ``'index'`` formatted JSON:
2554 >>> result = df.to_json(orient="index")
2555 >>> parsed = json.loads(result)
2556 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2557 {{
2558 "row 1": {{
2559 "col 1": "a",
2560 "col 2": "b"
2561 }},
2562 "row 2": {{
2563 "col 1": "c",
2564 "col 2": "d"
2565 }}
2566 }}
2568 Encoding/decoding a Dataframe using ``'columns'`` formatted JSON:
2570 >>> result = df.to_json(orient="columns")
2571 >>> parsed = json.loads(result)
2572 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2573 {{
2574 "col 1": {{
2575 "row 1": "a",
2576 "row 2": "c"
2577 }},
2578 "col 2": {{
2579 "row 1": "b",
2580 "row 2": "d"
2581 }}
2582 }}
2584 Encoding/decoding a Dataframe using ``'values'`` formatted JSON:
2586 >>> result = df.to_json(orient="values")
2587 >>> parsed = json.loads(result)
2588 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2589 [
2590 [
2591 "a",
2592 "b"
2593 ],
2594 [
2595 "c",
2596 "d"
2597 ]
2598 ]
2600 Encoding with Table Schema:
2602 >>> result = df.to_json(orient="table")
2603 >>> parsed = json.loads(result)
2604 >>> json.dumps(parsed, indent=4) # doctest: +SKIP
2605 {{
2606 "schema": {{
2607 "fields": [
2608 {{
2609 "name": "index",
2610 "type": "string"
2611 }},
2612 {{
2613 "name": "col 1",
2614 "type": "string"
2615 }},
2616 {{
2617 "name": "col 2",
2618 "type": "string"
2619 }}
2620 ],
2621 "primaryKey": [
2622 "index"
2623 ],
2624 "pandas_version": "1.4.0"
2625 }},
2626 "data": [
2627 {{
2628 "index": "row 1",
2629 "col 1": "a",
2630 "col 2": "b"
2631 }},
2632 {{
2633 "index": "row 2",
2634 "col 1": "c",
2635 "col 2": "d"
2636 }}
2637 ]
2638 }}
2639 """
2640 from pandas.io import json
2642 if date_format is None and orient == "table":
2643 date_format = "iso"
2644 elif date_format is None:
2645 date_format = "epoch"
2647 config.is_nonnegative_int(indent)
2648 indent = indent or 0
2650 return json.to_json(
2651 path_or_buf=path_or_buf,
2652 obj=self,
2653 orient=orient,
2654 date_format=date_format,
2655 double_precision=double_precision,
2656 force_ascii=force_ascii,
2657 date_unit=date_unit,
2658 default_handler=default_handler,
2659 lines=lines,
2660 compression=compression,
2661 index=index,
2662 indent=indent,
2663 storage_options=storage_options,
2664 )
2666 @final
2667 def to_hdf(
2668 self,
2669 path_or_buf: FilePath | HDFStore,
2670 key: str,
2671 mode: str = "a",
2672 complevel: int | None = None,
2673 complib: str | None = None,
2674 append: bool_t = False,
2675 format: str | None = None,
2676 index: bool_t = True,
2677 min_itemsize: int | dict[str, int] | None = None,
2678 nan_rep=None,
2679 dropna: bool_t | None = None,
2680 data_columns: Literal[True] | list[str] | None = None,
2681 errors: str = "strict",
2682 encoding: str = "UTF-8",
2683 ) -> None:
2684 """
2685 Write the contained data to an HDF5 file using HDFStore.
2687 Hierarchical Data Format (HDF) is self-describing, allowing an
2688 application to interpret the structure and contents of a file with
2689 no outside information. One HDF file can hold a mix of related objects
2690 which can be accessed as a group or as individual objects.
2692 In order to add another DataFrame or Series to an existing HDF file
2693 please use append mode and a different a key.
2695 .. warning::
2697 One can store a subclass of ``DataFrame`` or ``Series`` to HDF5,
2698 but the type of the subclass is lost upon storing.
2700 For more information see the :ref:`user guide <io.hdf5>`.
2702 Parameters
2703 ----------
2704 path_or_buf : str or pandas.HDFStore
2705 File path or HDFStore object.
2706 key : str
2707 Identifier for the group in the store.
2708 mode : {'a', 'w', 'r+'}, default 'a'
2709 Mode to open file:
2711 - 'w': write, a new file is created (an existing file with
2712 the same name would be deleted).
2713 - 'a': append, an existing file is opened for reading and
2714 writing, and if the file does not exist it is created.
2715 - 'r+': similar to 'a', but the file must already exist.
2716 complevel : {0-9}, default None
2717 Specifies a compression level for data.
2718 A value of 0 or None disables compression.
2719 complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
2720 Specifies the compression library to be used.
2721 As of v0.20.2 these additional compressors for Blosc are supported
2722 (default if no compressor specified: 'blosc:blosclz'):
2723 {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
2724 'blosc:zlib', 'blosc:zstd'}.
2725 Specifying a compression library which is not available issues
2726 a ValueError.
2727 append : bool, default False
2728 For Table formats, append the input data to the existing.
2729 format : {'fixed', 'table', None}, default 'fixed'
2730 Possible values:
2732 - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
2733 nor searchable.
2734 - 'table': Table format. Write as a PyTables Table structure
2735 which may perform worse but allow more flexible operations
2736 like searching / selecting subsets of the data.
2737 - If None, pd.get_option('io.hdf.default_format') is checked,
2738 followed by fallback to "fixed".
2739 index : bool, default True
2740 Write DataFrame index as a column.
2741 min_itemsize : dict or int, optional
2742 Map column names to minimum string sizes for columns.
2743 nan_rep : Any, optional
2744 How to represent null values as str.
2745 Not allowed with append=True.
2746 dropna : bool, default False, optional
2747 Remove missing values.
2748 data_columns : list of columns or True, optional
2749 List of columns to create as indexed data columns for on-disk
2750 queries, or True to use all columns. By default only the axes
2751 of the object are indexed. See
2752 :ref:`Query via data columns<io.hdf5-query-data-columns>`. for
2753 more information.
2754 Applicable only to format='table'.
2755 errors : str, default 'strict'
2756 Specifies how encoding and decoding errors are to be handled.
2757 See the errors argument for :func:`open` for a full list
2758 of options.
2759 encoding : str, default "UTF-8"
2761 See Also
2762 --------
2763 read_hdf : Read from HDF file.
2764 DataFrame.to_orc : Write a DataFrame to the binary orc format.
2765 DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
2766 DataFrame.to_sql : Write to a SQL table.
2767 DataFrame.to_feather : Write out feather-format for DataFrames.
2768 DataFrame.to_csv : Write out to a csv file.
2770 Examples
2771 --------
2772 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
2773 ... index=['a', 'b', 'c']) # doctest: +SKIP
2774 >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP
2776 We can add another object to the same file:
2778 >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP
2779 >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP
2781 Reading from HDF file:
2783 >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP
2784 A B
2785 a 1 4
2786 b 2 5
2787 c 3 6
2788 >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP
2789 0 1
2790 1 2
2791 2 3
2792 3 4
2793 dtype: int64
2794 """
2795 from pandas.io import pytables
2797 # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected
2798 # "Union[DataFrame, Series]" [arg-type]
2799 pytables.to_hdf(
2800 path_or_buf,
2801 key,
2802 self, # type: ignore[arg-type]
2803 mode=mode,
2804 complevel=complevel,
2805 complib=complib,
2806 append=append,
2807 format=format,
2808 index=index,
2809 min_itemsize=min_itemsize,
2810 nan_rep=nan_rep,
2811 dropna=dropna,
2812 data_columns=data_columns,
2813 errors=errors,
2814 encoding=encoding,
2815 )
2817 @final
2818 def to_sql(
2819 self,
2820 name: str,
2821 con,
2822 schema: str | None = None,
2823 if_exists: str = "fail",
2824 index: bool_t = True,
2825 index_label: IndexLabel = None,
2826 chunksize: int | None = None,
2827 dtype: DtypeArg | None = None,
2828 method: str | None = None,
2829 ) -> int | None:
2830 """
2831 Write records stored in a DataFrame to a SQL database.
2833 Databases supported by SQLAlchemy [1]_ are supported. Tables can be
2834 newly created, appended to, or overwritten.
2836 Parameters
2837 ----------
2838 name : str
2839 Name of SQL table.
2840 con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection
2841 Using SQLAlchemy makes it possible to use any DB supported by that
2842 library. Legacy support is provided for sqlite3.Connection objects. The user
2843 is responsible for engine disposal and connection closure for the SQLAlchemy
2844 connectable See `here \
2845 <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
2847 schema : str, optional
2848 Specify the schema (if database flavor supports this). If None, use
2849 default schema.
2850 if_exists : {'fail', 'replace', 'append'}, default 'fail'
2851 How to behave if the table already exists.
2853 * fail: Raise a ValueError.
2854 * replace: Drop the table before inserting new values.
2855 * append: Insert new values to the existing table.
2857 index : bool, default True
2858 Write DataFrame index as a column. Uses `index_label` as the column
2859 name in the table.
2860 index_label : str or sequence, default None
2861 Column label for index column(s). If None is given (default) and
2862 `index` is True, then the index names are used.
2863 A sequence should be given if the DataFrame uses MultiIndex.
2864 chunksize : int, optional
2865 Specify the number of rows in each batch to be written at a time.
2866 By default, all rows will be written at once.
2867 dtype : dict or scalar, optional
2868 Specifying the datatype for columns. If a dictionary is used, the
2869 keys should be the column names and the values should be the
2870 SQLAlchemy types or strings for the sqlite3 legacy mode. If a
2871 scalar is provided, it will be applied to all columns.
2872 method : {None, 'multi', callable}, optional
2873 Controls the SQL insertion clause used:
2875 * None : Uses standard SQL ``INSERT`` clause (one per row).
2876 * 'multi': Pass multiple values in a single ``INSERT`` clause.
2877 * callable with signature ``(pd_table, conn, keys, data_iter)``.
2879 Details and a sample callable implementation can be found in the
2880 section :ref:`insert method <io.sql.method>`.
2882 Returns
2883 -------
2884 None or int
2885 Number of rows affected by to_sql. None is returned if the callable
2886 passed into ``method`` does not return an integer number of rows.
2888 The number of returned rows affected is the sum of the ``rowcount``
2889 attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not
2890 reflect the exact number of written rows as stipulated in the
2891 `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
2892 `SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__.
2894 .. versionadded:: 1.4.0
2896 Raises
2897 ------
2898 ValueError
2899 When the table already exists and `if_exists` is 'fail' (the
2900 default).
2902 See Also
2903 --------
2904 read_sql : Read a DataFrame from a table.
2906 Notes
2907 -----
2908 Timezone aware datetime columns will be written as
2909 ``Timestamp with timezone`` type with SQLAlchemy if supported by the
2910 database. Otherwise, the datetimes will be stored as timezone unaware
2911 timestamps local to the original timezone.
2913 References
2914 ----------
2915 .. [1] https://docs.sqlalchemy.org
2916 .. [2] https://www.python.org/dev/peps/pep-0249/
2918 Examples
2919 --------
2920 Create an in-memory SQLite database.
2922 >>> from sqlalchemy import create_engine
2923 >>> engine = create_engine('sqlite://', echo=False)
2925 Create a table from scratch with 3 rows.
2927 >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
2928 >>> df
2929 name
2930 0 User 1
2931 1 User 2
2932 2 User 3
2934 >>> df.to_sql('users', con=engine)
2935 3
2936 >>> engine.execute("SELECT * FROM users").fetchall()
2937 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
2939 An `sqlalchemy.engine.Connection` can also be passed to `con`:
2941 >>> with engine.begin() as connection:
2942 ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
2943 ... df1.to_sql('users', con=connection, if_exists='append')
2944 2
2946 This is allowed to support operations that require that the same
2947 DBAPI connection is used for the entire operation.
2949 >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']})
2950 >>> df2.to_sql('users', con=engine, if_exists='append')
2951 2
2952 >>> engine.execute("SELECT * FROM users").fetchall()
2953 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
2954 (0, 'User 4'), (1, 'User 5'), (0, 'User 6'),
2955 (1, 'User 7')]
2957 Overwrite the table with just ``df2``.
2959 >>> df2.to_sql('users', con=engine, if_exists='replace',
2960 ... index_label='id')
2961 2
2962 >>> engine.execute("SELECT * FROM users").fetchall()
2963 [(0, 'User 6'), (1, 'User 7')]
2965 Specify the dtype (especially useful for integers with missing values).
2966 Notice that while pandas is forced to store the data as floating point,
2967 the database supports nullable integers. When fetching the data with
2968 Python, we get back integer scalars.
2970 >>> df = pd.DataFrame({"A": [1, None, 2]})
2971 >>> df
2972 A
2973 0 1.0
2974 1 NaN
2975 2 2.0
2977 >>> from sqlalchemy.types import Integer
2978 >>> df.to_sql('integers', con=engine, index=False,
2979 ... dtype={"A": Integer()})
2980 3
2982 >>> engine.execute("SELECT * FROM integers").fetchall()
2983 [(1,), (None,), (2,)]
2984 """ # noqa:E501
2985 from pandas.io import sql
2987 return sql.to_sql(
2988 self,
2989 name,
2990 con,
2991 schema=schema,
2992 if_exists=if_exists,
2993 index=index,
2994 index_label=index_label,
2995 chunksize=chunksize,
2996 dtype=dtype,
2997 method=method,
2998 )
3000 @final
3001 @doc(
3002 storage_options=_shared_docs["storage_options"],
3003 compression_options=_shared_docs["compression_options"] % "path",
3004 )
3005 def to_pickle(
3006 self,
3007 path: FilePath | WriteBuffer[bytes],
3008 compression: CompressionOptions = "infer",
3009 protocol: int = pickle.HIGHEST_PROTOCOL,
3010 storage_options: StorageOptions = None,
3011 ) -> None:
3012 """
3013 Pickle (serialize) object to file.
3015 Parameters
3016 ----------
3017 path : str, path object, or file-like object
3018 String, path object (implementing ``os.PathLike[str]``), or file-like
3019 object implementing a binary ``write()`` function. File path where
3020 the pickled object will be stored.
3021 {compression_options}
3022 protocol : int
3023 Int which indicates which protocol should be used by the pickler,
3024 default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible
3025 values are 0, 1, 2, 3, 4, 5. A negative value for the protocol
3026 parameter is equivalent to setting its value to HIGHEST_PROTOCOL.
3028 .. [1] https://docs.python.org/3/library/pickle.html.
3030 {storage_options}
3032 .. versionadded:: 1.2.0
3034 See Also
3035 --------
3036 read_pickle : Load pickled pandas object (or any object) from file.
3037 DataFrame.to_hdf : Write DataFrame to an HDF5 file.
3038 DataFrame.to_sql : Write DataFrame to a SQL database.
3039 DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
3041 Examples
3042 --------
3043 >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
3044 >>> original_df # doctest: +SKIP
3045 foo bar
3046 0 0 5
3047 1 1 6
3048 2 2 7
3049 3 3 8
3050 4 4 9
3051 >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP
3053 >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
3054 >>> unpickled_df # doctest: +SKIP
3055 foo bar
3056 0 0 5
3057 1 1 6
3058 2 2 7
3059 3 3 8
3060 4 4 9
3061 """ # noqa: E501
3062 from pandas.io.pickle import to_pickle
3064 to_pickle(
3065 self,
3066 path,
3067 compression=compression,
3068 protocol=protocol,
3069 storage_options=storage_options,
3070 )
3072 @final
3073 def to_clipboard(
3074 self, excel: bool_t = True, sep: str | None = None, **kwargs
3075 ) -> None:
3076 r"""
3077 Copy object to the system clipboard.
3079 Write a text representation of object to the system clipboard.
3080 This can be pasted into Excel, for example.
3082 Parameters
3083 ----------
3084 excel : bool, default True
3085 Produce output in a csv format for easy pasting into excel.
3087 - True, use the provided separator for csv pasting.
3088 - False, write a string representation of the object to the clipboard.
3090 sep : str, default ``'\t'``
3091 Field delimiter.
3092 **kwargs
3093 These parameters will be passed to DataFrame.to_csv.
3095 See Also
3096 --------
3097 DataFrame.to_csv : Write a DataFrame to a comma-separated values
3098 (csv) file.
3099 read_clipboard : Read text from clipboard and pass to read_csv.
3101 Notes
3102 -----
3103 Requirements for your platform.
3105 - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
3106 - Windows : none
3107 - macOS : none
3109 This method uses the processes developed for the package `pyperclip`. A
3110 solution to render any output string format is given in the examples.
3112 Examples
3113 --------
3114 Copy the contents of a DataFrame to the clipboard.
3116 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
3118 >>> df.to_clipboard(sep=',') # doctest: +SKIP
3119 ... # Wrote the following to the system clipboard:
3120 ... # ,A,B,C
3121 ... # 0,1,2,3
3122 ... # 1,4,5,6
3124 We can omit the index by passing the keyword `index` and setting
3125 it to false.
3127 >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
3128 ... # Wrote the following to the system clipboard:
3129 ... # A,B,C
3130 ... # 1,2,3
3131 ... # 4,5,6
3133 Using the original `pyperclip` package for any string output format.
3135 .. code-block:: python
3137 import pyperclip
3138 html = df.style.to_html()
3139 pyperclip.copy(html)
3140 """
3141 from pandas.io import clipboards
3143 clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs)
3145 @final
3146 def to_xarray(self):
3147 """
3148 Return an xarray object from the pandas object.
3150 Returns
3151 -------
3152 xarray.DataArray or xarray.Dataset
3153 Data in the pandas structure converted to Dataset if the object is
3154 a DataFrame, or a DataArray if the object is a Series.
3156 See Also
3157 --------
3158 DataFrame.to_hdf : Write DataFrame to an HDF5 file.
3159 DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
3161 Notes
3162 -----
3163 See the `xarray docs <https://xarray.pydata.org/en/stable/>`__
3165 Examples
3166 --------
3167 >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
3168 ... ('parrot', 'bird', 24.0, 2),
3169 ... ('lion', 'mammal', 80.5, 4),
3170 ... ('monkey', 'mammal', np.nan, 4)],
3171 ... columns=['name', 'class', 'max_speed',
3172 ... 'num_legs'])
3173 >>> df
3174 name class max_speed num_legs
3175 0 falcon bird 389.0 2
3176 1 parrot bird 24.0 2
3177 2 lion mammal 80.5 4
3178 3 monkey mammal NaN 4
3180 >>> df.to_xarray()
3181 <xarray.Dataset>
3182 Dimensions: (index: 4)
3183 Coordinates:
3184 * index (index) int64 0 1 2 3
3185 Data variables:
3186 name (index) object 'falcon' 'parrot' 'lion' 'monkey'
3187 class (index) object 'bird' 'bird' 'mammal' 'mammal'
3188 max_speed (index) float64 389.0 24.0 80.5 nan
3189 num_legs (index) int64 2 2 4 4
3191 >>> df['max_speed'].to_xarray()
3192 <xarray.DataArray 'max_speed' (index: 4)>
3193 array([389. , 24. , 80.5, nan])
3194 Coordinates:
3195 * index (index) int64 0 1 2 3
3197 >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
3198 ... '2018-01-02', '2018-01-02'])
3199 >>> df_multiindex = pd.DataFrame({'date': dates,
3200 ... 'animal': ['falcon', 'parrot',
3201 ... 'falcon', 'parrot'],
3202 ... 'speed': [350, 18, 361, 15]})
3203 >>> df_multiindex = df_multiindex.set_index(['date', 'animal'])
3205 >>> df_multiindex
3206 speed
3207 date animal
3208 2018-01-01 falcon 350
3209 parrot 18
3210 2018-01-02 falcon 361
3211 parrot 15
3213 >>> df_multiindex.to_xarray()
3214 <xarray.Dataset>
3215 Dimensions: (date: 2, animal: 2)
3216 Coordinates:
3217 * date (date) datetime64[ns] 2018-01-01 2018-01-02
3218 * animal (animal) object 'falcon' 'parrot'
3219 Data variables:
3220 speed (date, animal) int64 350 18 361 15
3221 """
3222 xarray = import_optional_dependency("xarray")
3224 if self.ndim == 1:
3225 return xarray.DataArray.from_series(self)
3226 else:
3227 return xarray.Dataset.from_dataframe(self)
    # Typing overload: with ``buf=None`` (the default) the rendered LaTeX is
    # returned to the caller as a ``str``.
    @overload
    def to_latex(
        self,
        buf: None = ...,
        columns: Sequence[Hashable] | None = ...,
        col_space: ColspaceArgType | None = ...,
        header: bool_t | Sequence[str] = ...,
        index: bool_t = ...,
        na_rep: str = ...,
        formatters: FormattersType | None = ...,
        float_format: FloatFormatType | None = ...,
        sparsify: bool_t | None = ...,
        index_names: bool_t = ...,
        bold_rows: bool_t = ...,
        column_format: str | None = ...,
        longtable: bool_t | None = ...,
        escape: bool_t | None = ...,
        encoding: str | None = ...,
        decimal: str = ...,
        multicolumn: bool_t | None = ...,
        multicolumn_format: str | None = ...,
        multirow: bool_t | None = ...,
        caption: str | tuple[str, str] | None = ...,
        label: str | None = ...,
        position: str | None = ...,
    ) -> str:
        ...
    # Typing overload: with a path or writable buffer in ``buf`` the output is
    # written there and the method returns ``None``.
    @overload
    def to_latex(
        self,
        buf: FilePath | WriteBuffer[str],
        columns: Sequence[Hashable] | None = ...,
        col_space: ColspaceArgType | None = ...,
        header: bool_t | Sequence[str] = ...,
        index: bool_t = ...,
        na_rep: str = ...,
        formatters: FormattersType | None = ...,
        float_format: FloatFormatType | None = ...,
        sparsify: bool_t | None = ...,
        index_names: bool_t = ...,
        bold_rows: bool_t = ...,
        column_format: str | None = ...,
        longtable: bool_t | None = ...,
        escape: bool_t | None = ...,
        encoding: str | None = ...,
        decimal: str = ...,
        multicolumn: bool_t | None = ...,
        multicolumn_format: str | None = ...,
        multirow: bool_t | None = ...,
        caption: str | tuple[str, str] | None = ...,
        label: str | None = ...,
        position: str | None = ...,
    ) -> None:
        ...
3285 @final
3286 @doc(returns=fmt.return_docstring)
3287 def to_latex(
3288 self,
3289 buf: FilePath | WriteBuffer[str] | None = None,
3290 columns: Sequence[Hashable] | None = None,
3291 col_space: ColspaceArgType | None = None,
3292 header: bool_t | Sequence[str] = True,
3293 index: bool_t = True,
3294 na_rep: str = "NaN",
3295 formatters: FormattersType | None = None,
3296 float_format: FloatFormatType | None = None,
3297 sparsify: bool_t | None = None,
3298 index_names: bool_t = True,
3299 bold_rows: bool_t = False,
3300 column_format: str | None = None,
3301 longtable: bool_t | None = None,
3302 escape: bool_t | None = None,
3303 encoding: str | None = None,
3304 decimal: str = ".",
3305 multicolumn: bool_t | None = None,
3306 multicolumn_format: str | None = None,
3307 multirow: bool_t | None = None,
3308 caption: str | tuple[str, str] | None = None,
3309 label: str | None = None,
3310 position: str | None = None,
3311 ) -> str | None:
3312 r"""
3313 Render object to a LaTeX tabular, longtable, or nested table.
3315 Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted
3316 into a main LaTeX document or read from an external file
3317 with ``\input{{table.tex}}``.
3319 .. versionchanged:: 1.0.0
3320 Added caption and label arguments.
3322 .. versionchanged:: 1.2.0
3323 Added position argument, changed meaning of caption argument.
3325 Parameters
3326 ----------
3327 buf : str, Path or StringIO-like, optional, default None
3328 Buffer to write to. If None, the output is returned as a string.
3329 columns : list of label, optional
3330 The subset of columns to write. Writes all columns by default.
3331 col_space : int, optional
3332 The minimum width of each column.
3333 header : bool or list of str, default True
3334 Write out the column names. If a list of strings is given,
3335 it is assumed to be aliases for the column names.
3336 index : bool, default True
3337 Write row names (index).
3338 na_rep : str, default 'NaN'
3339 Missing data representation.
3340 formatters : list of functions or dict of {{str: function}}, optional
3341 Formatter functions to apply to columns' elements by position or
3342 name. The result of each function must be a unicode string.
3343 List must be of length equal to the number of columns.
3344 float_format : one-parameter function or str, optional, default None
3345 Formatter for floating point numbers. For example
3346 ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will
3347 both result in 0.1234 being formatted as 0.12.
3348 sparsify : bool, optional
3349 Set to False for a DataFrame with a hierarchical index to print
3350 every multiindex key at each row. By default, the value will be
3351 read from the config module.
3352 index_names : bool, default True
3353 Prints the names of the indexes.
3354 bold_rows : bool, default False
3355 Make the row labels bold in the output.
3356 column_format : str, optional
3357 The columns format as specified in `LaTeX table format
3358 <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3
3359 columns. By default, 'l' will be used for all columns except
3360 columns of numbers, which default to 'r'.
3361 longtable : bool, optional
3362 By default, the value will be read from the pandas config
3363 module. Use a longtable environment instead of tabular. Requires
3364 adding a \usepackage{{longtable}} to your LaTeX preamble.
3365 escape : bool, optional
3366 By default, the value will be read from the pandas config
3367 module. When set to False prevents from escaping latex special
3368 characters in column names.
3369 encoding : str, optional
3370 A string representing the encoding to use in the output file,
3371 defaults to 'utf-8'.
3372 decimal : str, default '.'
3373 Character recognized as decimal separator, e.g. ',' in Europe.
3374 multicolumn : bool, default True
3375 Use \multicolumn to enhance MultiIndex columns.
3376 The default will be read from the config module.
3377 multicolumn_format : str, default 'l'
3378 The alignment for multicolumns, similar to `column_format`
3379 The default will be read from the config module.
3380 multirow : bool, default False
3381 Use \multirow to enhance MultiIndex rows. Requires adding a
3382 \usepackage{{multirow}} to your LaTeX preamble. Will print
3383 centered labels (instead of top-aligned) across the contained
3384 rows, separating groups via clines. The default will be read
3385 from the pandas config module.
3386 caption : str or tuple, optional
3387 Tuple (full_caption, short_caption),
3388 which results in ``\caption[short_caption]{{full_caption}}``;
3389 if a single string is passed, no short caption will be set.
3391 .. versionadded:: 1.0.0
3393 .. versionchanged:: 1.2.0
3394 Optionally allow caption to be a tuple ``(full_caption, short_caption)``.
3396 label : str, optional
3397 The LaTeX label to be placed inside ``\label{{}}`` in the output.
3398 This is used with ``\ref{{}}`` in the main ``.tex`` file.
3400 .. versionadded:: 1.0.0
3401 position : str, optional
3402 The LaTeX positional argument for tables, to be placed after
3403 ``\begin{{}}`` in the output.
3405 .. versionadded:: 1.2.0
3406 {returns}
3407 See Also
3408 --------
3409 io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX
3410 with conditional formatting.
3411 DataFrame.to_string : Render a DataFrame to a console-friendly
3412 tabular output.
3413 DataFrame.to_html : Render a DataFrame as an HTML table.
3415 Examples
3416 --------
3417 >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'],
3418 ... mask=['red', 'purple'],
3419 ... weapon=['sai', 'bo staff']))
3420 >>> print(df.to_latex(index=False)) # doctest: +SKIP
3421 \begin{{tabular}}{{lll}}
3422 \toprule
3423 name & mask & weapon \\
3424 \midrule
3425 Raphael & red & sai \\
3426 Donatello & purple & bo staff \\
3427 \bottomrule
3428 \end{{tabular}}
3429 """
3430 msg = (
3431 "In future versions `DataFrame.to_latex` is expected to utilise the base "
3432 "implementation of `Styler.to_latex` for formatting and rendering. "
3433 "The arguments signature may therefore change. It is recommended instead "
3434 "to use `DataFrame.style.to_latex` which also contains additional "
3435 "functionality."
3436 )
3437 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
3439 # Get defaults from the pandas config
3440 if self.ndim == 1:
3441 self = self.to_frame()
3442 if longtable is None:
3443 longtable = config.get_option("display.latex.longtable")
3444 if escape is None:
3445 escape = config.get_option("display.latex.escape")
3446 if multicolumn is None:
3447 multicolumn = config.get_option("display.latex.multicolumn")
3448 if multicolumn_format is None:
3449 multicolumn_format = config.get_option("display.latex.multicolumn_format")
3450 if multirow is None:
3451 multirow = config.get_option("display.latex.multirow")
3453 self = cast("DataFrame", self)
3454 formatter = DataFrameFormatter(
3455 self,
3456 columns=columns,
3457 col_space=col_space,
3458 na_rep=na_rep,
3459 header=header,
3460 index=index,
3461 formatters=formatters,
3462 float_format=float_format,
3463 bold_rows=bold_rows,
3464 sparsify=sparsify,
3465 index_names=index_names,
3466 escape=escape,
3467 decimal=decimal,
3468 )
3469 return DataFrameRenderer(formatter).to_latex(
3470 buf=buf,
3471 column_format=column_format,
3472 longtable=longtable,
3473 encoding=encoding,
3474 multicolumn=multicolumn,
3475 multicolumn_format=multicolumn_format,
3476 multirow=multirow,
3477 caption=caption,
3478 label=label,
3479 position=position,
3480 )
    @overload
    def to_csv(
        self,
        path_or_buf: None = ...,
        sep: str = ...,
        na_rep: str = ...,
        float_format: str | Callable | None = ...,
        columns: Sequence[Hashable] | None = ...,
        header: bool_t | list[str] = ...,
        index: bool_t = ...,
        index_label: IndexLabel | None = ...,
        mode: str = ...,
        encoding: str | None = ...,
        compression: CompressionOptions = ...,
        quoting: int | None = ...,
        quotechar: str = ...,
        lineterminator: str | None = ...,
        chunksize: int | None = ...,
        date_format: str | None = ...,
        doublequote: bool_t = ...,
        escapechar: str | None = ...,
        decimal: str = ...,
        errors: str = ...,
        storage_options: StorageOptions = ...,
    ) -> str:
        # Overload: when ``path_or_buf`` is None (the default), the rendered
        # CSV is returned to the caller as a string.
        ...
    @overload
    def to_csv(
        self,
        path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str],
        sep: str = ...,
        na_rep: str = ...,
        float_format: str | Callable | None = ...,
        columns: Sequence[Hashable] | None = ...,
        header: bool_t | list[str] = ...,
        index: bool_t = ...,
        index_label: IndexLabel | None = ...,
        mode: str = ...,
        encoding: str | None = ...,
        compression: CompressionOptions = ...,
        quoting: int | None = ...,
        quotechar: str = ...,
        lineterminator: str | None = ...,
        chunksize: int | None = ...,
        date_format: str | None = ...,
        doublequote: bool_t = ...,
        escapechar: str | None = ...,
        decimal: str = ...,
        errors: str = ...,
        storage_options: StorageOptions = ...,
    ) -> None:
        # Overload: when a path or write buffer is supplied, output is
        # written there and the method returns None.
        ...
    @final
    @doc(
        storage_options=_shared_docs["storage_options"],
        compression_options=_shared_docs["compression_options"] % "path_or_buf",
    )
    @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
    def to_csv(
        self,
        path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
        sep: str = ",",
        na_rep: str = "",
        float_format: str | Callable | None = None,
        columns: Sequence[Hashable] | None = None,
        header: bool_t | list[str] = True,
        index: bool_t = True,
        index_label: IndexLabel | None = None,
        mode: str = "w",
        encoding: str | None = None,
        compression: CompressionOptions = "infer",
        quoting: int | None = None,
        quotechar: str = '"',
        lineterminator: str | None = None,
        chunksize: int | None = None,
        date_format: str | None = None,
        doublequote: bool_t = True,
        escapechar: str | None = None,
        decimal: str = ".",
        errors: str = "strict",
        storage_options: StorageOptions = None,
    ) -> str | None:
        r"""
        Write object to a comma-separated values (csv) file.

        Parameters
        ----------
        path_or_buf : str, path object, file-like object, or None, default None
            String, path object (implementing os.PathLike[str]), or file-like
            object implementing a write() function. If None, the result is
            returned as a string. If a non-binary file object is passed, it should
            be opened with `newline=''`, disabling universal newlines. If a binary
            file object is passed, `mode` might need to contain a `'b'`.

            .. versionchanged:: 1.2.0

               Support for binary file objects was introduced.

        sep : str, default ','
            String of length 1. Field delimiter for the output file.
        na_rep : str, default ''
            Missing data representation.
        float_format : str, Callable, default None
            Format string for floating point numbers. If a Callable is given, it takes
            precedence over other numeric formatting parameters, like decimal.
        columns : sequence, optional
            Columns to write.
        header : bool or list of str, default True
            Write out the column names. If a list of strings is given it is
            assumed to be aliases for the column names.
        index : bool, default True
            Write row names (index).
        index_label : str or sequence, or False, default None
            Column label for index column(s) if desired. If None is given, and
            `header` and `index` are True, then the index names are used. A
            sequence should be given if the object uses MultiIndex. If
            False do not print fields for index names. Use index_label=False
            for easier importing in R.
        mode : str, default 'w'
            Python write mode. The available write modes are the same as
            :py:func:`open`.
        encoding : str, optional
            A string representing the encoding to use in the output file,
            defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
            is a non-binary file object.
        {compression_options}

            .. versionchanged:: 1.0.0

               May now be a dict with key 'method' as compression mode
               and other entries as additional compression options if
               compression mode is 'zip'.

            .. versionchanged:: 1.1.0

               Passing compression options as keys in dict is
               supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.

            .. versionchanged:: 1.2.0

                Compression is supported for binary file objects.

            .. versionchanged:: 1.2.0

                Previous versions forwarded dict entries for 'gzip' to
                `gzip.open` instead of `gzip.GzipFile` which prevented
                setting `mtime`.

        quoting : optional constant from csv module
            Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
            then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
            will treat them as non-numeric.
        quotechar : str, default '\"'
            String of length 1. Character used to quote fields.
        lineterminator : str, optional
            The newline character or character sequence to use in the output
            file. Defaults to `os.linesep`, which depends on the OS in which
            this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.).

            .. versionchanged:: 1.5.0

                Previously was line_terminator, changed for consistency with
                read_csv and the standard library 'csv' module.

        chunksize : int or None
            Rows to write at a time.
        date_format : str, default None
            Format string for datetime objects.
        doublequote : bool, default True
            Control quoting of `quotechar` inside a field.
        escapechar : str, default None
            String of length 1. Character used to escape `sep` and `quotechar`
            when appropriate.
        decimal : str, default '.'
            Character recognized as decimal separator. E.g. use ',' for
            European data.
        errors : str, default 'strict'
            Specifies how encoding and decoding errors are to be handled.
            See the errors argument for :func:`open` for a full list
            of options.

            .. versionadded:: 1.1.0

        {storage_options}

            .. versionadded:: 1.2.0

        Returns
        -------
        None or str
            If path_or_buf is None, returns the resulting csv format as a
            string. Otherwise returns None.

        See Also
        --------
        read_csv : Load a CSV file into a DataFrame.
        to_excel : Write DataFrame to an Excel file.

        Examples
        --------
        >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
        ...                    'mask': ['red', 'purple'],
        ...                    'weapon': ['sai', 'bo staff']}})
        >>> df.to_csv(index=False)
        'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'

        Create 'out.zip' containing 'out.csv'

        >>> compression_opts = dict(method='zip',
        ...                         archive_name='out.csv')  # doctest: +SKIP
        >>> df.to_csv('out.zip', index=False,
        ...           compression=compression_opts)  # doctest: +SKIP

        To write a csv file to a new folder or nested folder you will first
        need to create it using either Pathlib or os:

        >>> from pathlib import Path  # doctest: +SKIP
        >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
        >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
        >>> df.to_csv(filepath)  # doctest: +SKIP

        >>> import os  # doctest: +SKIP
        >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
        >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
        """
        # A 1-D caller (Series) is rendered through a one-column DataFrame
        # so a single formatting path handles both cases.
        df = self if isinstance(self, ABCDataFrame) else self.to_frame()

        # The formatter carries the value-rendering options; all file/IO
        # options are passed to the renderer call below.
        formatter = DataFrameFormatter(
            frame=df,
            header=header,
            index=index,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
        )

        return DataFrameRenderer(formatter).to_csv(
            path_or_buf,
            lineterminator=lineterminator,
            sep=sep,
            encoding=encoding,
            errors=errors,
            compression=compression,
            quoting=quoting,
            columns=columns,
            index_label=index_label,
            mode=mode,
            chunksize=chunksize,
            quotechar=quotechar,
            date_format=date_format,
            doublequote=doublequote,
            escapechar=escapechar,
            storage_options=storage_options,
        )
3739 # ----------------------------------------------------------------------
3740 # Lookup Caching
3742 def _reset_cacher(self) -> None:
3743 """
3744 Reset the cacher.
3745 """
3746 raise AbstractMethodError(self)
3748 def _maybe_update_cacher(
3749 self,
3750 clear: bool_t = False,
3751 verify_is_copy: bool_t = True,
3752 inplace: bool_t = False,
3753 ) -> None:
3754 """
3755 See if we need to update our parent cacher if clear, then clear our
3756 cache.
3758 Parameters
3759 ----------
3760 clear : bool, default False
3761 Clear the item cache.
3762 verify_is_copy : bool, default True
3763 Provide is_copy checks.
3764 """
3766 if verify_is_copy:
3767 self._check_setitem_copy(t="referent")
3769 if clear:
3770 self._clear_item_cache()
3772 def _clear_item_cache(self) -> None:
3773 raise AbstractMethodError(self)
3775 # ----------------------------------------------------------------------
3776 # Indexing Methods
3778 def take(
3779 self: NDFrameT, indices, axis=0, is_copy: bool_t | None = None, **kwargs
3780 ) -> NDFrameT:
3781 """
3782 Return the elements in the given *positional* indices along an axis.
3784 This means that we are not indexing according to actual values in
3785 the index attribute of the object. We are indexing according to the
3786 actual position of the element in the object.
3788 Parameters
3789 ----------
3790 indices : array-like
3791 An array of ints indicating which positions to take.
3792 axis : {0 or 'index', 1 or 'columns', None}, default 0
3793 The axis on which to select elements. ``0`` means that we are
3794 selecting rows, ``1`` means that we are selecting columns.
3795 For `Series` this parameter is unused and defaults to 0.
3796 is_copy : bool
3797 Before pandas 1.0, ``is_copy=False`` can be specified to ensure
3798 that the return value is an actual copy. Starting with pandas 1.0,
3799 ``take`` always returns a copy, and the keyword is therefore
3800 deprecated.
3802 .. deprecated:: 1.0.0
3803 **kwargs
3804 For compatibility with :meth:`numpy.take`. Has no effect on the
3805 output.
3807 Returns
3808 -------
3809 taken : same type as caller
3810 An array-like containing the elements taken from the object.
3812 See Also
3813 --------
3814 DataFrame.loc : Select a subset of a DataFrame by labels.
3815 DataFrame.iloc : Select a subset of a DataFrame by positions.
3816 numpy.take : Take elements from an array along an axis.
3818 Examples
3819 --------
3820 >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
3821 ... ('parrot', 'bird', 24.0),
3822 ... ('lion', 'mammal', 80.5),
3823 ... ('monkey', 'mammal', np.nan)],
3824 ... columns=['name', 'class', 'max_speed'],
3825 ... index=[0, 2, 3, 1])
3826 >>> df
3827 name class max_speed
3828 0 falcon bird 389.0
3829 2 parrot bird 24.0
3830 3 lion mammal 80.5
3831 1 monkey mammal NaN
3833 Take elements at positions 0 and 3 along the axis 0 (default).
3835 Note how the actual indices selected (0 and 1) do not correspond to
3836 our selected indices 0 and 3. That's because we are selecting the 0th
3837 and 3rd rows, not rows whose indices equal 0 and 3.
3839 >>> df.take([0, 3])
3840 name class max_speed
3841 0 falcon bird 389.0
3842 1 monkey mammal NaN
3844 Take elements at indices 1 and 2 along the axis 1 (column selection).
3846 >>> df.take([1, 2], axis=1)
3847 class max_speed
3848 0 bird 389.0
3849 2 bird 24.0
3850 3 mammal 80.5
3851 1 mammal NaN
3853 We may take elements using negative integers for positive indices,
3854 starting from the end of the object, just like with Python lists.
3856 >>> df.take([-1, -2])
3857 name class max_speed
3858 1 monkey mammal NaN
3859 3 lion mammal 80.5
3860 """
3861 if is_copy is not None:
3862 warnings.warn(
3863 "is_copy is deprecated and will be removed in a future version. "
3864 "'take' always returns a copy, so there is no need to specify this.",
3865 FutureWarning,
3866 stacklevel=find_stack_level(),
3867 )
3869 nv.validate_take((), kwargs)
3871 return self._take(indices, axis)
3873 def _take(
3874 self: NDFrameT,
3875 indices,
3876 axis=0,
3877 convert_indices: bool_t = True,
3878 ) -> NDFrameT:
3879 """
3880 Internal version of the `take` allowing specification of additional args.
3882 See the docstring of `take` for full explanation of the parameters.
3883 """
3884 self._consolidate_inplace()
3886 new_data = self._mgr.take(
3887 indices,
3888 axis=self._get_block_manager_axis(axis),
3889 verify=True,
3890 convert_indices=convert_indices,
3891 )
3892 return self._constructor(new_data).__finalize__(self, method="take")
3894 def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT:
3895 """
3896 Internal version of the `take` method that sets the `_is_copy`
3897 attribute to keep track of the parent dataframe (using in indexing
3898 for the SettingWithCopyWarning).
3900 See the docstring of `take` for full explanation of the parameters.
3901 """
3902 result = self._take(indices=indices, axis=axis)
3903 # Maybe set copy if we didn't actually change the index.
3904 if not result._get_axis(axis).equals(self._get_axis(axis)):
3905 result._set_is_copy(self)
3906 return result
    @final
    def xs(
        self: NDFrameT,
        key: IndexLabel,
        axis: Axis = 0,
        level: IndexLabel = None,
        drop_level: bool_t = True,
    ) -> NDFrameT:
        """
        Return cross-section from the Series/DataFrame.

        This method takes a `key` argument to select data at a particular
        level of a MultiIndex.

        Parameters
        ----------
        key : label or tuple of label
            Label contained in the index, or partially in a MultiIndex.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Axis to retrieve cross-section on.
        level : object, defaults to first n levels (n=1 or len(key))
            In case of a key partially contained in a MultiIndex, indicate
            which levels are used. Levels can be referred by label or position.
        drop_level : bool, default True
            If False, returns object with same levels as self.

        Returns
        -------
        Series or DataFrame
            Cross-section from the original Series or DataFrame
            corresponding to the selected index levels.

        See Also
        --------
        DataFrame.loc : Access a group of rows and columns
            by label(s) or a boolean array.
        DataFrame.iloc : Purely integer-location based indexing
            for selection by position.

        Notes
        -----
        `xs` can not be used to set values.

        MultiIndex Slicers is a generic way to get/set values on
        any level or levels.
        It is a superset of `xs` functionality, see
        :ref:`MultiIndex Slicers <advanced.mi_slicers>`.

        Examples
        --------
        >>> d = {'num_legs': [4, 4, 2, 2],
        ...      'num_wings': [0, 0, 2, 2],
        ...      'class': ['mammal', 'mammal', 'mammal', 'bird'],
        ...      'animal': ['cat', 'dog', 'bat', 'penguin'],
        ...      'locomotion': ['walks', 'walks', 'flies', 'walks']}
        >>> df = pd.DataFrame(data=d)
        >>> df = df.set_index(['class', 'animal', 'locomotion'])
        >>> df
                                   num_legs  num_wings
        class  animal  locomotion
        mammal cat     walks              4          0
               dog     walks              4          0
               bat     flies              2          2
        bird   penguin walks              2          2

        Get values at specified index

        >>> df.xs('mammal')
                           num_legs  num_wings
        animal locomotion
        cat    walks              4          0
        dog    walks              4          0
        bat    flies              2          2

        Get values at several indexes

        >>> df.xs(('mammal', 'dog'))
                    num_legs  num_wings
        locomotion
        walks              4          0

        Get values at specified index and level

        >>> df.xs('cat', level=1)
                           num_legs  num_wings
        class  locomotion
        mammal walks              4          0

        Get values at several indexes and levels

        >>> df.xs(('bird', 'walks'),
        ...       level=[0, 'locomotion'])
                 num_legs  num_wings
        animal
        penguin         2          2

        Get values at specified column and axis

        >>> df.xs('num_wings', axis=1)
        class   animal   locomotion
        mammal  cat      walks         0
                dog      walks         0
                bat      flies         2
        bird    penguin  walks         2
        Name: num_wings, dtype: int64
        """
        axis = self._get_axis_number(axis)
        labels = self._get_axis(axis)

        # List keys are deprecated in favor of tuples (a list is ambiguous
        # between "several keys" and "one composite key").
        if isinstance(key, list):
            warnings.warn(
                "Passing lists as key for xs is deprecated and will be removed in a "
                "future version. Pass key as a tuple instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        if level is not None:
            # Explicit level(s): resolve through MultiIndex machinery and
            # select positionally along the requested axis.
            if not isinstance(labels, MultiIndex):
                raise TypeError("Index must be a MultiIndex")
            loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)

            # create the tuple of the indexer
            _indexer = [slice(None)] * self.ndim
            _indexer[axis] = loc
            indexer = tuple(_indexer)

            result = self.iloc[indexer]
            # Replace the selected axis with the (possibly level-dropped)
            # index returned by get_loc_level.
            setattr(result, result._get_axis_name(axis), new_ax)
            return result

        if axis == 1:
            if drop_level:
                return self[key]
            index = self.columns
        else:
            index = self.index

        self._consolidate_inplace()

        if isinstance(index, MultiIndex):
            loc, new_index = index._get_loc_level(key, level=0)
            if not drop_level:
                if lib.is_integer(loc):
                    # Slice (rather than scalar-index) so the level survives.
                    new_index = index[loc : loc + 1]
                else:
                    new_index = index[loc]
        else:
            loc = index.get_loc(key)

            if isinstance(loc, np.ndarray):
                # Non-unique index: get_loc returned a mask or an array of
                # positions; take with copy-tracking in either case.
                if loc.dtype == np.bool_:
                    (inds,) = loc.nonzero()
                    return self._take_with_is_copy(inds, axis=axis)
                else:
                    return self._take_with_is_copy(loc, axis=axis)

            if not is_scalar(loc):
                new_index = index[loc]

        if is_scalar(loc) and axis == 0:
            # In this case loc should be an integer
            if self.ndim == 1:
                # if we encounter an array-like and we only have 1 dim
                # that means that their are list/ndarrays inside the Series!
                # so just return them (GH 6394)
                return self._values[loc]

            # Single row of a DataFrame: produce a Series via fast_xs.
            new_mgr = self._mgr.fast_xs(loc)

            result = self._constructor_sliced(
                new_mgr, name=self.index[loc]
            ).__finalize__(self)
        elif is_scalar(loc):
            result = self.iloc[:, slice(loc, loc + 1)]
        elif axis == 1:
            result = self.iloc[:, loc]
        else:
            result = self.iloc[loc]
            result.index = new_index

        # this could be a view
        # but only in a single-dtyped view sliceable case
        result._set_is_copy(self, copy=not result._is_view)
        return result
4094 def __getitem__(self, item):
4095 raise AbstractMethodError(self)
4097 def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT:
4098 """
4099 Construct a slice of this container.
4101 Slicing with this method is *always* positional.
4102 """
4103 assert isinstance(slobj, slice), type(slobj)
4104 axis = self._get_block_manager_axis(axis)
4105 result = self._constructor(self._mgr.get_slice(slobj, axis=axis))
4106 result = result.__finalize__(self)
4108 # this could be a view
4109 # but only in a single-dtyped view sliceable case
4110 is_copy = axis != 0 or result._is_view
4111 result._set_is_copy(self, copy=is_copy)
4112 return result
4114 @final
4115 def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None:
4116 if not copy:
4117 self._is_copy = None
4118 else:
4119 assert ref is not None
4120 self._is_copy = weakref.ref(ref)
4122 def _check_is_chained_assignment_possible(self) -> bool_t:
4123 """
4124 Check if we are a view, have a cacher, and are of mixed type.
4125 If so, then force a setitem_copy check.
4127 Should be called just near setting a value
4129 Will return a boolean if it we are a view and are cached, but a
4130 single-dtype meaning that the cacher should be updated following
4131 setting.
4132 """
4133 if self._is_copy:
4134 self._check_setitem_copy(t="referent")
4135 return False
    @final
    def _check_setitem_copy(self, t="setting", force=False):
        """
        Validate if we are doing a setitem on a chained copy.

        Parameters
        ----------
        t : str, the type of setting error
        force : bool, default False
           If True, then force showing an error.

        Notes
        -----
        It is technically possible to figure out that we are setting on
        a copy even WITH a multi-dtyped pandas object. In other words, some
        blocks may be views while other are not. Currently _is_view will ALWAYS
        return False for multi-blocks to avoid having to handle this case.

        df = DataFrame(np.arange(0,9), columns=['count'])
        df['group'] = 'b'

        # This technically need not raise SettingWithCopy if both are view
        # (which is not generally guaranteed but is usually True. However,
        # this is in general not a good practice and we recommend using .loc.
        df.iloc[0:5]['group'] = 'a'
        """
        # Under copy-on-write (block manager only) chained assignment can
        # never silently modify a parent, so the check is unnecessary.
        if (
            config.get_option("mode.copy_on_write")
            and config.get_option("mode.data_manager") == "block"
        ):
            return

        # return early if the check is not needed
        if not (force or self._is_copy):
            return

        value = config.get_option("mode.chained_assignment")
        if value is None:
            # checks disabled via config
            return

        # see if the copy is not actually referred; if so, then dissolve
        # the copy weakref
        if self._is_copy is not None and not isinstance(self._is_copy, str):
            r = self._is_copy()
            if not gc.get_referents(r) or (r is not None and r.shape == self.shape):
                self._is_copy = None
                return

        # a custom message
        if isinstance(self._is_copy, str):
            # _is_copy may hold a pre-built message string instead of a weakref
            t = self._is_copy

        elif t == "referent":
            t = (
                "\n"
                "A value is trying to be set on a copy of a slice from a "
                "DataFrame\n\n"
                "See the caveats in the documentation: "
                "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
                "indexing.html#returning-a-view-versus-a-copy"
            )

        else:
            t = (
                "\n"
                "A value is trying to be set on a copy of a slice from a "
                "DataFrame.\n"
                "Try using .loc[row_indexer,col_indexer] = value "
                "instead\n\nSee the caveats in the documentation: "
                "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
                "indexing.html#returning-a-view-versus-a-copy"
            )

        # Escalate according to the configured chained-assignment mode.
        if value == "raise":
            raise SettingWithCopyError(t)
        elif value == "warn":
            warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level())
    def __delitem__(self, key) -> None:
        """
        Delete item
        """
        deleted = False

        maybe_shortcut = False
        if self.ndim == 2 and isinstance(self.columns, MultiIndex):
            try:
                # By using engine's __contains__ we effectively
                # restrict to same-length tuples
                maybe_shortcut = key not in self.columns._engine
            except TypeError:
                # e.g. an unhashable key; fall through to the plain path below
                pass

        if maybe_shortcut:
            # Allow shorthand to delete all columns whose first len(key)
            # elements match key:
            if not isinstance(key, tuple):
                key = (key,)
            for col in self.columns:
                if isinstance(col, tuple) and col[: len(key)] == key:
                    # recursive delete; each full tuple takes the plain path
                    del self[col]
                    deleted = True
        if not deleted:
            # If the above loop ran and didn't delete anything because
            # there was no match, this call should raise the appropriate
            # exception:
            loc = self.axes[-1].get_loc(key)
            self._mgr = self._mgr.idelete(loc)

        # delete from the caches
        try:
            del self._item_cache[key]
        except KeyError:
            pass
4252 # ----------------------------------------------------------------------
4253 # Unsorted
4255 @final
4256 def _check_inplace_and_allows_duplicate_labels(self, inplace):
4257 if inplace and not self.flags.allows_duplicate_labels:
4258 raise ValueError(
4259 "Cannot specify 'inplace=True' when "
4260 "'self.flags.allows_duplicate_labels' is False."
4261 )
4263 @final
4264 def get(self, key, default=None):
4265 """
4266 Get item from object for given key (ex: DataFrame column).
4268 Returns default value if not found.
4270 Parameters
4271 ----------
4272 key : object
4274 Returns
4275 -------
4276 value : same type as items contained in object
4278 Examples
4279 --------
4280 >>> df = pd.DataFrame(
4281 ... [
4282 ... [24.3, 75.7, "high"],
4283 ... [31, 87.8, "high"],
4284 ... [22, 71.6, "medium"],
4285 ... [35, 95, "medium"],
4286 ... ],
4287 ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
4288 ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
4289 ... )
4291 >>> df
4292 temp_celsius temp_fahrenheit windspeed
4293 2014-02-12 24.3 75.7 high
4294 2014-02-13 31.0 87.8 high
4295 2014-02-14 22.0 71.6 medium
4296 2014-02-15 35.0 95.0 medium
4298 >>> df.get(["temp_celsius", "windspeed"])
4299 temp_celsius windspeed
4300 2014-02-12 24.3 high
4301 2014-02-13 31.0 high
4302 2014-02-14 22.0 medium
4303 2014-02-15 35.0 medium
4305 If the key isn't found, the default value will be used.
4307 >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value")
4308 'default_value'
4309 """
4310 try:
4311 return self[key]
4312 except (KeyError, ValueError, IndexError):
4313 return default
    @final
    @property
    def _is_view(self) -> bool_t:
        """Return boolean indicating if self is view of another array"""
        # Delegates to the manager, which knows whether its blocks share
        # memory with another object.
        return self._mgr.is_view
4321 @final
4322 def reindex_like(
4323 self: NDFrameT,
4324 other,
4325 method: str | None = None,
4326 copy: bool_t = True,
4327 limit=None,
4328 tolerance=None,
4329 ) -> NDFrameT:
4330 """
4331 Return an object with matching indices as other object.
4333 Conform the object to the same index on all axes. Optional
4334 filling logic, placing NaN in locations having no value
4335 in the previous index. A new object is produced unless the
4336 new index is equivalent to the current one and copy=False.
4338 Parameters
4339 ----------
4340 other : Object of the same data type
4341 Its row and column indices are used to define the new indices
4342 of this object.
4343 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
4344 Method to use for filling holes in reindexed DataFrame.
4345 Please note: this is only applicable to DataFrames/Series with a
4346 monotonically increasing/decreasing index.
4348 * None (default): don't fill gaps
4349 * pad / ffill: propagate last valid observation forward to next
4350 valid
4351 * backfill / bfill: use next valid observation to fill gap
4352 * nearest: use nearest valid observations to fill gap.
4354 copy : bool, default True
4355 Return a new object, even if the passed indexes are the same.
4356 limit : int, default None
4357 Maximum number of consecutive labels to fill for inexact matches.
4358 tolerance : optional
4359 Maximum distance between original and new labels for inexact
4360 matches. The values of the index at the matching locations must
4361 satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
4363 Tolerance may be a scalar value, which applies the same tolerance
4364 to all values, or list-like, which applies variable tolerance per
4365 element. List-like includes list, tuple, array, Series, and must be
4366 the same size as the index and its dtype must exactly match the
4367 index's type.
4369 Returns
4370 -------
4371 Series or DataFrame
4372 Same type as caller, but with changed indices on each axis.
4374 See Also
4375 --------
4376 DataFrame.set_index : Set row labels.
4377 DataFrame.reset_index : Remove row labels or move them to new columns.
4378 DataFrame.reindex : Change to new indices or expand indices.
4380 Notes
4381 -----
4382 Same as calling
4383 ``.reindex(index=other.index, columns=other.columns,...)``.
4385 Examples
4386 --------
4387 >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'],
4388 ... [31, 87.8, 'high'],
4389 ... [22, 71.6, 'medium'],
4390 ... [35, 95, 'medium']],
4391 ... columns=['temp_celsius', 'temp_fahrenheit',
4392 ... 'windspeed'],
4393 ... index=pd.date_range(start='2014-02-12',
4394 ... end='2014-02-15', freq='D'))
4396 >>> df1
4397 temp_celsius temp_fahrenheit windspeed
4398 2014-02-12 24.3 75.7 high
4399 2014-02-13 31.0 87.8 high
4400 2014-02-14 22.0 71.6 medium
4401 2014-02-15 35.0 95.0 medium
4403 >>> df2 = pd.DataFrame([[28, 'low'],
4404 ... [30, 'low'],
4405 ... [35.1, 'medium']],
4406 ... columns=['temp_celsius', 'windspeed'],
4407 ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13',
4408 ... '2014-02-15']))
4410 >>> df2
4411 temp_celsius windspeed
4412 2014-02-12 28.0 low
4413 2014-02-13 30.0 low
4414 2014-02-15 35.1 medium
4416 >>> df2.reindex_like(df1)
4417 temp_celsius temp_fahrenheit windspeed
4418 2014-02-12 28.0 NaN low
4419 2014-02-13 30.0 NaN low
4420 2014-02-14 NaN NaN NaN
4421 2014-02-15 35.1 NaN medium
4422 """
4423 d = other._construct_axes_dict(
4424 axes=self._AXIS_ORDERS,
4425 method=method,
4426 copy=copy,
4427 limit=limit,
4428 tolerance=tolerance,
4429 )
4431 return self.reindex(**d)
4433 @overload
4434 def drop(
4435 self,
4436 labels: IndexLabel = ...,
4437 *,
4438 axis: Axis = ...,
4439 index: IndexLabel = ...,
4440 columns: IndexLabel = ...,
4441 level: Level | None = ...,
4442 inplace: Literal[True],
4443 errors: IgnoreRaise = ...,
4444 ) -> None:
4445 ...
4447 @overload
4448 def drop(
4449 self: NDFrameT,
4450 labels: IndexLabel = ...,
4451 *,
4452 axis: Axis = ...,
4453 index: IndexLabel = ...,
4454 columns: IndexLabel = ...,
4455 level: Level | None = ...,
4456 inplace: Literal[False] = ...,
4457 errors: IgnoreRaise = ...,
4458 ) -> NDFrameT:
4459 ...
4461 @overload
4462 def drop(
4463 self: NDFrameT,
4464 labels: IndexLabel = ...,
4465 *,
4466 axis: Axis = ...,
4467 index: IndexLabel = ...,
4468 columns: IndexLabel = ...,
4469 level: Level | None = ...,
4470 inplace: bool_t = ...,
4471 errors: IgnoreRaise = ...,
4472 ) -> NDFrameT | None:
4473 ...
4475 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
4476 def drop(
4477 self: NDFrameT,
4478 labels: IndexLabel = None,
4479 axis: Axis = 0,
4480 index: IndexLabel = None,
4481 columns: IndexLabel = None,
4482 level: Level | None = None,
4483 inplace: bool_t = False,
4484 errors: IgnoreRaise = "raise",
4485 ) -> NDFrameT | None:
4487 inplace = validate_bool_kwarg(inplace, "inplace")
4489 if labels is not None:
4490 if index is not None or columns is not None:
4491 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
4492 axis_name = self._get_axis_name(axis)
4493 axes = {axis_name: labels}
4494 elif index is not None or columns is not None:
4495 axes, _ = self._construct_axes_from_arguments((index, columns), {})
4496 else:
4497 raise ValueError(
4498 "Need to specify at least one of 'labels', 'index' or 'columns'"
4499 )
4501 obj = self
4503 for axis, labels in axes.items():
4504 if labels is not None:
4505 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
4507 if inplace:
4508 self._update_inplace(obj)
4509 else:
4510 return obj
    @final
    def _drop_axis(
        self: NDFrameT,
        labels,
        axis,
        level=None,
        errors: IgnoreRaise = "raise",
        only_slice: bool_t = False,
    ) -> NDFrameT:
        """
        Drop labels from specified axis. Used in the ``drop`` method
        internally.

        Parameters
        ----------
        labels : single label or list-like
        axis : int or axis name
        level : int or level name, default None
            For MultiIndex
        errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress error and existing labels are dropped.
        only_slice : bool, default False
            Whether indexing along columns should be view-only.

        Returns
        -------
        Same type as self, with ``labels`` removed from ``axis``.
        """
        axis_num = self._get_axis_number(axis)
        axis = self._get_axis(axis)

        if axis.is_unique:
            # Fast path: unique labels can be dropped directly via Index.drop,
            # then located with a plain get_indexer.
            if level is not None:
                if not isinstance(axis, MultiIndex):
                    raise AssertionError("axis must be a MultiIndex")
                new_axis = axis.drop(labels, level=level, errors=errors)
            else:
                new_axis = axis.drop(labels, errors=errors)
            indexer = axis.get_indexer(new_axis)

        # Case for non-unique axis
        else:
            # Labels can occur multiple times, so build a boolean keep-mask
            # over positions instead of dropping by label.
            is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple)
            labels = ensure_object(com.index_labels_to_array(labels))
            if level is not None:
                if not isinstance(axis, MultiIndex):
                    raise AssertionError("axis must be a MultiIndex")
                mask = ~axis.get_level_values(level).isin(labels)

                # GH 18561 MultiIndex.drop should raise if label is absent
                # (mask.all() means nothing matched, i.e. nothing would drop).
                if errors == "raise" and mask.all():
                    raise KeyError(f"{labels} not found in axis")
            elif (
                isinstance(axis, MultiIndex)
                and labels.dtype == "object"
                and not is_tuple_labels
            ):
                # Set level to zero in case of MultiIndex and label is string,
                # because isin can't handle strings for MultiIndexes GH#36293
                # In case of tuples we get dtype object but have to use isin GH#42771
                mask = ~axis.get_level_values(0).isin(labels)
            else:
                mask = ~axis.isin(labels)
                # Check if label doesn't exist along axis
                labels_missing = (axis.get_indexer_for(labels) == -1).any()
                if errors == "raise" and labels_missing:
                    raise KeyError(f"{labels} not found in axis")

            if is_extension_array_dtype(mask.dtype):
                # GH#45860: masked/EA boolean masks must become plain ndarray
                # before nonzero().
                mask = mask.to_numpy(dtype=bool)

            indexer = mask.nonzero()[0]
            new_axis = axis.take(indexer)

        # Block-manager axes are ordered opposite to user-facing axes.
        bm_axis = self.ndim - axis_num - 1
        new_mgr = self._mgr.reindex_indexer(
            new_axis,
            indexer,
            axis=bm_axis,
            allow_dups=True,
            only_slice=only_slice,
        )
        result = self._constructor(new_mgr)
        if self.ndim == 1:
            # Series: the constructor from a manager does not carry the name.
            result.name = self.name

        return result.__finalize__(self)
    @final
    def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
        """
        Replace self internals with result.

        Parameters
        ----------
        result : same type as self
            Object whose block manager replaces ``self._mgr``.
        verify_is_copy : bool, default True
            Provide is_copy checks.
        """
        # NOTE: This does *not* call __finalize__ and that's an explicit
        # decision that we may revisit in the future.
        # Invalidate cached properties and cached items before swapping in
        # the new manager, so stale references cannot leak.
        self._reset_cache()
        self._clear_item_cache()
        self._mgr = result._mgr
        # Notify any parent object that caches this one about the mutation.
        self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True)
4616 @final
4617 def add_prefix(self: NDFrameT, prefix: str) -> NDFrameT:
4618 """
4619 Prefix labels with string `prefix`.
4621 For Series, the row labels are prefixed.
4622 For DataFrame, the column labels are prefixed.
4624 Parameters
4625 ----------
4626 prefix : str
4627 The string to add before each label.
4629 Returns
4630 -------
4631 Series or DataFrame
4632 New Series or DataFrame with updated labels.
4634 See Also
4635 --------
4636 Series.add_suffix: Suffix row labels with string `suffix`.
4637 DataFrame.add_suffix: Suffix column labels with string `suffix`.
4639 Examples
4640 --------
4641 >>> s = pd.Series([1, 2, 3, 4])
4642 >>> s
4643 0 1
4644 1 2
4645 2 3
4646 3 4
4647 dtype: int64
4649 >>> s.add_prefix('item_')
4650 item_0 1
4651 item_1 2
4652 item_2 3
4653 item_3 4
4654 dtype: int64
4656 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
4657 >>> df
4658 A B
4659 0 1 3
4660 1 2 4
4661 2 3 5
4662 3 4 6
4664 >>> df.add_prefix('col_')
4665 col_A col_B
4666 0 1 3
4667 1 2 4
4668 2 3 5
4669 3 4 6
4670 """
4671 f = functools.partial("{prefix}{}".format, prefix=prefix)
4673 mapper = {self._info_axis_name: f}
4674 # error: Incompatible return value type (got "Optional[NDFrameT]",
4675 # expected "NDFrameT")
4676 # error: Argument 1 to "rename" of "NDFrame" has incompatible type
4677 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
4678 return self._rename(**mapper) # type: ignore[return-value, arg-type]
4680 @final
4681 def add_suffix(self: NDFrameT, suffix: str) -> NDFrameT:
4682 """
4683 Suffix labels with string `suffix`.
4685 For Series, the row labels are suffixed.
4686 For DataFrame, the column labels are suffixed.
4688 Parameters
4689 ----------
4690 suffix : str
4691 The string to add after each label.
4693 Returns
4694 -------
4695 Series or DataFrame
4696 New Series or DataFrame with updated labels.
4698 See Also
4699 --------
4700 Series.add_prefix: Prefix row labels with string `prefix`.
4701 DataFrame.add_prefix: Prefix column labels with string `prefix`.
4703 Examples
4704 --------
4705 >>> s = pd.Series([1, 2, 3, 4])
4706 >>> s
4707 0 1
4708 1 2
4709 2 3
4710 3 4
4711 dtype: int64
4713 >>> s.add_suffix('_item')
4714 0_item 1
4715 1_item 2
4716 2_item 3
4717 3_item 4
4718 dtype: int64
4720 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
4721 >>> df
4722 A B
4723 0 1 3
4724 1 2 4
4725 2 3 5
4726 3 4 6
4728 >>> df.add_suffix('_col')
4729 A_col B_col
4730 0 1 3
4731 1 2 4
4732 2 3 5
4733 3 4 6
4734 """
4735 f = functools.partial("{}{suffix}".format, suffix=suffix)
4737 mapper = {self._info_axis_name: f}
4738 # error: Incompatible return value type (got "Optional[NDFrameT]",
4739 # expected "NDFrameT")
4740 # error: Argument 1 to "rename" of "NDFrame" has incompatible type
4741 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
4742 return self._rename(**mapper) # type: ignore[return-value, arg-type]
    # Overloads: inplace=True returns None, inplace=False returns the same
    # NDFrame subtype; the third covers a runtime-determined bool.
    @overload
    def sort_values(
        self: NDFrameT,
        *,
        axis: Axis = ...,
        ascending=...,
        inplace: Literal[False] = ...,
        kind: str = ...,
        na_position: str = ...,
        ignore_index: bool_t = ...,
        key: ValueKeyFunc = ...,
    ) -> NDFrameT:
        ...

    @overload
    def sort_values(
        self,
        *,
        axis: Axis = ...,
        ascending=...,
        inplace: Literal[True],
        kind: str = ...,
        na_position: str = ...,
        ignore_index: bool_t = ...,
        key: ValueKeyFunc = ...,
    ) -> None:
        ...

    @overload
    def sort_values(
        self: NDFrameT,
        *,
        axis: Axis = ...,
        ascending=...,
        inplace: bool_t = ...,
        kind: str = ...,
        na_position: str = ...,
        ignore_index: bool_t = ...,
        key: ValueKeyFunc = ...,
    ) -> NDFrameT | None:
        ...

    @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
    def sort_values(
        self: NDFrameT,
        axis: Axis = 0,
        ascending=True,
        inplace: bool_t = False,
        kind: str = "quicksort",
        na_position: str = "last",
        ignore_index: bool_t = False,
        key: ValueKeyFunc = None,
    ) -> NDFrameT | None:
        """
        Sort by the values along either axis.

        Parameters
        ----------%(optional_by)s
        axis : %(axes_single_arg)s, default 0
             Axis to be sorted.
        ascending : bool or list of bool, default True
             Sort ascending vs. descending. Specify list for multiple sort
             orders.  If this is a list of bools, must match the length of
             the by.
        inplace : bool, default False
             If True, perform operation in-place.
        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
             Choice of sorting algorithm. See also :func:`numpy.sort` for more
             information. `mergesort` and `stable` are the only stable algorithms. For
             DataFrames, this option is only applied when sorting on a single
             column or label.
        na_position : {'first', 'last'}, default 'last'
             Puts NaNs at the beginning if `first`; `last` puts NaNs at the
             end.
        ignore_index : bool, default False
             If True, the resulting axis will be labeled 0, 1, …, n - 1.

             .. versionadded:: 1.0.0

        key : callable, optional
            Apply the key function to the values
            before sorting. This is similar to the `key` argument in the
            builtin :meth:`sorted` function, with the notable difference that
            this `key` function should be *vectorized*. It should expect a
            ``Series`` and return a Series with the same shape as the input.
            It will be applied to each column in `by` independently.

            .. versionadded:: 1.1.0

        Returns
        -------
        DataFrame or None
            DataFrame with sorted values or None if ``inplace=True``.

        See Also
        --------
        DataFrame.sort_index : Sort a DataFrame by the index.
        Series.sort_values : Similar method for a Series.

        Examples
        --------
        >>> df = pd.DataFrame({
        ...     'col1': ['A', 'A', 'B', np.nan, 'D', 'C'],
        ...     'col2': [2, 1, 9, 8, 7, 4],
        ...     'col3': [0, 1, 9, 4, 2, 3],
        ...     'col4': ['a', 'B', 'c', 'D', 'e', 'F']
        ... })
        >>> df
          col1  col2  col3 col4
        0    A     2     0    a
        1    A     1     1    B
        2    B     9     9    c
        3  NaN     8     4    D
        4    D     7     2    e
        5    C     4     3    F

        Sort by col1

        >>> df.sort_values(by=['col1'])
          col1  col2  col3 col4
        0    A     2     0    a
        1    A     1     1    B
        2    B     9     9    c
        5    C     4     3    F
        4    D     7     2    e
        3  NaN     8     4    D

        Sort by multiple columns

        >>> df.sort_values(by=['col1', 'col2'])
          col1  col2  col3 col4
        1    A     1     1    B
        0    A     2     0    a
        2    B     9     9    c
        5    C     4     3    F
        4    D     7     2    e
        3  NaN     8     4    D

        Sort Descending

        >>> df.sort_values(by='col1', ascending=False)
          col1  col2  col3 col4
        4    D     7     2    e
        5    C     4     3    F
        2    B     9     9    c
        0    A     2     0    a
        1    A     1     1    B
        3  NaN     8     4    D

        Putting NAs first

        >>> df.sort_values(by='col1', ascending=False, na_position='first')
          col1  col2  col3 col4
        3  NaN     8     4    D
        4    D     7     2    e
        5    C     4     3    F
        2    B     9     9    c
        0    A     2     0    a
        1    A     1     1    B

        Sorting with a key function

        >>> df.sort_values(by='col4', key=lambda col: col.str.lower())
           col1  col2  col3 col4
        0    A     2     0    a
        1    A     1     1    B
        2    B     9     9    c
        3  NaN     8     4    D
        4    D     7     2    e
        5    C     4     3    F

        Natural sort with the key argument,
        using the `natsort <https://github.com/SethMMorton/natsort>` package.

        >>> df = pd.DataFrame({
        ...    "time": ['0hr', '128hr', '72hr', '48hr', '96hr'],
        ...    "value": [10, 20, 30, 40, 50]
        ... })
        >>> df
            time  value
        0    0hr     10
        1  128hr     20
        2   72hr     30
        3   48hr     40
        4   96hr     50
        >>> from natsort import index_natsorted
        >>> df.sort_values(
        ...    by="time",
        ...    key=lambda x: np.argsort(index_natsorted(df["time"]))
        ... )
            time  value
        0    0hr     10
        3   48hr     40
        2   72hr     30
        4   96hr     50
        1  128hr     20
        """
        # NDFrame only hosts the shared docstring template (the %(...)s
        # placeholders are filled in by the Series/DataFrame wrappers);
        # the actual sorting is implemented in the subclasses.
        raise AbstractMethodError(self)
    # Overloads: inplace=True returns None, inplace=False returns the same
    # NDFrame subtype; the third covers a runtime-determined bool.
    @overload
    def sort_index(
        self,
        *,
        axis: Axis = ...,
        level: IndexLabel = ...,
        ascending: bool_t | Sequence[bool_t] = ...,
        inplace: Literal[True],
        kind: SortKind = ...,
        na_position: NaPosition = ...,
        sort_remaining: bool_t = ...,
        ignore_index: bool_t = ...,
        key: IndexKeyFunc = ...,
    ) -> None:
        ...

    @overload
    def sort_index(
        self: NDFrameT,
        *,
        axis: Axis = ...,
        level: IndexLabel = ...,
        ascending: bool_t | Sequence[bool_t] = ...,
        inplace: Literal[False] = ...,
        kind: SortKind = ...,
        na_position: NaPosition = ...,
        sort_remaining: bool_t = ...,
        ignore_index: bool_t = ...,
        key: IndexKeyFunc = ...,
    ) -> NDFrameT:
        ...

    @overload
    def sort_index(
        self: NDFrameT,
        *,
        axis: Axis = ...,
        level: IndexLabel = ...,
        ascending: bool_t | Sequence[bool_t] = ...,
        inplace: bool_t = ...,
        kind: SortKind = ...,
        na_position: NaPosition = ...,
        sort_remaining: bool_t = ...,
        ignore_index: bool_t = ...,
        key: IndexKeyFunc = ...,
    ) -> NDFrameT | None:
        ...

    def sort_index(
        self: NDFrameT,
        axis: Axis = 0,
        level: IndexLabel = None,
        ascending: bool_t | Sequence[bool_t] = True,
        inplace: bool_t = False,
        kind: SortKind = "quicksort",
        na_position: NaPosition = "last",
        sort_remaining: bool_t = True,
        ignore_index: bool_t = False,
        key: IndexKeyFunc = None,
    ) -> NDFrameT | None:
        # Shared Series/DataFrame implementation; the subclasses provide the
        # public docstrings.
        inplace = validate_bool_kwarg(inplace, "inplace")
        axis = self._get_axis_number(axis)
        ascending = validate_ascending(ascending)

        target = self._get_axis(axis)

        # ``indexer`` is None when the axis is already in the requested
        # order, so no take() is needed.
        indexer = get_indexer_indexer(
            target, level, ascending, kind, na_position, sort_remaining, key
        )

        if indexer is None:
            # Already sorted: return self (inplace) or a copy.
            if inplace:
                result = self
            else:
                result = self.copy()

            if ignore_index:
                result.index = default_index(len(self))
            if inplace:
                return None
            else:
                return result

        # Translate the user-facing axis to the (reversed) block-manager axis
        # and reorder the underlying blocks.
        baxis = self._get_block_manager_axis(axis)
        new_data = self._mgr.take(indexer, axis=baxis, verify=False)

        # reconstruct axis if needed
        new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic())

        if ignore_index:
            # Replace the sorted labels with a fresh RangeIndex. Only the
            # row axis is ever relabeled (manager axis 1 for DataFrame).
            axis = 1 if isinstance(self, ABCDataFrame) else 0
            new_data.set_axis(axis, default_index(len(indexer)))

        result = self._constructor(new_data)

        if inplace:
            # _update_inplace returns None, matching the inplace contract.
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="sort_index")
    @doc(
        klass=_shared_doc_kwargs["klass"],
        axes=_shared_doc_kwargs["axes"],
        optional_labels="",
        optional_axis="",
    )
    def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT:
        """
        Conform {klass} to new index with optional filling logic.

        Places NA/NaN in locations having no value in the previous index. A new object
        is produced unless the new index is equivalent to the current one and
        ``copy=False``.

        Parameters
        ----------
        {optional_labels}
        {axes} : array-like, optional
            New labels / index to conform to, should be specified using
            keywords. Preferably an Index object to avoid duplicating data.
        {optional_axis}
        method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}}
            Method to use for filling holes in reindexed DataFrame.
            Please note: this is only applicable to DataFrames/Series with a
            monotonically increasing/decreasing index.

            * None (default): don't fill gaps
            * pad / ffill: Propagate last valid observation forward to next
              valid.
            * backfill / bfill: Use next valid observation to fill gap.
            * nearest: Use nearest valid observations to fill gap.

        copy : bool, default True
            Return a new object, even if the passed indexes are the same.
        level : int or name
            Broadcast across a level, matching Index values on the
            passed MultiIndex level.
        fill_value : scalar, default np.NaN
            Value to use for missing values. Defaults to NaN, but can be any
            "compatible" value.
        limit : int, default None
            Maximum number of consecutive elements to forward or backward fill.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        {klass} with changed index.

        See Also
        --------
        DataFrame.set_index : Set row labels.
        DataFrame.reset_index : Remove row labels or move them to new columns.
        DataFrame.reindex_like : Change to same indices as other DataFrame.

        Examples
        --------
        ``DataFrame.reindex`` supports two calling conventions

        * ``(index=index_labels, columns=column_labels, ...)``
        * ``(labels, axis={{'index', 'columns'}}, ...)``

        We *highly* recommend using keyword arguments to clarify your
        intent.

        Create a dataframe with some fictional data.

        >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
        >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301],
        ...                   'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}},
        ...                   index=index)
        >>> df
                   http_status  response_time
        Firefox            200           0.04
        Chrome             200           0.02
        Safari             404           0.07
        IE10               404           0.08
        Konqueror          301           1.00

        Create a new index and reindex the dataframe. By default
        values in the new index that do not have corresponding
        records in the dataframe are assigned ``NaN``.

        >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
        ...              'Chrome']
        >>> df.reindex(new_index)
                       http_status  response_time
        Safari               404.0           0.07
        Iceweasel              NaN            NaN
        Comodo Dragon          NaN            NaN
        IE10                 404.0           0.08
        Chrome               200.0           0.02

        We can fill in the missing values by passing a value to
        the keyword ``fill_value``. Because the index is not monotonically
        increasing or decreasing, we cannot use arguments to the keyword
        ``method`` to fill the ``NaN`` values.

        >>> df.reindex(new_index, fill_value=0)
                       http_status  response_time
        Safari                 404           0.07
        Iceweasel                0           0.00
        Comodo Dragon            0           0.00
        IE10                   404           0.08
        Chrome                 200           0.02

        >>> df.reindex(new_index, fill_value='missing')
                      http_status response_time
        Safari                404          0.07
        Iceweasel         missing       missing
        Comodo Dragon     missing       missing
        IE10                  404          0.08
        Chrome                200          0.02

        We can also reindex the columns.

        >>> df.reindex(columns=['http_status', 'user_agent'])
                   http_status  user_agent
        Firefox            200         NaN
        Chrome             200         NaN
        Safari             404         NaN
        IE10               404         NaN
        Konqueror          301         NaN

        Or we can use "axis-style" keyword arguments

        >>> df.reindex(['http_status', 'user_agent'], axis="columns")
                   http_status  user_agent
        Firefox            200         NaN
        Chrome             200         NaN
        Safari             404         NaN
        IE10               404         NaN
        Konqueror          301         NaN

        To further illustrate the filling functionality in
        ``reindex``, we will create a dataframe with a
        monotonically increasing index (for example, a sequence
        of dates).

        >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
        >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}},
        ...                    index=date_index)
        >>> df2
                    prices
        2010-01-01   100.0
        2010-01-02   101.0
        2010-01-03     NaN
        2010-01-04   100.0
        2010-01-05    89.0
        2010-01-06    88.0

        Suppose we decide to expand the dataframe to cover a wider
        date range.

        >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
        >>> df2.reindex(date_index2)
                    prices
        2009-12-29     NaN
        2009-12-30     NaN
        2009-12-31     NaN
        2010-01-01   100.0
        2010-01-02   101.0
        2010-01-03     NaN
        2010-01-04   100.0
        2010-01-05    89.0
        2010-01-06    88.0
        2010-01-07     NaN

        The index entries that did not have a value in the original data frame
        (for example, '2009-12-29') are by default filled with ``NaN``.
        If desired, we can fill in the missing values using one of several
        options.

        For example, to back-propagate the last valid value to fill the ``NaN``
        values, pass ``bfill`` as an argument to the ``method`` keyword.

        >>> df2.reindex(date_index2, method='bfill')
                    prices
        2009-12-29   100.0
        2009-12-30   100.0
        2009-12-31   100.0
        2010-01-01   100.0
        2010-01-02   101.0
        2010-01-03     NaN
        2010-01-04   100.0
        2010-01-05    89.0
        2010-01-06    88.0
        2010-01-07     NaN

        Please note that the ``NaN`` value present in the original dataframe
        (at index value 2010-01-03) will not be filled by any of the
        value propagation schemes. This is because filling while reindexing
        does not look at dataframe values, but only compares the original and
        desired indexes. If you do want to fill in the ``NaN`` values present
        in the original dataframe, use the ``fillna()`` method.

        See the :ref:`user guide <basics.reindexing>` for more.
        """
        # TODO: Decide if we care about having different examples for different
        # kinds

        # construct the args
        axes, kwargs = self._construct_axes_from_arguments(args, kwargs)
        method = missing.clean_reindex_fill_method(kwargs.pop("method", None))
        level = kwargs.pop("level", None)
        copy = kwargs.pop("copy", None)
        limit = kwargs.pop("limit", None)
        tolerance = kwargs.pop("tolerance", None)
        fill_value = kwargs.pop("fill_value", None)

        # Series.reindex doesn't use / need the axis kwarg
        # We pop and ignore it here, to make writing Series/Frame generic code
        # easier
        kwargs.pop("axis", None)

        # Anything left in kwargs at this point is an unknown keyword.
        if kwargs:
            raise TypeError(
                "reindex() got an unexpected keyword "
                f'argument "{list(kwargs.keys())[0]}"'
            )

        self._consolidate_inplace()

        # if all axes that are requested to reindex are equal, then only copy
        # if indicated must have index names equal here as well as values
        if all(
            self._get_axis(axis).identical(ax)
            for axis, ax in axes.items()
            if ax is not None
        ):
            return self.copy(deep=copy)

        # check if we are a multi reindex
        if self._needs_reindex_multi(axes, method, level):
            return self._reindex_multi(axes, copy, fill_value)

        # perform the reindex on the axes
        return self._reindex_axes(
            axes, level, limit, tolerance, method, fill_value, copy
        ).__finalize__(self, method="reindex")
5293 def _reindex_axes(
5294 self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy
5295 ) -> NDFrameT:
5296 """Perform the reindex for all the axes."""
5297 obj = self
5298 for a in self._AXIS_ORDERS:
5299 labels = axes[a]
5300 if labels is None:
5301 continue
5303 ax = self._get_axis(a)
5304 new_index, indexer = ax.reindex(
5305 labels, level=level, limit=limit, tolerance=tolerance, method=method
5306 )
5308 axis = self._get_axis_number(a)
5309 obj = obj._reindex_with_indexers(
5310 {axis: [new_index, indexer]},
5311 fill_value=fill_value,
5312 copy=copy,
5313 allow_dups=False,
5314 )
5315 # If we've made a copy once, no need to make another one
5316 copy = False
5318 return obj
5320 def _needs_reindex_multi(self, axes, method, level) -> bool_t:
5321 """Check if we do need a multi reindex."""
5322 return (
5323 (com.count_not_none(*axes.values()) == self._AXIS_LEN)
5324 and method is None
5325 and level is None
5326 and not self._is_mixed_type
5327 )
5329 def _reindex_multi(self, axes, copy, fill_value):
5330 raise AbstractMethodError(self)
    @final
    def _reindex_with_indexers(
        self: NDFrameT,
        reindexers,
        fill_value=None,
        copy: bool_t = False,
        allow_dups: bool_t = False,
    ) -> NDFrameT:
        """
        Apply precomputed (new_index, indexer) pairs to the block manager.

        Parameters
        ----------
        reindexers : dict of {axis number: [new_index, indexer]}
        fill_value : scalar, optional
            Value used for locations introduced by the reindex.
        copy : bool, default False
            Force a copy even when no axis actually changes.
        allow_dups : bool, default False
            allow_dups indicates an internal call here
        """
        # reindex doing multiple operations on different axes if indicated
        new_data = self._mgr
        for axis in sorted(reindexers.keys()):
            index, indexer = reindexers[axis]
            # Translate user-facing axis to the block manager's axis numbering.
            baxis = self._get_block_manager_axis(axis)

            if index is None:
                continue

            index = ensure_index(index)
            if indexer is not None:
                indexer = ensure_platform_int(indexer)

            # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)
            new_data = new_data.reindex_indexer(
                index,
                indexer,
                axis=baxis,
                fill_value=fill_value,
                allow_dups=allow_dups,
                copy=copy,
            )
            # If we've made a copy once, no need to make another one
            copy = False

        # Honor copy=True even when every reindexer was a no-op.
        if copy and new_data is self._mgr:
            new_data = new_data.copy()

        return self._constructor(new_data).__finalize__(self)
5371 def filter(
5372 self: NDFrameT,
5373 items=None,
5374 like: str | None = None,
5375 regex: str | None = None,
5376 axis=None,
5377 ) -> NDFrameT:
5378 """
5379 Subset the dataframe rows or columns according to the specified index labels.
5381 Note that this routine does not filter a dataframe on its
5382 contents. The filter is applied to the labels of the index.
5384 Parameters
5385 ----------
5386 items : list-like
5387 Keep labels from axis which are in items.
5388 like : str
5389 Keep labels from axis for which "like in label == True".
5390 regex : str (regular expression)
5391 Keep labels from axis for which re.search(regex, label) == True.
5392 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
5393 The axis to filter on, expressed either as an index (int)
5394 or axis name (str). By default this is the info axis, 'columns' for
5395 DataFrame. For `Series` this parameter is unused and defaults to `None`.
5397 Returns
5398 -------
5399 same type as input object
5401 See Also
5402 --------
5403 DataFrame.loc : Access a group of rows and columns
5404 by label(s) or a boolean array.
5406 Notes
5407 -----
5408 The ``items``, ``like``, and ``regex`` parameters are
5409 enforced to be mutually exclusive.
5411 ``axis`` defaults to the info axis that is used when indexing
5412 with ``[]``.
5414 Examples
5415 --------
5416 >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
5417 ... index=['mouse', 'rabbit'],
5418 ... columns=['one', 'two', 'three'])
5419 >>> df
5420 one two three
5421 mouse 1 2 3
5422 rabbit 4 5 6
5424 >>> # select columns by name
5425 >>> df.filter(items=['one', 'three'])
5426 one three
5427 mouse 1 3
5428 rabbit 4 6
5430 >>> # select columns by regular expression
5431 >>> df.filter(regex='e$', axis=1)
5432 one three
5433 mouse 1 3
5434 rabbit 4 6
5436 >>> # select rows containing 'bbi'
5437 >>> df.filter(like='bbi', axis=0)
5438 one two three
5439 rabbit 4 5 6
5440 """
5441 nkw = com.count_not_none(items, like, regex)
5442 if nkw > 1:
5443 raise TypeError(
5444 "Keyword arguments `items`, `like`, or `regex` "
5445 "are mutually exclusive"
5446 )
5448 if axis is None:
5449 axis = self._info_axis_name
5450 labels = self._get_axis(axis)
5452 if items is not None:
5453 name = self._get_axis_name(axis)
5454 return self.reindex(**{name: [r for r in items if r in labels]})
5455 elif like:
5457 def f(x) -> bool_t:
5458 assert like is not None # needed for mypy
5459 return like in ensure_str(x)
5461 values = labels.map(f)
5462 return self.loc(axis=axis)[values]
5463 elif regex:
5465 def f(x) -> bool_t:
5466 return matcher.search(ensure_str(x)) is not None
5468 matcher = re.compile(regex)
5469 values = labels.map(f)
5470 return self.loc(axis=axis)[values]
5471 else:
5472 raise TypeError("Must pass either `items`, `like`, or `regex`")
5474 @final
5475 def head(self: NDFrameT, n: int = 5) -> NDFrameT:
5476 """
5477 Return the first `n` rows.
5479 This function returns the first `n` rows for the object based
5480 on position. It is useful for quickly testing if your object
5481 has the right type of data in it.
5483 For negative values of `n`, this function returns all rows except
5484 the last `|n|` rows, equivalent to ``df[:n]``.
5486 If n is larger than the number of rows, this function returns all rows.
5488 Parameters
5489 ----------
5490 n : int, default 5
5491 Number of rows to select.
5493 Returns
5494 -------
5495 same type as caller
5496 The first `n` rows of the caller object.
5498 See Also
5499 --------
5500 DataFrame.tail: Returns the last `n` rows.
5502 Examples
5503 --------
5504 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
5505 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
5506 >>> df
5507 animal
5508 0 alligator
5509 1 bee
5510 2 falcon
5511 3 lion
5512 4 monkey
5513 5 parrot
5514 6 shark
5515 7 whale
5516 8 zebra
5518 Viewing the first 5 lines
5520 >>> df.head()
5521 animal
5522 0 alligator
5523 1 bee
5524 2 falcon
5525 3 lion
5526 4 monkey
5528 Viewing the first `n` lines (three in this case)
5530 >>> df.head(3)
5531 animal
5532 0 alligator
5533 1 bee
5534 2 falcon
5536 For negative values of `n`
5538 >>> df.head(-3)
5539 animal
5540 0 alligator
5541 1 bee
5542 2 falcon
5543 3 lion
5544 4 monkey
5545 5 parrot
5546 """
5547 return self.iloc[:n]
5549 @final
5550 def tail(self: NDFrameT, n: int = 5) -> NDFrameT:
5551 """
5552 Return the last `n` rows.
5554 This function returns last `n` rows from the object based on
5555 position. It is useful for quickly verifying data, for example,
5556 after sorting or appending rows.
5558 For negative values of `n`, this function returns all rows except
5559 the first `|n|` rows, equivalent to ``df[|n|:]``.
5561 If n is larger than the number of rows, this function returns all rows.
5563 Parameters
5564 ----------
5565 n : int, default 5
5566 Number of rows to select.
5568 Returns
5569 -------
5570 type of caller
5571 The last `n` rows of the caller object.
5573 See Also
5574 --------
5575 DataFrame.head : The first `n` rows of the caller object.
5577 Examples
5578 --------
5579 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
5580 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']})
5581 >>> df
5582 animal
5583 0 alligator
5584 1 bee
5585 2 falcon
5586 3 lion
5587 4 monkey
5588 5 parrot
5589 6 shark
5590 7 whale
5591 8 zebra
5593 Viewing the last 5 lines
5595 >>> df.tail()
5596 animal
5597 4 monkey
5598 5 parrot
5599 6 shark
5600 7 whale
5601 8 zebra
5603 Viewing the last `n` lines (three in this case)
5605 >>> df.tail(3)
5606 animal
5607 6 shark
5608 7 whale
5609 8 zebra
5611 For negative values of `n`
5613 >>> df.tail(-3)
5614 animal
5615 3 lion
5616 4 monkey
5617 5 parrot
5618 6 shark
5619 7 whale
5620 8 zebra
5621 """
5622 if n == 0:
5623 return self.iloc[0:0]
5624 return self.iloc[-n:]
5626 @final
5627 def sample(
5628 self: NDFrameT,
5629 n: int | None = None,
5630 frac: float | None = None,
5631 replace: bool_t = False,
5632 weights=None,
5633 random_state: RandomState | None = None,
5634 axis: Axis | None = None,
5635 ignore_index: bool_t = False,
5636 ) -> NDFrameT:
5637 """
5638 Return a random sample of items from an axis of object.
5640 You can use `random_state` for reproducibility.
5642 Parameters
5643 ----------
5644 n : int, optional
5645 Number of items from axis to return. Cannot be used with `frac`.
5646 Default = 1 if `frac` = None.
5647 frac : float, optional
5648 Fraction of axis items to return. Cannot be used with `n`.
5649 replace : bool, default False
5650 Allow or disallow sampling of the same row more than once.
5651 weights : str or ndarray-like, optional
5652 Default 'None' results in equal probability weighting.
5653 If passed a Series, will align with target object on index. Index
5654 values in weights not found in sampled object will be ignored and
5655 index values in sampled object not in weights will be assigned
5656 weights of zero.
5657 If called on a DataFrame, will accept the name of a column
5658 when axis = 0.
5659 Unless weights are a Series, weights must be same length as axis
5660 being sampled.
5661 If weights do not sum to 1, they will be normalized to sum to 1.
5662 Missing values in the weights column will be treated as zero.
5663 Infinite values not allowed.
5664 random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional
5665 If int, array-like, or BitGenerator, seed for random number generator.
5666 If np.random.RandomState or np.random.Generator, use as given.
5668 .. versionchanged:: 1.1.0
5670 array-like and BitGenerator object now passed to np.random.RandomState()
5671 as seed
5673 .. versionchanged:: 1.4.0
5675 np.random.Generator objects now accepted
5677 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
5678 Axis to sample. Accepts axis number or name. Default is stat axis
5679 for given data type. For `Series` this parameter is unused and defaults to `None`.
5680 ignore_index : bool, default False
5681 If True, the resulting index will be labeled 0, 1, …, n - 1.
5683 .. versionadded:: 1.3.0
5685 Returns
5686 -------
5687 Series or DataFrame
5688 A new object of same type as caller containing `n` items randomly
5689 sampled from the caller object.
5691 See Also
5692 --------
5693 DataFrameGroupBy.sample: Generates random samples from each group of a
5694 DataFrame object.
5695 SeriesGroupBy.sample: Generates random samples from each group of a
5696 Series object.
5697 numpy.random.choice: Generates a random sample from a given 1-D numpy
5698 array.
5700 Notes
5701 -----
5702 If `frac` > 1, `replacement` should be set to `True`.
5704 Examples
5705 --------
5706 >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
5707 ... 'num_wings': [2, 0, 0, 0],
5708 ... 'num_specimen_seen': [10, 2, 1, 8]},
5709 ... index=['falcon', 'dog', 'spider', 'fish'])
5710 >>> df
5711 num_legs num_wings num_specimen_seen
5712 falcon 2 2 10
5713 dog 4 0 2
5714 spider 8 0 1
5715 fish 0 0 8
5717 Extract 3 random elements from the ``Series`` ``df['num_legs']``:
5718 Note that we use `random_state` to ensure the reproducibility of
5719 the examples.
5721 >>> df['num_legs'].sample(n=3, random_state=1)
5722 fish 0
5723 spider 8
5724 falcon 2
5725 Name: num_legs, dtype: int64
5727 A random 50% sample of the ``DataFrame`` with replacement:
5729 >>> df.sample(frac=0.5, replace=True, random_state=1)
5730 num_legs num_wings num_specimen_seen
5731 dog 4 0 2
5732 fish 0 0 8
5734 An upsample sample of the ``DataFrame`` with replacement:
5735 Note that `replace` parameter has to be `True` for `frac` parameter > 1.
5737 >>> df.sample(frac=2, replace=True, random_state=1)
5738 num_legs num_wings num_specimen_seen
5739 dog 4 0 2
5740 fish 0 0 8
5741 falcon 2 2 10
5742 falcon 2 2 10
5743 fish 0 0 8
5744 dog 4 0 2
5745 fish 0 0 8
5746 dog 4 0 2
5748 Using a DataFrame column as weights. Rows with larger value in the
5749 `num_specimen_seen` column are more likely to be sampled.
5751 >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
5752 num_legs num_wings num_specimen_seen
5753 falcon 2 2 10
5754 fish 0 0 8
5755 """ # noqa:E501
5756 if axis is None:
5757 axis = self._stat_axis_number
5759 axis = self._get_axis_number(axis)
5760 obj_len = self.shape[axis]
5762 # Process random_state argument
5763 rs = com.random_state(random_state)
5765 size = sample.process_sampling_size(n, frac, replace)
5766 if size is None:
5767 assert frac is not None
5768 size = round(frac * obj_len)
5770 if weights is not None:
5771 weights = sample.preprocess_weights(self, weights, axis)
5773 sampled_indices = sample.sample(obj_len, size, replace, weights, rs)
5774 result = self.take(sampled_indices, axis=axis)
5776 if ignore_index:
5777 result.index = default_index(len(result))
5779 return result
    @final
    @doc(klass=_shared_doc_kwargs["klass"])
    def pipe(
        self,
        func: Callable[..., T] | tuple[Callable[..., T], str],
        *args,
        **kwargs,
    ) -> T:
        r"""
        Apply chainable functions that expect Series or DataFrames.

        Parameters
        ----------
        func : function
            Function to apply to the {klass}; ``args`` and ``kwargs`` are
            passed into it. Alternatively a ``(callable, data_keyword)``
            tuple where ``data_keyword`` is a string indicating the keyword
            of ``callable`` that expects the {klass}.
        args : iterable, optional
            Positional arguments passed into ``func``.
        kwargs : mapping, optional
            A dictionary of keyword arguments passed into ``func``.

        Returns
        -------
        object : the return type of ``func``.

        See Also
        --------
        DataFrame.apply : Apply a function along input axis of DataFrame.
        DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
        Series.map : Apply a mapping correspondence on a
            :class:`~pandas.Series`.

        Notes
        -----
        ``.pipe`` keeps method chains readable when mixing in plain
        functions that take the data as an argument. Instead of

        >>> func(g(h(df), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP

        you can write

        >>> (df.pipe(h)
        ...    .pipe(g, arg1=a)
        ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
        ...  )  # doctest: +SKIP
        """
        # The (callable, data_keyword) tuple form is unpacked by the shared
        # helper.
        return com.pipe(self, func, *args, **kwargs)
5841 # ----------------------------------------------------------------------
5842 # Attribute access
5844 @final
5845 def __finalize__(
5846 self: NDFrameT, other, method: str | None = None, **kwargs
5847 ) -> NDFrameT:
5848 """
5849 Propagate metadata from other to self.
5851 Parameters
5852 ----------
5853 other : the object from which to get the attributes that we are going
5854 to propagate
5855 method : str, optional
5856 A passed method name providing context on where ``__finalize__``
5857 was called.
5859 .. warning::
5861 The value passed as `method` are not currently considered
5862 stable across pandas releases.
5863 """
5864 if isinstance(other, NDFrame):
5865 for name in other.attrs:
5866 self.attrs[name] = other.attrs[name]
5868 self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels
5869 # For subclasses using _metadata.
5870 for name in set(self._metadata) & set(other._metadata):
5871 assert isinstance(name, str)
5872 object.__setattr__(self, name, getattr(other, name, None))
5874 if method == "concat":
5875 attrs = other.objs[0].attrs
5876 check_attrs = all(objs.attrs == attrs for objs in other.objs[1:])
5877 if check_attrs:
5878 for name in attrs:
5879 self.attrs[name] = attrs[name]
5881 allows_duplicate_labels = all(
5882 x.flags.allows_duplicate_labels for x in other.objs
5883 )
5884 self.flags.allows_duplicate_labels = allows_duplicate_labels
5886 return self
5888 def __getattr__(self, name: str):
5889 """
5890 After regular attribute access, try looking up the name
5891 This allows simpler access to columns for interactive use.
5892 """
5893 # Note: obj.x will always call obj.__getattribute__('x') prior to
5894 # calling obj.__getattr__('x').
5895 if (
5896 name not in self._internal_names_set
5897 and name not in self._metadata
5898 and name not in self._accessors
5899 and self._info_axis._can_hold_identifiers_and_holds_name(name)
5900 ):
5901 return self[name]
5902 return object.__getattribute__(self, name)
    def __setattr__(self, name: str, value) -> None:
        """
        After regular attribute access, try setting the name as a column.

        This mirrors ``__getattr__`` so that ``obj.x`` and ``obj.x = 4``
        reference/modify the same thing for interactive use.
        """
        # first try regular attribute access via __getattribute__, so that
        # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
        # the same attribute.

        try:
            object.__getattribute__(self, name)
            return object.__setattr__(self, name, value)
        except AttributeError:
            pass

        # if this fails, go on to more involved attribute setting
        # (note that this matches __getattr__, above).
        if name in self._internal_names_set:
            # Internal bookkeeping attribute: plain attribute set.
            object.__setattr__(self, name, value)
        elif name in self._metadata:
            # Subclass-registered metadata attribute: plain attribute set.
            object.__setattr__(self, name, value)
        else:
            try:
                existing = getattr(self, name)
                if isinstance(existing, Index):
                    # Axis accessors (e.g. ``index``/``columns``) go through
                    # the normal property setter.
                    object.__setattr__(self, name, value)
                elif name in self._info_axis:
                    # Name matches an existing label: set the column/item.
                    self[name] = value
                else:
                    object.__setattr__(self, name, value)
            except (AttributeError, TypeError):
                # New attribute that looks like column data: warn that this
                # does NOT create a column, then set the attribute anyway.
                if isinstance(self, ABCDataFrame) and (is_list_like(value)):
                    warnings.warn(
                        "Pandas doesn't allow columns to be "
                        "created via a new attribute name - see "
                        "https://pandas.pydata.org/pandas-docs/"
                        "stable/indexing.html#attribute-access",
                        stacklevel=find_stack_level(),
                    )
                object.__setattr__(self, name, value)
5945 @final
5946 def _dir_additions(self) -> set[str]:
5947 """
5948 add the string-like attributes from the info_axis.
5949 If info_axis is a MultiIndex, its first level values are used.
5950 """
5951 additions = super()._dir_additions()
5952 if self._info_axis._can_hold_strings:
5953 additions.update(self._info_axis._dir_additions_for_owner)
5954 return additions
5956 # ----------------------------------------------------------------------
5957 # Consolidation of internals
5959 @final
5960 def _protect_consolidate(self, f):
5961 """
5962 Consolidate _mgr -- if the blocks have changed, then clear the
5963 cache
5964 """
5965 if isinstance(self._mgr, (ArrayManager, SingleArrayManager)):
5966 return f()
5967 blocks_before = len(self._mgr.blocks)
5968 result = f()
5969 if len(self._mgr.blocks) != blocks_before:
5970 self._clear_item_cache()
5971 return result
5973 @final
5974 def _consolidate_inplace(self) -> None:
5975 """Consolidate data in place and return None"""
5977 def f():
5978 self._mgr = self._mgr.consolidate()
5980 self._protect_consolidate(f)
5982 @final
5983 def _consolidate(self):
5984 """
5985 Compute NDFrame with "consolidated" internals (data of each dtype
5986 grouped together in a single ndarray).
5988 Returns
5989 -------
5990 consolidated : same type as caller
5991 """
5992 f = lambda: self._mgr.consolidate()
5993 cons_data = self._protect_consolidate(f)
5994 return self._constructor(cons_data).__finalize__(self)
5996 @final
5997 @property
5998 def _is_mixed_type(self) -> bool_t:
5999 if self._mgr.is_single_block:
6000 return False
6002 if self._mgr.any_extension_types:
6003 # Even if they have the same dtype, we can't consolidate them,
6004 # so we pretend this is "mixed'"
6005 return True
6007 return self.dtypes.nunique() > 1
6009 @final
6010 def _check_inplace_setting(self, value) -> bool_t:
6011 """check whether we allow in-place setting with this type of value"""
6012 if self._is_mixed_type and not self._mgr.is_numeric_mixed_type:
6014 # allow an actual np.nan thru
6015 if is_float(value) and np.isnan(value):
6016 return True
6018 raise TypeError(
6019 "Cannot do inplace boolean setting on "
6020 "mixed-types with a non np.nan value"
6021 )
6023 return True
6025 @final
6026 def _get_numeric_data(self: NDFrameT) -> NDFrameT:
6027 return self._constructor(self._mgr.get_numeric_data()).__finalize__(self)
6029 @final
6030 def _get_bool_data(self):
6031 return self._constructor(self._mgr.get_bool_data()).__finalize__(self)
6033 # ----------------------------------------------------------------------
6034 # Internal Interface Methods
    @property
    def values(self):
        """
        Return the underlying data; abstract here, implemented by the
        Series/DataFrame subclasses.
        """
        raise AbstractMethodError(self)
    @property
    def _values(self) -> np.ndarray:
        """Internal counterpart of ``.values``; abstract at this level."""
        raise AbstractMethodError(self)
6045 @property
6046 def dtypes(self):
6047 """
6048 Return the dtypes in the DataFrame.
6050 This returns a Series with the data type of each column.
6051 The result's index is the original DataFrame's columns. Columns
6052 with mixed types are stored with the ``object`` dtype. See
6053 :ref:`the User Guide <basics.dtypes>` for more.
6055 Returns
6056 -------
6057 pandas.Series
6058 The data type of each column.
6060 Examples
6061 --------
6062 >>> df = pd.DataFrame({'float': [1.0],
6063 ... 'int': [1],
6064 ... 'datetime': [pd.Timestamp('20180310')],
6065 ... 'string': ['foo']})
6066 >>> df.dtypes
6067 float float64
6068 int int64
6069 datetime datetime64[ns]
6070 string object
6071 dtype: object
6072 """
6073 data = self._mgr.get_dtypes()
6074 return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_)
    def astype(
        self: NDFrameT, dtype, copy: bool_t = True, errors: IgnoreRaise = "raise"
    ) -> NDFrameT:
        """
        Cast a pandas object to a specified dtype ``dtype``.

        Parameters
        ----------
        dtype : data type, or dict of column name -> data type
            Use a numpy.dtype or Python type to cast the entire pandas
            object to one type. Alternatively, use {col: dtype, ...} to
            cast one or more of a DataFrame's columns to column-specific
            types (columns absent from the mapping are left unchanged).
        copy : bool, default True
            Return a copy when ``copy=True`` (be very careful setting
            ``copy=False`` as changes to values then may propagate to
            other pandas objects).
        errors : {'raise', 'ignore'}, default 'raise'
            Control raising of exceptions on invalid data for provided
            dtype.

            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original
              object.

        Returns
        -------
        casted : same type as caller

        Raises
        ------
        KeyError
            If ``dtype`` is a mapping and a key is not a column label (or,
            for a Series, is not the Series name).

        See Also
        --------
        to_datetime : Convert argument to datetime.
        to_timedelta : Convert argument to timedelta.
        to_numeric : Convert argument to a numeric type.
        numpy.ndarray.astype : Cast a numpy array to a specified type.

        Notes
        -----
        .. deprecated:: 1.3.0

            Using ``astype`` to convert from timezone-naive dtype to
            timezone-aware dtype is deprecated and will raise in a
            future version. Use :meth:`Series.dt.tz_localize` instead.

        Examples
        --------
        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.astype({'col1': 'int32'}).dtypes
        col1    int32
        col2    int64
        dtype: object
        """
        if is_dict_like(dtype):
            if self.ndim == 1:  # i.e. Series
                # A Series mapping may only key on the Series' own name.
                if len(dtype) > 1 or self.name not in dtype:
                    raise KeyError(
                        "Only the Series name can be used for "
                        "the key in Series dtype mappings."
                    )
                new_type = dtype[self.name]
                return self.astype(new_type, copy, errors)

            # GH#44417 cast the mapping to a Series so we can use .iat
            # below, which is robust against duplicate column labels.
            from pandas import Series

            dtype_ser = Series(dtype, dtype=object)

            # Validate every key before converting anything.
            for col_name in dtype_ser.index:
                if col_name not in self:
                    raise KeyError(
                        "Only a column name can be used for the "
                        "key in a dtype mappings argument. "
                        f"'{col_name}' not found in columns."
                    )

            # Align to our columns; unmapped columns get a None "dtype"
            # and are copied through unchanged below.
            dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)

            results = []
            for i, (col_name, col) in enumerate(self.items()):
                cdt = dtype_ser.iat[i]
                if isna(cdt):
                    res_col = col.copy() if copy else col
                else:
                    res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
                results.append(res_col)

        elif is_extension_array_dtype(dtype) and self.ndim > 1:
            # GH 18099/22869: columnwise conversion to extension dtype
            # GH 24704: use iloc to handle duplicate column names
            # TODO(EA2D): special case not needed with 2D EAs
            results = [
                self.iloc[:, i].astype(dtype, copy=copy)
                for i in range(len(self.columns))
            ]

        else:
            # else, only a single dtype is given: delegate to the manager
            # and return directly.
            new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
            return self._constructor(new_data).__finalize__(self, method="astype")

        # GH 33113: handle empty frame or series
        if not results:
            return self.copy()

        # GH 19920: retain column metadata after concat
        result = concat(results, axis=1, copy=False)
        # GH#40810 retain subclass
        # error: Incompatible types in assignment
        # (expression has type "NDFrameT", variable has type "DataFrame")
        result = self._constructor(result)  # type: ignore[assignment]
        result.columns = self.columns
        result = result.__finalize__(self, method="astype")
        # https://github.com/python/mypy/issues/8354
        return cast(NDFrameT, result)
6258 @final
6259 def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT:
6260 """
6261 Make a copy of this object's indices and data.
6263 When ``deep=True`` (default), a new object will be created with a
6264 copy of the calling object's data and indices. Modifications to
6265 the data or indices of the copy will not be reflected in the
6266 original object (see notes below).
6268 When ``deep=False``, a new object will be created without copying
6269 the calling object's data or index (only references to the data
6270 and index are copied). Any changes to the data of the original
6271 will be reflected in the shallow copy (and vice versa).
6273 Parameters
6274 ----------
6275 deep : bool, default True
6276 Make a deep copy, including a copy of the data and the indices.
6277 With ``deep=False`` neither the indices nor the data are copied.
6279 Returns
6280 -------
6281 copy : Series or DataFrame
6282 Object type matches caller.
6284 Notes
6285 -----
6286 When ``deep=True``, data is copied but actual Python objects
6287 will not be copied recursively, only the reference to the object.
6288 This is in contrast to `copy.deepcopy` in the Standard Library,
6289 which recursively copies object data (see examples below).
6291 While ``Index`` objects are copied when ``deep=True``, the underlying
6292 numpy array is not copied for performance reasons. Since ``Index`` is
6293 immutable, the underlying data can be safely shared and a copy
6294 is not needed.
6296 Since pandas is not thread safe, see the
6297 :ref:`gotchas <gotchas.thread-safety>` when copying in a threading
6298 environment.
6300 Examples
6301 --------
6302 >>> s = pd.Series([1, 2], index=["a", "b"])
6303 >>> s
6304 a 1
6305 b 2
6306 dtype: int64
6308 >>> s_copy = s.copy()
6309 >>> s_copy
6310 a 1
6311 b 2
6312 dtype: int64
6314 **Shallow copy versus default (deep) copy:**
6316 >>> s = pd.Series([1, 2], index=["a", "b"])
6317 >>> deep = s.copy()
6318 >>> shallow = s.copy(deep=False)
6320 Shallow copy shares data and index with original.
6322 >>> s is shallow
6323 False
6324 >>> s.values is shallow.values and s.index is shallow.index
6325 True
6327 Deep copy has own copy of data and index.
6329 >>> s is deep
6330 False
6331 >>> s.values is deep.values or s.index is deep.index
6332 False
6334 Updates to the data shared by shallow copy and original is reflected
6335 in both; deep copy remains unchanged.
6337 >>> s[0] = 3
6338 >>> shallow[1] = 4
6339 >>> s
6340 a 3
6341 b 4
6342 dtype: int64
6343 >>> shallow
6344 a 3
6345 b 4
6346 dtype: int64
6347 >>> deep
6348 a 1
6349 b 2
6350 dtype: int64
6352 Note that when copying an object containing Python objects, a deep copy
6353 will copy the data, but will not do so recursively. Updating a nested
6354 data object will be reflected in the deep copy.
6356 >>> s = pd.Series([[1, 2], [3, 4]])
6357 >>> deep = s.copy()
6358 >>> s[0][0] = 10
6359 >>> s
6360 0 [10, 2]
6361 1 [3, 4]
6362 dtype: object
6363 >>> deep
6364 0 [10, 2]
6365 1 [3, 4]
6366 dtype: object
6367 """
6368 data = self._mgr.copy(deep=deep)
6369 self._clear_item_cache()
6370 return self._constructor(data).__finalize__(self, method="copy")
    @final
    def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT:
        """Support ``copy.copy`` by delegating to :meth:`copy`."""
        return self.copy(deep=deep)
    @final
    def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT:
        """
        Support ``copy.deepcopy``; always performs a deep :meth:`copy`.

        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)
6386 @final
6387 def _convert(
6388 self: NDFrameT,
6389 datetime: bool_t = False,
6390 numeric: bool_t = False,
6391 timedelta: bool_t = False,
6392 ) -> NDFrameT:
6393 """
6394 Attempt to infer better dtype for object columns.
6396 Parameters
6397 ----------
6398 datetime : bool, default False
6399 If True, convert to date where possible.
6400 numeric : bool, default False
6401 If True, attempt to convert to numbers (including strings), with
6402 unconvertible values becoming NaN.
6403 timedelta : bool, default False
6404 If True, convert to timedelta where possible.
6406 Returns
6407 -------
6408 converted : same as input object
6409 """
6410 validate_bool_kwarg(datetime, "datetime")
6411 validate_bool_kwarg(numeric, "numeric")
6412 validate_bool_kwarg(timedelta, "timedelta")
6413 return self._constructor(
6414 self._mgr.convert(
6415 datetime=datetime,
6416 numeric=numeric,
6417 timedelta=timedelta,
6418 copy=True,
6419 )
6420 ).__finalize__(self)
6422 @final
6423 def infer_objects(self: NDFrameT) -> NDFrameT:
6424 """
6425 Attempt to infer better dtypes for object columns.
6427 Attempts soft conversion of object-dtyped
6428 columns, leaving non-object and unconvertible
6429 columns unchanged. The inference rules are the
6430 same as during normal Series/DataFrame construction.
6432 Returns
6433 -------
6434 converted : same type as input object
6436 See Also
6437 --------
6438 to_datetime : Convert argument to datetime.
6439 to_timedelta : Convert argument to timedelta.
6440 to_numeric : Convert argument to numeric type.
6441 convert_dtypes : Convert argument to best possible dtype.
6443 Examples
6444 --------
6445 >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
6446 >>> df = df.iloc[1:]
6447 >>> df
6448 A
6449 1 1
6450 2 2
6451 3 3
6453 >>> df.dtypes
6454 A object
6455 dtype: object
6457 >>> df.infer_objects().dtypes
6458 A int64
6459 dtype: object
6460 """
6461 # numeric=False necessary to only soft convert;
6462 # python objects will still be converted to
6463 # native numpy numeric types
6464 return self._constructor(
6465 self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True)
6466 ).__finalize__(self, method="infer_objects")
6468 @final
6469 def convert_dtypes(
6470 self: NDFrameT,
6471 infer_objects: bool_t = True,
6472 convert_string: bool_t = True,
6473 convert_integer: bool_t = True,
6474 convert_boolean: bool_t = True,
6475 convert_floating: bool_t = True,
6476 ) -> NDFrameT:
6477 """
6478 Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
6480 .. versionadded:: 1.0.0
6482 Parameters
6483 ----------
6484 infer_objects : bool, default True
6485 Whether object dtypes should be converted to the best possible types.
6486 convert_string : bool, default True
6487 Whether object dtypes should be converted to ``StringDtype()``.
6488 convert_integer : bool, default True
6489 Whether, if possible, conversion can be done to integer extension types.
6490 convert_boolean : bool, defaults True
6491 Whether object dtypes should be converted to ``BooleanDtypes()``.
6492 convert_floating : bool, defaults True
6493 Whether, if possible, conversion can be done to floating extension types.
6494 If `convert_integer` is also True, preference will be give to integer
6495 dtypes if the floats can be faithfully casted to integers.
6497 .. versionadded:: 1.2.0
6499 Returns
6500 -------
6501 Series or DataFrame
6502 Copy of input object with new dtype.
6504 See Also
6505 --------
6506 infer_objects : Infer dtypes of objects.
6507 to_datetime : Convert argument to datetime.
6508 to_timedelta : Convert argument to timedelta.
6509 to_numeric : Convert argument to a numeric type.
6511 Notes
6512 -----
6513 By default, ``convert_dtypes`` will attempt to convert a Series (or each
6514 Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
6515 ``convert_string``, ``convert_integer``, ``convert_boolean`` and
6516 ``convert_boolean``, it is possible to turn off individual conversions
6517 to ``StringDtype``, the integer extension types, ``BooleanDtype``
6518 or floating extension types, respectively.
6520 For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
6521 rules as during normal Series/DataFrame construction. Then, if possible,
6522 convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
6523 or floating extension type, otherwise leave as ``object``.
6525 If the dtype is integer, convert to an appropriate integer extension type.
6527 If the dtype is numeric, and consists of all integers, convert to an
6528 appropriate integer extension type. Otherwise, convert to an
6529 appropriate floating extension type.
6531 .. versionchanged:: 1.2
6532 Starting with pandas 1.2, this method also converts float columns
6533 to the nullable floating extension type.
6535 In the future, as new dtypes are added that support ``pd.NA``, the results
6536 of this method will change to support those new dtypes.
6538 Examples
6539 --------
6540 >>> df = pd.DataFrame(
6541 ... {
6542 ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
6543 ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
6544 ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
6545 ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
6546 ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
6547 ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
6548 ... }
6549 ... )
6551 Start with a DataFrame with default dtypes.
6553 >>> df
6554 a b c d e f
6555 0 1 x True h 10.0 NaN
6556 1 2 y False i NaN 100.5
6557 2 3 z NaN NaN 20.0 200.0
6559 >>> df.dtypes
6560 a int32
6561 b object
6562 c object
6563 d object
6564 e float64
6565 f float64
6566 dtype: object
6568 Convert the DataFrame to use best possible dtypes.
6570 >>> dfn = df.convert_dtypes()
6571 >>> dfn
6572 a b c d e f
6573 0 1 x True h 10 <NA>
6574 1 2 y False i <NA> 100.5
6575 2 3 z <NA> <NA> 20 200.0
6577 >>> dfn.dtypes
6578 a Int32
6579 b string
6580 c boolean
6581 d string
6582 e Int64
6583 f Float64
6584 dtype: object
6586 Start with a Series of strings and missing data represented by ``np.nan``.
6588 >>> s = pd.Series(["a", "b", np.nan])
6589 >>> s
6590 0 a
6591 1 b
6592 2 NaN
6593 dtype: object
6595 Obtain a Series with dtype ``StringDtype``.
6597 >>> s.convert_dtypes()
6598 0 a
6599 1 b
6600 2 <NA>
6601 dtype: string
6602 """
6603 if self.ndim == 1:
6604 return self._convert_dtypes(
6605 infer_objects,
6606 convert_string,
6607 convert_integer,
6608 convert_boolean,
6609 convert_floating,
6610 )
6611 else:
6612 results = [
6613 col._convert_dtypes(
6614 infer_objects,
6615 convert_string,
6616 convert_integer,
6617 convert_boolean,
6618 convert_floating,
6619 )
6620 for col_name, col in self.items()
6621 ]
6622 if len(results) > 0:
6623 result = concat(results, axis=1, copy=False, keys=self.columns)
6624 cons = cast(Type["DataFrame"], self._constructor)
6625 result = cons(result)
6626 result = result.__finalize__(self, method="convert_dtypes")
6627 # https://github.com/python/mypy/issues/8354
6628 return cast(NDFrameT, result)
6629 else:
6630 return self.copy()
6632 # ----------------------------------------------------------------------
6633 # Filling NA's
    # ``fillna`` overloads: with ``inplace=False`` (the default) a new
    # object of the caller's type is returned; with ``inplace=True`` the
    # fill happens in place and None is returned. The last overload covers
    # a runtime-valued ``inplace`` flag.
    @overload
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: Literal[False] = ...,
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT:
        ...

    @overload
    def fillna(
        self,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: Literal[True],
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> None:
        ...

    @overload
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: bool_t = ...,
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT | None:
        ...
    @doc(**_shared_doc_kwargs)
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = None,
        method: FillnaOptions | None = None,
        axis: Axis | None = None,
        inplace: bool_t = False,
        limit: int | None = None,
        downcast: dict | None = None,
    ) -> NDFrameT | None:
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, dict, Series, or DataFrame
            Value to use to fill holes (e.g. 0), alternately a
            dict/Series/DataFrame of values specifying which value to use for
            each index (for a Series) or column (for a DataFrame). Values not
            in the dict/Series/DataFrame will not be filled. This value cannot
            be a list.
        method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use next valid observation to fill gap.
        axis : {axes_single_arg}
            Axis along which to fill missing values. For `Series`
            this parameter is unused and defaults to 0.
        inplace : bool, default False
            If True, fill in-place. Note: this will modify any
            other views on this object (e.g., a no-copy slice for a column in a
            DataFrame).
        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        downcast : dict, default is None
            A dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible).

        Returns
        -------
        {klass} or None
            Object with missing values filled or None if ``inplace=True``.

        See Also
        --------
        interpolate : Fill NaN values using interpolation.
        reindex : Conform object to new index.
        asfreq : Convert TimeSeries to specified frequency.

        Examples
        --------
        >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
        ...                    [3, 4, np.nan, 1],
        ...                    [np.nan, np.nan, np.nan, np.nan],
        ...                    [np.nan, 3, np.nan, 4]],
        ...                   columns=list("ABCD"))
        >>> df
             A    B   C    D
        0  NaN  2.0 NaN  0.0
        1  3.0  4.0 NaN  1.0
        2  NaN  NaN NaN  NaN
        3  NaN  3.0 NaN  4.0

        Replace all NaN elements with 0s.

        >>> df.fillna(0)
             A    B    C    D
        0  0.0  2.0  0.0  0.0
        1  3.0  4.0  0.0  1.0
        2  0.0  0.0  0.0  0.0
        3  0.0  3.0  0.0  4.0

        We can also propagate non-null values forward or backward.

        >>> df.fillna(method="ffill")
             A    B   C    D
        0  NaN  2.0 NaN  0.0
        1  3.0  4.0 NaN  1.0
        2  3.0  4.0 NaN  1.0
        3  3.0  3.0 NaN  4.0

        Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,
        2, and 3 respectively.

        >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}}
        >>> df.fillna(value=values)
             A    B    C    D
        0  0.0  2.0  2.0  0.0
        1  3.0  4.0  2.0  1.0
        2  0.0  1.0  2.0  3.0
        3  0.0  3.0  2.0  4.0

        Only replace the first NaN element.

        >>> df.fillna(value=values, limit=1)
             A    B    C    D
        0  0.0  2.0  2.0  0.0
        1  3.0  4.0  NaN  1.0
        2  NaN  1.0  NaN  3.0
        3  NaN  3.0  NaN  4.0

        When filling using a DataFrame, replacement happens along
        the same column names and same indices

        >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE"))
        >>> df.fillna(df2)
             A    B    C    D
        0  0.0  2.0  0.0  0.0
        1  3.0  4.0  0.0  1.0
        2  0.0  0.0  0.0  NaN
        3  0.0  3.0  0.0  4.0

        Note that column D is not affected since it is not present in df2.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        # Rejects passing both `value` and `method`; maps method aliases
        # ("pad"/"backfill") to their canonical forms.
        value, method = validate_fillna_kwargs(value, method)

        self._consolidate_inplace()

        # set the default here, so functions examining the signature
        # can detect if something was set (e.g. in groupby) (GH9221)
        if axis is None:
            axis = 0
        axis = self._get_axis_number(axis)

        if value is None:
            # Method-based fill (ffill/bfill); delegated to the block
            # manager's `interpolate`, which handles pad/backfill.
            if not self._mgr.is_single_block and axis == 1:
                # Row-wise fill on a multi-block frame: fill column-wise on
                # the transpose, then transpose back.
                if inplace:
                    raise NotImplementedError()
                result = self.T.fillna(method=method, limit=limit).T

                return result

            new_data = self._mgr.interpolate(
                method=method,
                axis=axis,
                limit=limit,
                inplace=inplace,
                downcast=downcast,
            )
        else:
            if self.ndim == 1:
                # Series path: a dict/Series `value` is aligned to our index
                # and applied element-wise; scalars pass straight through.
                if isinstance(value, (dict, ABCSeries)):
                    if not len(value):
                        # Empty mapping fills nothing.
                        # test_fillna_nonscalar
                        if inplace:
                            return None
                        return self.copy()
                    value = create_series_with_explicit_dtype(
                        value, dtype_if_empty=object
                    )
                    value = value.reindex(self.index, copy=False)
                    value = value._values
                elif not is_list_like(value):
                    pass
                else:
                    raise TypeError(
                        '"value" parameter must be a scalar, dict '
                        "or Series, but you passed a "
                        f'"{type(value).__name__}"'
                    )

                new_data = self._mgr.fillna(
                    value=value, limit=limit, inplace=inplace, downcast=downcast
                )

            elif isinstance(value, (dict, ABCSeries)):
                # DataFrame filled from a column->value mapping: recurse into
                # each named column's own fillna.
                if axis == 1:
                    raise NotImplementedError(
                        "Currently only can fill "
                        "with dict/Series column "
                        "by column"
                    )

                result = self if inplace else self.copy()
                is_dict = isinstance(downcast, dict)
                for k, v in value.items():
                    if k not in result:
                        # Keys absent from the frame are silently skipped.
                        continue

                    # A dict `downcast` is looked up per column; any other
                    # downcast spec applies to every column.
                    # error: Item "None" of "Optional[Dict[Any, Any]]" has no
                    # attribute "get"
                    downcast_k = (
                        downcast
                        if not is_dict
                        else downcast.get(k)  # type: ignore[union-attr]
                    )

                    res_k = result[k].fillna(v, limit=limit, downcast=downcast_k)

                    if not inplace:
                        result[k] = res_k
                    else:
                        # We can write into our existing column(s) iff dtype
                        # was preserved.
                        if isinstance(res_k, ABCSeries):
                            # i.e. 'k' only shows up once in self.columns
                            if res_k.dtype == result[k].dtype:
                                result.loc[:, k] = res_k
                            else:
                                # Different dtype -> no way to do inplace.
                                result[k] = res_k
                        else:
                            # 'k' labels several columns; fill each positional
                            # slot individually.
                            # see test_fillna_dict_inplace_nonunique_columns
                            locs = result.columns.get_loc(k)
                            if isinstance(locs, slice):
                                locs = np.arange(self.shape[1])[locs]
                            elif (
                                isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
                            ):
                                locs = locs.nonzero()[0]
                            elif not (
                                isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
                            ):
                                # Should never be reached, but let's cover our bases
                                raise NotImplementedError(
                                    "Unexpected get_loc result, please report a bug at "
                                    "https://github.com/pandas-dev/pandas"
                                )

                            for i, loc in enumerate(locs):
                                res_loc = res_k.iloc[:, i]
                                target = self.iloc[:, loc]

                                if res_loc.dtype == target.dtype:
                                    result.iloc[:, loc] = res_loc
                                else:
                                    result.isetitem(loc, res_loc)

                # Early return: the dict path manages `result` itself instead
                # of rebuilding from a manager below.
                return result if not inplace else None

            elif not is_list_like(value):
                if axis == 1:
                    # Scalar fill along rows: transpose, fill, transpose back.
                    result = self.T.fillna(value=value, limit=limit).T

                    # error: Incompatible types in assignment (expression has type
                    # "NDFrameT", variable has type "Union[ArrayManager,
                    # SingleArrayManager, BlockManager, SingleBlockManager]")
                    new_data = result  # type: ignore[assignment]
                else:

                    new_data = self._mgr.fillna(
                        value=value, limit=limit, inplace=inplace, downcast=downcast
                    )
            elif isinstance(value, ABCDataFrame) and self.ndim == 2:
                # DataFrame fill value: keep our non-NA cells, take aligned
                # cells from `value` elsewhere.
                new_data = self.where(self.notna(), value)._mgr
            else:
                raise ValueError(f"invalid fill value with a {type(value)}")

        result = self._constructor(new_data)
        if inplace:
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="fillna")
    # ffill typing overloads: return type follows ``inplace`` exactly as in
    # ``fillna`` (False -> new object, True -> None, bool -> union).
    @overload
    def ffill(
        self: NDFrameT,
        *,
        axis: None | Axis = ...,
        inplace: Literal[False] = ...,
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT:
        ...

    @overload
    def ffill(
        self,
        *,
        axis: None | Axis = ...,
        inplace: Literal[True],
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> None:
        ...

    @overload
    def ffill(
        self: NDFrameT,
        *,
        axis: None | Axis = ...,
        inplace: bool_t = ...,
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT | None:
        ...
6970 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
6971 @doc(klass=_shared_doc_kwargs["klass"])
6972 def ffill(
6973 self: NDFrameT,
6974 axis: None | Axis = None,
6975 inplace: bool_t = False,
6976 limit: None | int = None,
6977 downcast: dict | None = None,
6978 ) -> NDFrameT | None:
6979 """
6980 Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
6982 Returns
6983 -------
6984 {klass} or None
6985 Object with missing values filled or None if ``inplace=True``.
6986 """
6987 return self.fillna(
6988 method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
6989 )
6991 pad = ffill
    # bfill typing overloads: return type follows ``inplace`` exactly as in
    # ``fillna`` (False -> new object, True -> None, bool -> union).
    @overload
    def bfill(
        self: NDFrameT,
        *,
        axis: None | Axis = ...,
        inplace: Literal[False] = ...,
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT:
        ...

    @overload
    def bfill(
        self,
        *,
        axis: None | Axis = ...,
        inplace: Literal[True],
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> None:
        ...

    @overload
    def bfill(
        self: NDFrameT,
        *,
        axis: None | Axis = ...,
        inplace: bool_t = ...,
        limit: None | int = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT | None:
        ...
7026 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
7027 @doc(klass=_shared_doc_kwargs["klass"])
7028 def bfill(
7029 self: NDFrameT,
7030 axis: None | Axis = None,
7031 inplace: bool_t = False,
7032 limit: None | int = None,
7033 downcast: dict | None = None,
7034 ) -> NDFrameT | None:
7035 """
7036 Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
7038 Returns
7039 -------
7040 {klass} or None
7041 Object with missing values filled or None if ``inplace=True``.
7042 """
7043 return self.fillna(
7044 method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
7045 )
7047 backfill = bfill
    # replace typing overloads: return type follows ``inplace``
    # (False -> new object, True -> None, bool -> union).
    @overload
    def replace(
        self: NDFrameT,
        to_replace=...,
        value=...,
        *,
        inplace: Literal[False] = ...,
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> NDFrameT:
        ...

    @overload
    def replace(
        self,
        to_replace=...,
        value=...,
        *,
        inplace: Literal[True],
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> None:
        ...

    @overload
    def replace(
        self: NDFrameT,
        to_replace=...,
        value=...,
        *,
        inplace: bool_t = ...,
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> NDFrameT | None:
        ...
    @deprecate_nonkeyword_arguments(
        version=None, allowed_args=["self", "to_replace", "value"]
    )
    @doc(
        _shared_docs["replace"],
        klass=_shared_doc_kwargs["klass"],
        inplace=_shared_doc_kwargs["inplace"],
        replace_iloc=_shared_doc_kwargs["replace_iloc"],
    )
    def replace(
        self: NDFrameT,
        to_replace=None,
        value=lib.no_default,
        inplace: bool_t = False,
        limit: int | None = None,
        regex: bool_t = False,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
    ) -> NDFrameT | None:
        # Docstring comes from _shared_docs["replace"] via the @doc decorator.
        # This method dispatches on the shapes of `to_replace`/`value`/`regex`:
        # scalars, lists, flat dicts, nested dicts, and regex variants each
        # take a different path below.
        if not (
            is_scalar(to_replace)
            or is_re_compilable(to_replace)
            or is_list_like(to_replace)
        ):
            raise TypeError(
                "Expecting 'to_replace' to be either a scalar, array-like, "
                "dict or None, got invalid type "
                f"{repr(type(to_replace).__name__)}"
            )

        inplace = validate_bool_kwarg(inplace, "inplace")
        if not is_bool(regex) and to_replace is not None:
            # A non-bool `regex` is itself treated as the pattern(s), which is
            # only legal when `to_replace` was not given.
            raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")

        self._consolidate_inplace()

        if value is lib.no_default or method is not lib.no_default:
            # GH#36984 if the user explicitly passes value=None we want to
            # respect that. We have the corner case where the user explicitly
            # passes value=None *and* a method, which we interpret as meaning
            # they want the (documented) default behavior.
            if method is lib.no_default:
                # TODO: get this to show up as the default in the docs?
                method = "pad"

            # passing a single value that is scalar like
            # when value is None (GH5319), for compat
            if not is_dict_like(to_replace) and not is_dict_like(regex):
                to_replace = [to_replace]

            if isinstance(to_replace, (tuple, list)):
                # Method-based replacement (fill forward/backward over the
                # matched positions), applied column-by-column for frames.
                if isinstance(self, ABCDataFrame):
                    from pandas import Series

                    result = self.apply(
                        Series._replace_single,
                        args=(to_replace, method, inplace, limit),
                    )
                    if inplace:
                        return None
                    return result
                return self._replace_single(to_replace, method, inplace, limit)

            if not is_dict_like(to_replace):
                if not is_dict_like(regex):
                    raise TypeError(
                        'If "to_replace" and "value" are both None '
                        'and "to_replace" is not a list, then '
                        "regex must be a mapping"
                    )
                # A mapping passed via `regex` stands in for `to_replace`.
                to_replace = regex
                regex = True

            items = list(to_replace.items())
            if items:
                keys, values = zip(*items)
            else:
                keys, values = ([], [])

            are_mappings = [is_dict_like(v) for v in values]

            if any(are_mappings):
                if not all(are_mappings):
                    raise TypeError(
                        "If a nested mapping is passed, all values "
                        "of the top level mapping must be mappings"
                    )
                # passed a nested dict/Series: split {col: {old: new}} into
                # parallel {col: [old...]} and {col: [new...]} dicts.
                to_rep_dict = {}
                value_dict = {}

                for k, v in items:
                    keys, values = list(zip(*v.items())) or ([], [])

                    to_rep_dict[k] = list(keys)
                    value_dict[k] = list(values)

                to_replace, value = to_rep_dict, value_dict
            else:
                to_replace, value = keys, values

            # Re-enter with the normalized (to_replace, value) pair.
            return self.replace(
                to_replace, value, inplace=inplace, limit=limit, regex=regex
            )
        else:

            # need a non-zero len on all axes
            if not self.size:
                if inplace:
                    return None
                return self.copy()

            if is_dict_like(to_replace):
                if is_dict_like(value):  # {'A' : NA} -> {'A' : 0}
                    # Note: Checking below for `in foo.keys()` instead of
                    # `in foo` is needed for when we have a Series and not dict
                    mapping = {
                        col: (to_replace[col], value[col])
                        for col in to_replace.keys()
                        if col in value.keys() and col in self
                    }
                    return self._replace_columnwise(mapping, inplace, regex)

                # {'A': NA} -> 0
                elif not is_list_like(value):
                    # Operate column-wise
                    if self.ndim == 1:
                        raise ValueError(
                            "Series.replace cannot use dict-like to_replace "
                            "and non-None value"
                        )
                    mapping = {
                        col: (to_rep, value) for col, to_rep in to_replace.items()
                    }
                    return self._replace_columnwise(mapping, inplace, regex)
                else:
                    raise TypeError("value argument must be scalar, dict, or Series")

            elif is_list_like(to_replace):
                if not is_list_like(value):
                    # e.g. to_replace = [NA, ''] and value is 0,
                    # so we replace NA with 0 and then replace '' with 0
                    value = [value] * len(to_replace)

                # e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
                if len(to_replace) != len(value):
                    raise ValueError(
                        f"Replacement lists must match in length. "
                        f"Expecting {len(to_replace)} got {len(value)} "
                    )
                new_data = self._mgr.replace_list(
                    src_list=to_replace,
                    dest_list=value,
                    inplace=inplace,
                    regex=regex,
                )

            elif to_replace is None:
                # Pure-regex call form: patterns arrived via `regex`.
                if not (
                    is_re_compilable(regex)
                    or is_list_like(regex)
                    or is_dict_like(regex)
                ):
                    raise TypeError(
                        f"'regex' must be a string or a compiled regular expression "
                        f"or a list or dict of strings or regular expressions, "
                        f"you passed a {repr(type(regex).__name__)}"
                    )
                return self.replace(
                    regex, value, inplace=inplace, limit=limit, regex=True
                )
            else:

                # dest iterable dict-like
                if is_dict_like(value):  # NA -> {'A' : 0, 'B' : -1}
                    # Operate column-wise
                    if self.ndim == 1:
                        raise ValueError(
                            "Series.replace cannot use dict-value and "
                            "non-None to_replace"
                        )
                    mapping = {col: (to_replace, val) for col, val in value.items()}
                    return self._replace_columnwise(mapping, inplace, regex)

                elif not is_list_like(value):  # NA -> 0
                    regex = should_use_regex(regex, to_replace)
                    if regex:
                        new_data = self._mgr.replace_regex(
                            to_replace=to_replace,
                            value=value,
                            inplace=inplace,
                        )
                    else:
                        new_data = self._mgr.replace(
                            to_replace=to_replace, value=value, inplace=inplace
                        )
                else:
                    raise TypeError(
                        f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
                    )

        result = self._constructor(new_data)
        if inplace:
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="replace")
    def interpolate(
        self: NDFrameT,
        method: str = "linear",
        axis: Axis = 0,
        limit: int | None = None,
        inplace: bool_t = False,
        limit_direction: str | None = None,
        limit_area: str | None = None,
        downcast: str | None = None,
        **kwargs,
    ) -> NDFrameT | None:
        """
        Fill NaN values using an interpolation method.

        Please note that only ``method='linear'`` is supported for
        DataFrame/Series with a MultiIndex.

        Parameters
        ----------
        method : str, default 'linear'
            Interpolation technique to use. One of:

            * 'linear': Ignore the index and treat the values as equally
              spaced. This is the only method supported on MultiIndexes.
            * 'time': Works on daily and higher resolution data to interpolate
              given length of interval.
            * 'index', 'values': use the actual numerical values of the index.
            * 'pad': Fill in NaNs using existing values.
            * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline',
              'barycentric', 'polynomial': Passed to
              `scipy.interpolate.interp1d`. These methods use the numerical
              values of the index. Both 'polynomial' and 'spline' require that
              you also specify an `order` (int), e.g.
              ``df.interpolate(method='polynomial', order=5)``.
            * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
              'cubicspline': Wrappers around the SciPy interpolation methods of
              similar names. See `Notes`.
            * 'from_derivatives': Refers to
              `scipy.interpolate.BPoly.from_derivatives` which
              replaces 'piecewise_polynomial' interpolation method in
              scipy 0.18.

        axis : {{0 or 'index', 1 or 'columns', None}}, default None
            Axis to interpolate along. For `Series` this parameter is unused
            and defaults to 0.
        limit : int, optional
            Maximum number of consecutive NaNs to fill. Must be greater than
            0.
        inplace : bool, default False
            Update the data in place if possible.
        limit_direction : {{'forward', 'backward', 'both'}}, Optional
            Consecutive NaNs will be filled in this direction.

            If limit is specified:
                * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
                * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
                  'backwards'.

            If 'limit' is not specified:
                * If 'method' is 'backfill' or 'bfill', the default is 'backward'
                * else the default is 'forward'

            .. versionchanged:: 1.1.0
                raises ValueError if `limit_direction` is 'forward' or 'both' and
                method is 'backfill' or 'bfill'.
                raises ValueError if `limit_direction` is 'backward' or 'both' and
                method is 'pad' or 'ffill'.

        limit_area : {{`None`, 'inside', 'outside'}}, default None
            If limit is specified, consecutive NaNs will be filled with this
            restriction.

            * ``None``: No fill restriction.
            * 'inside': Only fill NaNs surrounded by valid values
              (interpolate).
            * 'outside': Only fill NaNs outside valid values (extrapolate).

        downcast : optional, 'infer' or None, defaults to None
            Downcast dtypes if possible.
        ``**kwargs`` : optional
            Keyword arguments to pass on to the interpolating function.

        Returns
        -------
        Series or DataFrame or None
            Returns the same object type as the caller, interpolated at
            some or all ``NaN`` values or None if ``inplace=True``.

        See Also
        --------
        fillna : Fill missing values using different methods.
        scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials
            (Akima interpolator).
        scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the
            Bernstein basis.
        scipy.interpolate.interp1d : Interpolate a 1-D function.
        scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh
            interpolator).
        scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic
            interpolation.
        scipy.interpolate.CubicSpline : Cubic spline data interpolator.

        Notes
        -----
        The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
        methods are wrappers around the respective SciPy implementations of
        similar names. These use the actual numerical values of the index.
        For more information on their behavior, see the
        `SciPy documentation
        <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__.

        Examples
        --------
        Filling in ``NaN`` in a :class:`~pandas.Series` via linear
        interpolation.

        >>> s = pd.Series([0, 1, np.nan, 3])
        >>> s
        0    0.0
        1    1.0
        2    NaN
        3    3.0
        dtype: float64
        >>> s.interpolate()
        0    0.0
        1    1.0
        2    2.0
        3    3.0
        dtype: float64

        Filling in ``NaN`` in a Series by padding, but filling at most two
        consecutive ``NaN`` at a time.

        >>> s = pd.Series([np.nan, "single_one", np.nan,
        ...                "fill_two_more", np.nan, np.nan, np.nan,
        ...                4.71, np.nan])
        >>> s
        0              NaN
        1       single_one
        2              NaN
        3    fill_two_more
        4              NaN
        5              NaN
        6              NaN
        7             4.71
        8              NaN
        dtype: object
        >>> s.interpolate(method='pad', limit=2)
        0              NaN
        1       single_one
        2       single_one
        3    fill_two_more
        4    fill_two_more
        5    fill_two_more
        6              NaN
        7             4.71
        8             4.71
        dtype: object

        Filling in ``NaN`` in a Series via polynomial interpolation or splines:
        Both 'polynomial' and 'spline' methods require that you also specify
        an ``order`` (int).

        >>> s = pd.Series([0, 2, np.nan, 8])
        >>> s.interpolate(method='polynomial', order=2)
        0    0.000000
        1    2.000000
        2    4.666667
        3    8.000000
        dtype: float64

        Fill the DataFrame forward (that is, going down) along each column
        using linear interpolation.

        Note how the last entry in column 'a' is interpolated differently,
        because there is no entry after it to use for interpolation.
        Note how the first entry in column 'b' remains ``NaN``, because there
        is no entry before it to use for interpolation.

        >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),
        ...                    (np.nan, 2.0, np.nan, np.nan),
        ...                    (2.0, 3.0, np.nan, 9.0),
        ...                    (np.nan, 4.0, -4.0, 16.0)],
        ...                   columns=list('abcd'))
        >>> df
             a    b    c     d
        0  0.0  NaN -1.0   1.0
        1  NaN  2.0  NaN   NaN
        2  2.0  3.0  NaN   9.0
        3  NaN  4.0 -4.0  16.0
        >>> df.interpolate(method='linear', limit_direction='forward', axis=0)
             a    b    c     d
        0  0.0  NaN -1.0   1.0
        1  1.0  2.0 -2.0   5.0
        2  2.0  3.0 -3.0   9.0
        3  2.0  4.0 -4.0  16.0

        Using polynomial interpolation.

        >>> df['d'].interpolate(method='polynomial', order=2)
        0     1.0
        1     4.0
        2     9.0
        3    16.0
        Name: d, dtype: float64
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        axis = self._get_axis_number(axis)

        # For genuine interpolation methods, axis=1 is implemented by
        # transposing, interpolating down columns, and transposing back;
        # pad/backfill methods are handled natively along the axis.
        fillna_methods = ["ffill", "bfill", "pad", "backfill"]
        should_transpose = axis == 1 and method not in fillna_methods

        obj = self.T if should_transpose else self

        if obj.empty:
            # Nothing to fill; return a copy of self unchanged.
            return self.copy()

        if method not in fillna_methods:
            axis = self._info_axis_number

        if isinstance(obj.index, MultiIndex) and method != "linear":
            raise ValueError(
                "Only `method=linear` interpolation is supported on MultiIndexes."
            )

        # Set `limit_direction` depending on `method`
        if limit_direction is None:
            limit_direction = (
                "backward" if method in ("backfill", "bfill") else "forward"
            )
        else:
            # Directional fill methods only make sense with a matching
            # limit_direction; reject contradictory combinations.
            if method in ("pad", "ffill") and limit_direction != "forward":
                raise ValueError(
                    f"`limit_direction` must be 'forward' for method `{method}`"
                )
            if method in ("backfill", "bfill") and limit_direction != "backward":
                raise ValueError(
                    f"`limit_direction` must be 'backward' for method `{method}`"
                )

        if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")):
            raise TypeError(
                "Cannot interpolate with all object-dtype columns "
                "in the DataFrame. Try setting at least one "
                "column to a numeric dtype."
            )

        # create/use the index
        if method == "linear":
            # prior default: equally-spaced positions, ignoring the real index
            index = Index(np.arange(len(obj.index)))
        else:
            index = obj.index
            # Methods in `methods` define their own index handling; anything
            # else needs a numeric/datetime-like index to interpolate against.
            methods = {"index", "values", "nearest", "time"}
            is_numeric_or_datetime = (
                is_numeric_dtype(index.dtype)
                or is_datetime64_any_dtype(index.dtype)
                or is_timedelta64_dtype(index.dtype)
            )
            if method not in methods and not is_numeric_or_datetime:
                raise ValueError(
                    "Index column must be numeric or datetime type when "
                    f"using {method} method other than linear. "
                    "Try setting a numeric or datetime index column before "
                    "interpolating."
                )

        if isna(index).any():
            raise NotImplementedError(
                "Interpolation with NaNs in the index "
                "has not been implemented. Try filling "
                "those NaNs before interpolating."
            )
        # Delegate the actual value computation to the block manager.
        new_data = obj._mgr.interpolate(
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            inplace=inplace,
            downcast=downcast,
            **kwargs,
        )

        result = self._constructor(new_data)
        if should_transpose:
            # Undo the transpose applied above.
            result = result.T
        if inplace:
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="interpolate")
7588 # ----------------------------------------------------------------------
7589 # Timeseries methods Methods
    @final
    def asof(self, where, subset=None):
        """
        Return the last row(s) without any NaNs before `where`.

        The last row (for each element in `where`, if list) without any
        NaN is taken.
        In case of a :class:`~pandas.DataFrame`, the last row without NaN
        considering only the subset of columns (if not `None`)

        If there is no good value, NaN is returned for a Series or
        a Series of NaN values for a DataFrame

        Parameters
        ----------
        where : date or array-like of dates
            Date(s) before which the last row(s) are returned.
        subset : str or array-like of str, default `None`
            For DataFrame, if not `None`, only use these columns to
            check for NaNs.

        Returns
        -------
        scalar, Series, or DataFrame

            The return can be:

            * scalar : when `self` is a Series and `where` is a scalar
            * Series: when `self` is a Series and `where` is an array-like,
              or when `self` is a DataFrame and `where` is a scalar
            * DataFrame : when `self` is a DataFrame and `where` is an
              array-like

            Return scalar, Series, or DataFrame.

        See Also
        --------
        merge_asof : Perform an asof merge. Similar to left join.

        Notes
        -----
        Dates are assumed to be sorted. Raises if this is not the case.

        Examples
        --------
        A Series and a scalar `where`.

        >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])
        >>> s
        10    1.0
        20    2.0
        30    NaN
        40    4.0
        dtype: float64

        >>> s.asof(20)
        2.0

        For a sequence `where`, a Series is returned. The first value is
        NaN, because the first element of `where` is before the first
        index value.

        >>> s.asof([5, 20])
        5     NaN
        20    2.0
        dtype: float64

        Missing values are not considered. The following is ``2.0``, not
        NaN, even though NaN is at the index location for ``30``.

        >>> s.asof(30)
        2.0

        Take all columns into consideration

        >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50],
        ...                    'b': [None, None, None, None, 500]},
        ...                   index=pd.DatetimeIndex(['2018-02-27 09:01:00',
        ...                                           '2018-02-27 09:02:00',
        ...                                           '2018-02-27 09:03:00',
        ...                                           '2018-02-27 09:04:00',
        ...                                           '2018-02-27 09:05:00']))
        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']))
                              a   b
        2018-02-27 09:03:30 NaN NaN
        2018-02-27 09:04:30 NaN NaN

        Take a single column into consideration

        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']),
        ...         subset=['a'])
                              a   b
        2018-02-27 09:03:30  30 NaN
        2018-02-27 09:04:30  40 NaN
        """
        if isinstance(where, str):
            where = Timestamp(where)

        if not self.index.is_monotonic_increasing:
            raise ValueError("asof requires a sorted index")

        is_series = isinstance(self, ABCSeries)
        if is_series:
            if subset is not None:
                raise ValueError("subset is not valid for Series")
        else:
            # DataFrame: default the NaN-check columns to all columns, and
            # normalize a single label to a one-element list.
            if subset is None:
                subset = self.columns
            if not is_list_like(subset):
                subset = [subset]

        is_list = is_list_like(where)
        if not is_list:
            # Scalar `where` fast path.
            start = self.index[0]
            if isinstance(self.index, PeriodIndex):
                where = Period(where, freq=self.index.freq)

            if where < start:
                # Nothing precedes `where`: NaN scalar (Series) or all-NaN
                # row (DataFrame).
                if not is_series:
                    return self._constructor_sliced(
                        index=self.columns, name=where, dtype=np.float64
                    )
                return np.nan

            # It's always much faster to use a *while* loop here for
            # Series than pre-computing all the NAs. However a
            # *while* loop is extremely expensive for DataFrame
            # so we later pre-compute all the NAs and use the same
            # code path whether *where* is a scalar or list.
            # See PR: https://github.com/pandas-dev/pandas/pull/14476
            if is_series:
                # Walk back from the insertion point past any NaNs.
                loc = self.index.searchsorted(where, side="right")
                if loc > 0:
                    loc -= 1

                values = self._values
                while loc > 0 and isna(values[loc]):
                    loc -= 1
                return values[loc]

        if not isinstance(where, Index):
            where = Index(where) if is_list else Index([where])

        # Boolean mask of rows that do not qualify (any NaN in `subset`).
        nulls = self.isna() if is_series else self[subset].isna().any(axis=1)
        if nulls.all():
            # No valid row at all: return an all-NaN result of the
            # appropriate shape.
            if is_series:
                self = cast("Series", self)
                return self._constructor(np.nan, index=where, name=self.name)
            elif is_list:
                self = cast("DataFrame", self)
                return self._constructor(np.nan, index=where, columns=self.columns)
            else:
                self = cast("DataFrame", self)
                return self._constructor_sliced(
                    np.nan, index=self.columns, name=where[0]
                )

        locs = self.index.asof_locs(where, ~(nulls._values))

        # mask the missing
        missing = locs == -1
        data = self.take(locs)
        data.index = where
        if missing.any():
            # GH#16063 only do this setting when necessary, otherwise
            # we'd cast e.g. bools to floats
            data.loc[missing] = np.nan
        return data if is_list else data.iloc[-1]
7762 # ----------------------------------------------------------------------
7763 # Action Methods
@doc(klass=_shared_doc_kwargs["klass"])
def isna(self: NDFrameT) -> NDFrameT:
    """
    Detect missing values.

    Return a boolean same-sized object indicating if the values are NA.
    NA values, such as None or :attr:`numpy.NaN`, get mapped to True
    values.
    Everything else gets mapped to False values. Characters such as empty
    strings ``''`` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).

    Returns
    -------
    {klass}
        Mask of bool values for each element in {klass} that
        indicates whether an element is an NA value.

    See Also
    --------
    {klass}.isnull : Alias of isna.
    {klass}.notna : Boolean inverse of isna.
    {klass}.dropna : Omit axes labels with missing values.
    isna : Top-level isna.

    Examples
    --------
    Show which entries in a DataFrame are NA.

    >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
    ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
    ...                              pd.Timestamp('1940-04-25')],
    ...                        name=['Alfred', 'Batman', ''],
    ...                        toy=[None, 'Batmobile', 'Joker']))
    >>> df
       age       born    name        toy
    0  5.0        NaT  Alfred       None
    1  6.0 1939-05-27  Batman  Batmobile
    2  NaN 1940-04-25              Joker

    >>> df.isna()
         age   born   name    toy
    0  False   True  False   True
    1  False  False  False  False
    2   True  False  False  False

    Show which entries in a Series are NA.

    >>> ser = pd.Series([5, 6, np.NaN])
    >>> ser
    0    5.0
    1    6.0
    2    NaN
    dtype: float64

    >>> ser.isna()
    0    False
    1    False
    2     True
    dtype: bool
    """
    # The module-level ``isna`` builds the boolean mask; ``__finalize__``
    # then propagates metadata from this object onto the result.
    mask = isna(self)
    return mask.__finalize__(self, method="isna")
@doc(isna, klass=_shared_doc_kwargs["klass"])
def isnull(self: NDFrameT) -> NDFrameT:
    # Alias of ``isna``; only the ``method`` tag recorded by
    # ``__finalize__`` differs.
    mask = isna(self)
    return mask.__finalize__(self, method="isnull")
@doc(klass=_shared_doc_kwargs["klass"])
def notna(self: NDFrameT) -> NDFrameT:
    """
    Detect existing (non-missing) values.

    Return a boolean same-sized object indicating if the values are not NA.
    Non-missing values get mapped to True. Characters such as empty
    strings ``''`` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).
    NA values, such as None or :attr:`numpy.NaN`, get mapped to False
    values.

    Returns
    -------
    {klass}
        Mask of bool values for each element in {klass} that
        indicates whether an element is not an NA value.

    See Also
    --------
    {klass}.notnull : Alias of notna.
    {klass}.isna : Boolean inverse of notna.
    {klass}.dropna : Omit axes labels with missing values.
    notna : Top-level notna.

    Examples
    --------
    Show which entries in a DataFrame are not NA.

    >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
    ...                        born=[pd.NaT, pd.Timestamp('1939-05-27'),
    ...                              pd.Timestamp('1940-04-25')],
    ...                        name=['Alfred', 'Batman', ''],
    ...                        toy=[None, 'Batmobile', 'Joker']))
    >>> df
       age       born    name        toy
    0  5.0        NaT  Alfred       None
    1  6.0 1939-05-27  Batman  Batmobile
    2  NaN 1940-04-25              Joker

    >>> df.notna()
         age   born  name    toy
    0   True  False  True  False
    1   True   True  True   True
    2  False   True  True   True

    Show which entries in a Series are not NA.

    >>> ser = pd.Series([5, 6, np.NaN])
    >>> ser
    0    5.0
    1    6.0
    2    NaN
    dtype: float64

    >>> ser.notna()
    0     True
    1     True
    2    False
    dtype: bool
    """
    # The module-level ``notna`` builds the boolean mask; ``__finalize__``
    # then propagates metadata from this object onto the result.
    mask = notna(self)
    return mask.__finalize__(self, method="notna")
@doc(notna, klass=_shared_doc_kwargs["klass"])
def notnull(self: NDFrameT) -> NDFrameT:
    # Alias of ``notna``; only the ``method`` tag recorded by
    # ``__finalize__`` differs.
    mask = notna(self)
    return mask.__finalize__(self, method="notnull")
7899 @final
7900 def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
7901 if (lower is not None and np.any(isna(lower))) or (
7902 upper is not None and np.any(isna(upper))
7903 ):
7904 raise ValueError("Cannot use an NA value as a clip threshold")
7906 result = self
7907 mask = isna(self._values)
7909 with np.errstate(all="ignore"):
7910 if upper is not None:
7911 subset = self <= upper
7912 result = result.where(subset, upper, axis=None, inplace=False)
7913 if lower is not None:
7914 subset = self >= lower
7915 result = result.where(subset, lower, axis=None, inplace=False)
7917 if np.any(mask):
7918 result[mask] = np.nan
7920 if inplace:
7921 return self._update_inplace(result)
7922 else:
7923 return result
7925 @final
7926 def _clip_with_one_bound(self, threshold, method, axis, inplace):
7928 if axis is not None:
7929 axis = self._get_axis_number(axis)
7931 # method is self.le for upper bound and self.ge for lower bound
7932 if is_scalar(threshold) and is_number(threshold):
7933 if method.__name__ == "le":
7934 return self._clip_with_scalar(None, threshold, inplace=inplace)
7935 return self._clip_with_scalar(threshold, None, inplace=inplace)
7937 # GH #15390
7938 # In order for where method to work, the threshold must
7939 # be transformed to NDFrame from other array like structure.
7940 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
7941 if isinstance(self, ABCSeries):
7942 threshold = self._constructor(threshold, index=self.index)
7943 else:
7944 threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]
7946 # GH 40420
7947 # Treat missing thresholds as no bounds, not clipping the values
7948 if is_list_like(threshold):
7949 fill_value = np.inf if method.__name__ == "le" else -np.inf
7950 threshold_inf = threshold.fillna(fill_value)
7951 else:
7952 threshold_inf = threshold
7954 subset = method(threshold_inf, axis=axis) | isna(self)
7956 # GH 40420
7957 return self.where(subset, threshold, axis=axis, inplace=inplace)
def clip(
    self: NDFrameT,
    lower=None,
    upper=None,
    axis: Axis | None = None,
    inplace: bool_t = False,
    *args,
    **kwargs,
) -> NDFrameT | None:
    """
    Trim values at input threshold(s).

    Assigns values outside boundary to boundary values. Thresholds
    can be singular values or array like, and in the latter case
    the clipping is performed element-wise in the specified axis.

    Parameters
    ----------
    lower : float or array-like, default None
        Minimum threshold value. All values below this
        threshold will be set to it. A missing
        threshold (e.g `NA`) will not clip the value.
    upper : float or array-like, default None
        Maximum threshold value. All values above this
        threshold will be set to it. A missing
        threshold (e.g `NA`) will not clip the value.
    axis : {{0 or 'index', 1 or 'columns', None}}, default None
        Align object with lower and upper along the given axis.
        For `Series` this parameter is unused and defaults to `None`.
    inplace : bool, default False
        Whether to perform the operation in place on the data.
    *args, **kwargs
        Additional keywords have no effect but might be accepted
        for compatibility with numpy.

    Returns
    -------
    Series or DataFrame or None
        Same type as calling object with the values outside the
        clip boundaries replaced or None if ``inplace=True``.

    See Also
    --------
    Series.clip : Trim values at input threshold in series.
    DataFrame.clip : Trim values at input threshold in dataframe.
    numpy.clip : Clip (limit) the values in an array.

    Examples
    --------
    >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
    >>> df = pd.DataFrame(data)
    >>> df
       col_0  col_1
    0      9     -2
    1     -3     -7
    2      0      6
    3     -1      8
    4      5     -5

    Clips per column using lower and upper thresholds:

    >>> df.clip(-4, 6)
       col_0  col_1
    0      6     -2
    1     -3     -4
    2      0      6
    3     -1      6
    4      5     -4

    Clips using specific lower and upper thresholds per column element:

    >>> t = pd.Series([2, -4, -1, 6, 3])
    >>> t
    0    2
    1   -4
    2   -1
    3    6
    4    3
    dtype: int64

    >>> df.clip(t, t + 4, axis=0)
       col_0  col_1
    0      6      2
    1     -3     -4
    2      0      3
    3      6      8
    4      5      3

    Clips using specific lower threshold per column element, with missing values:

    >>> t = pd.Series([2, -4, np.NaN, 6, 3])
    >>> t
    0    2.0
    1   -4.0
    2    NaN
    3    6.0
    4    3.0
    dtype: float64

    >>> df.clip(t, axis=0)
       col_0  col_1
    0      9      2
    1     -3     -4
    2      0      6
    3      6      8
    4      5      3
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    axis = nv.validate_clip_with_axis(axis, args, kwargs)
    if axis is not None:
        axis = self._get_axis_number(axis)

    # GH 17276: numpy does not accept NaN as a clip value, so an NA
    # scalar bound is treated as "no bound".
    # GH 19992: an all-NA list-like bound is likewise dropped entirely.
    def _discard_na_bound(bound):
        na = isna(bound)
        if not is_list_like(bound):
            return None if np.any(na) else bound
        return None if np.all(na) else bound

    lower = _discard_na_bound(lower)
    upper = _discard_na_bound(upper)

    # GH 2747: tolerate reversed scalar bounds by swapping them.
    if (
        is_scalar(lower)
        and is_scalar(upper)
        and lower is not None
        and upper is not None
    ):
        lower, upper = min(lower, upper), max(lower, upper)

    # Fast path: both bounds absent or numeric scalars.
    lower_is_scalar = lower is None or (is_scalar(lower) and is_number(lower))
    upper_is_scalar = upper is None or (is_scalar(upper) and is_number(upper))
    if lower_is_scalar and upper_is_scalar:
        return self._clip_with_scalar(lower, upper, inplace=inplace)

    # Otherwise apply each bound separately via ``where``.
    result = self
    if lower is not None:
        result = result._clip_with_one_bound(
            lower, method=self.ge, axis=axis, inplace=inplace
        )
    if upper is not None:
        if inplace:
            # The lower clip already mutated ``self``; restart from it.
            result = self
        result = result._clip_with_one_bound(
            upper, method=self.le, axis=axis, inplace=inplace
        )

    return result
@doc(**_shared_doc_kwargs)
def asfreq(
    self: NDFrameT,
    freq: Frequency,
    method: FillnaOptions | None = None,
    how: str | None = None,
    normalize: bool_t = False,
    fill_value: Hashable = None,
) -> NDFrameT:
    """
    Convert time series to specified frequency.

    Returns the original data conformed to a new index with the specified
    frequency.

    If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index
    is the result of transforming the original index with
    :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index
    will map one-to-one to the new index).

    Otherwise, the new index will be equivalent to ``pd.date_range(start, end,
    freq=freq)`` where ``start`` and ``end`` are, respectively, the first and
    last entries in the original index (see :func:`pandas.date_range`). The
    values corresponding to any timesteps in the new index which were not present
    in the original index will be null (``NaN``), unless a method for filling
    such unknowns is provided (see the ``method`` parameter below).

    The :meth:`resample` method is more appropriate if an operation on each group of
    timesteps (such as an aggregate) is necessary to represent the data at the new
    frequency.

    Parameters
    ----------
    freq : DateOffset or str
        Frequency DateOffset or string.
    method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None
        Method to use for filling holes in reindexed Series (note this
        does not fill NaNs that already were present):

        * 'pad' / 'ffill': propagate last valid observation forward to next
          valid
        * 'backfill' / 'bfill': use NEXT valid observation to fill.
    how : {{'start', 'end'}}, default end
        For PeriodIndex only (see PeriodIndex.asfreq).
    normalize : bool, default False
        Whether to reset output index to midnight.
    fill_value : scalar, optional
        Value to use for missing values, applied during upsampling (note
        this does not fill NaNs that already were present).

    Returns
    -------
    {klass}
        {klass} object reindexed to the specified frequency.

    See Also
    --------
    reindex : Conform DataFrame to new index with optional filling logic.

    Notes
    -----
    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Start by creating a series with 4 one minute timestamps.

    >>> index = pd.date_range('1/1/2000', periods=4, freq='T')
    >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
    >>> df = pd.DataFrame({{'s': series}})
    >>> df
                           s
    2000-01-01 00:00:00    0.0
    2000-01-01 00:01:00    NaN
    2000-01-01 00:02:00    2.0
    2000-01-01 00:03:00    3.0

    Upsample the series into 30 second bins.

    >>> df.asfreq(freq='30S')
                           s
    2000-01-01 00:00:00    0.0
    2000-01-01 00:00:30    NaN
    2000-01-01 00:01:00    NaN
    2000-01-01 00:01:30    NaN
    2000-01-01 00:02:00    2.0
    2000-01-01 00:02:30    NaN
    2000-01-01 00:03:00    3.0

    Upsample again, providing a ``fill value``.

    >>> df.asfreq(freq='30S', fill_value=9.0)
                           s
    2000-01-01 00:00:00    0.0
    2000-01-01 00:00:30    9.0
    2000-01-01 00:01:00    NaN
    2000-01-01 00:01:30    9.0
    2000-01-01 00:02:00    2.0
    2000-01-01 00:02:30    9.0
    2000-01-01 00:03:00    3.0

    Upsample again, providing a ``method``.

    >>> df.asfreq(freq='30S', method='bfill')
                           s
    2000-01-01 00:00:00    0.0
    2000-01-01 00:00:30    NaN
    2000-01-01 00:01:00    NaN
    2000-01-01 00:01:30    2.0
    2000-01-01 00:02:00    2.0
    2000-01-01 00:02:30    3.0
    2000-01-01 00:03:00    3.0
    """
    # All the real work happens in pandas.core.resample; this method is a
    # thin, documented entry point (imported locally to avoid a cycle).
    from pandas.core.resample import asfreq as _asfreq

    return _asfreq(
        self,
        freq,
        method=method,
        how=how,
        normalize=normalize,
        fill_value=fill_value,
    )
@final
def at_time(self: NDFrameT, time, asof: bool_t = False, axis=None) -> NDFrameT:
    """
    Select values at particular time of day (e.g., 9:30AM).

    Parameters
    ----------
    time : datetime.time or str
    axis : {0 or 'index', 1 or 'columns'}, default 0
        For `Series` this parameter is unused and defaults to 0.

    Returns
    -------
    Series or DataFrame

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    between_time : Select values between particular times of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_at_time : Get just the index locations for
        values at particular time of the day.

    Examples
    --------
    >>> i = pd.date_range('2018-04-09', periods=4, freq='12H')
    >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
    >>> ts
                         A
    2018-04-09 00:00:00  1
    2018-04-09 12:00:00  2
    2018-04-10 00:00:00  3
    2018-04-10 12:00:00  4

    >>> ts.at_time('12:00')
                         A
    2018-04-09 12:00:00  2
    2018-04-10 12:00:00  4
    """
    # Default to the stat axis (rows) when no axis is supplied.
    axis = self._stat_axis_number if axis is None else axis
    axis = self._get_axis_number(axis)

    index = self._get_axis(axis)

    if not isinstance(index, DatetimeIndex):
        raise TypeError("Index must be DatetimeIndex")

    locs = index.indexer_at_time(time, asof=asof)
    return self._take_with_is_copy(locs, axis=axis)
@final
def between_time(
    self: NDFrameT,
    start_time,
    end_time,
    include_start: bool_t | lib.NoDefault = lib.no_default,
    include_end: bool_t | lib.NoDefault = lib.no_default,
    inclusive: IntervalClosedType | None = None,
    axis=None,
) -> NDFrameT:
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    By setting ``start_time`` to be later than ``end_time``,
    you can get the times that are *not* between the two times.

    Parameters
    ----------
    start_time : datetime.time or str
        Initial time as a time filter limit.
    end_time : datetime.time or str
        End time as a time filter limit.
    include_start : bool, default True
        Whether the start time needs to be included in the result.

        .. deprecated:: 1.4.0
           Arguments `include_start` and `include_end` have been deprecated
           to standardize boundary inputs. Use `inclusive` instead, to set
           each bound as closed or open.
    include_end : bool, default True
        Whether the end time needs to be included in the result.

        .. deprecated:: 1.4.0
           Arguments `include_start` and `include_end` have been deprecated
           to standardize boundary inputs. Use `inclusive` instead, to set
           each bound as closed or open.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; whether to set each bound as closed or open.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine range time on index or columns value.
        For `Series` this parameter is unused and defaults to 0.

    Returns
    -------
    Series or DataFrame
        Data from the original object filtered to the specified dates range.

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
    >>> ts
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4

    >>> ts.between_time('0:15', '0:45')
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3

    You get the times that are *not* between two times by setting
    ``start_time`` later than ``end_time``:

    >>> ts.between_time('0:45', '0:15')
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    """
    # Default to the stat axis (rows) when no axis is supplied.
    axis = self._stat_axis_number if axis is None else axis
    axis = self._get_axis_number(axis)

    index = self._get_axis(axis)
    if not isinstance(index, DatetimeIndex):
        raise TypeError("Index must be DatetimeIndex")

    legacy_args_used = (include_start != lib.no_default) or (
        include_end != lib.no_default
    )

    if legacy_args_used and inclusive is not None:
        raise ValueError(
            "Deprecated arguments `include_start` and `include_end` "
            "cannot be passed if `inclusive` has been given."
        )
    if legacy_args_used:
        # Translate the deprecated boolean pair into the `inclusive` form.
        warnings.warn(
            "`include_start` and `include_end` are deprecated in "
            "favour of `inclusive`.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        left = include_start if include_start is not lib.no_default else True
        right = include_end if include_end is not lib.no_default else True

        bounds_to_inclusive = {
            (True, True): "both",
            (True, False): "left",
            (False, True): "right",
            (False, False): "neither",
        }
        inclusive = bounds_to_inclusive[(left, right)]
    elif inclusive is None:
        # On arg removal inclusive can default to "both"
        inclusive = "both"

    left_inclusive, right_inclusive = validate_inclusive(inclusive)
    locs = index.indexer_between_time(
        start_time,
        end_time,
        include_start=left_inclusive,
        include_end=right_inclusive,
    )
    return self._take_with_is_copy(locs, axis=axis)
8432 @doc(**_shared_doc_kwargs)
8433 def resample(
8434 self,
8435 rule,
8436 axis: Axis = 0,
8437 closed: str | None = None,
8438 label: str | None = None,
8439 convention: str = "start",
8440 kind: str | None = None,
8441 loffset=None,
8442 base: int | None = None,
8443 on: Level = None,
8444 level: Level = None,
8445 origin: str | TimestampConvertibleTypes = "start_day",
8446 offset: TimedeltaConvertibleTypes | None = None,
8447 group_keys: bool_t | lib.NoDefault = lib.no_default,
8448 ) -> Resampler:
8449 """
8450 Resample time-series data.
8452 Convenience method for frequency conversion and resampling of time series.
8453 The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`,
8454 or `TimedeltaIndex`), or the caller must pass the label of a datetime-like
8455 series/index to the ``on``/``level`` keyword parameter.
8457 Parameters
8458 ----------
8459 rule : DateOffset, Timedelta or str
8460 The offset string or object representing target conversion.
8461 axis : {{0 or 'index', 1 or 'columns'}}, default 0
8462 Which axis to use for up- or down-sampling. For `Series` this parameter
8463 is unused and defaults to 0. Must be
8464 `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
8465 closed : {{'right', 'left'}}, default None
8466 Which side of bin interval is closed. The default is 'left'
8467 for all frequency offsets except for 'M', 'A', 'Q', 'BM',
8468 'BA', 'BQ', and 'W' which all have a default of 'right'.
8469 label : {{'right', 'left'}}, default None
8470 Which bin edge label to label bucket with. The default is 'left'
8471 for all frequency offsets except for 'M', 'A', 'Q', 'BM',
8472 'BA', 'BQ', and 'W' which all have a default of 'right'.
8473 convention : {{'start', 'end', 's', 'e'}}, default 'start'
8474 For `PeriodIndex` only, controls whether to use the start or
8475 end of `rule`.
8476 kind : {{'timestamp', 'period'}}, optional, default None
8477 Pass 'timestamp' to convert the resulting index to a
8478 `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`.
8479 By default the input representation is retained.
8480 loffset : timedelta, default None
8481 Adjust the resampled time labels.
8483 .. deprecated:: 1.1.0
8484 You should add the loffset to the `df.index` after the resample.
8485 See below.
8487 base : int, default 0
8488 For frequencies that evenly subdivide 1 day, the "origin" of the
8489 aggregated intervals. For example, for '5min' frequency, base could
8490 range from 0 through 4. Defaults to 0.
8492 .. deprecated:: 1.1.0
8493 The new arguments that you should use are 'offset' or 'origin'.
8495 on : str, optional
8496 For a DataFrame, column to use instead of index for resampling.
8497 Column must be datetime-like.
8498 level : str or int, optional
8499 For a MultiIndex, level (name or number) to use for
8500 resampling. `level` must be datetime-like.
8501 origin : Timestamp or str, default 'start_day'
8502 The timestamp on which to adjust the grouping. The timezone of origin
8503 must match the timezone of the index.
8504 If string, must be one of the following:
8506 - 'epoch': `origin` is 1970-01-01
8507 - 'start': `origin` is the first value of the timeseries
8508 - 'start_day': `origin` is the first day at midnight of the timeseries
8510 .. versionadded:: 1.1.0
8512 - 'end': `origin` is the last value of the timeseries
8513 - 'end_day': `origin` is the ceiling midnight of the last day
8515 .. versionadded:: 1.3.0
8517 offset : Timedelta or str, default is None
8518 An offset timedelta added to the origin.
8520 .. versionadded:: 1.1.0
8522 group_keys : bool, optional
8523 Whether to include the group keys in the result index when using
8524 ``.apply()`` on the resampled object. Not specifying ``group_keys``
8525 will retain values-dependent behavior from pandas 1.4
8526 and earlier (see :ref:`pandas 1.5.0 Release notes
8527 <whatsnew_150.enhancements.resample_group_keys>`
8528 for examples). In a future version of pandas, the behavior will
8529 default to the same as specifying ``group_keys=False``.
8531 .. versionadded:: 1.5.0
8533 Returns
8534 -------
8535 pandas.core.Resampler
8536 :class:`~pandas.core.Resampler` object.
8538 See Also
8539 --------
8540 Series.resample : Resample a Series.
8541 DataFrame.resample : Resample a DataFrame.
8542 groupby : Group {klass} by mapping, function, label, or list of labels.
8543 asfreq : Reindex a {klass} with the given frequency without grouping.
8545 Notes
8546 -----
8547 See the `user guide
8548 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__
8549 for more.
8551 To learn more about the offset strings, please see `this link
8552 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__.
8554 Examples
8555 --------
8556 Start by creating a series with 9 one minute timestamps.
8558 >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
8559 >>> series = pd.Series(range(9), index=index)
8560 >>> series
8561 2000-01-01 00:00:00 0
8562 2000-01-01 00:01:00 1
8563 2000-01-01 00:02:00 2
8564 2000-01-01 00:03:00 3
8565 2000-01-01 00:04:00 4
8566 2000-01-01 00:05:00 5
8567 2000-01-01 00:06:00 6
8568 2000-01-01 00:07:00 7
8569 2000-01-01 00:08:00 8
8570 Freq: T, dtype: int64
8572 Downsample the series into 3 minute bins and sum the values
8573 of the timestamps falling into a bin.
8575 >>> series.resample('3T').sum()
8576 2000-01-01 00:00:00 3
8577 2000-01-01 00:03:00 12
8578 2000-01-01 00:06:00 21
8579 Freq: 3T, dtype: int64
8581 Downsample the series into 3 minute bins as above, but label each
8582 bin using the right edge instead of the left. Please note that the
8583 value in the bucket used as the label is not included in the bucket,
8584 which it labels. For example, in the original series the
8585 bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
8586 value in the resampled bucket with the label ``2000-01-01 00:03:00``
8587 does not include 3 (if it did, the summed value would be 6, not 3).
8588 To include this value close the right side of the bin interval as
8589 illustrated in the example below this one.
8591 >>> series.resample('3T', label='right').sum()
8592 2000-01-01 00:03:00 3
8593 2000-01-01 00:06:00 12
8594 2000-01-01 00:09:00 21
8595 Freq: 3T, dtype: int64
8597 Downsample the series into 3 minute bins as above, but close the right
8598 side of the bin interval.
8600 >>> series.resample('3T', label='right', closed='right').sum()
8601 2000-01-01 00:00:00 0
8602 2000-01-01 00:03:00 6
8603 2000-01-01 00:06:00 15
8604 2000-01-01 00:09:00 15
8605 Freq: 3T, dtype: int64
8607 Upsample the series into 30 second bins.
8609 >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows
8610 2000-01-01 00:00:00 0.0
8611 2000-01-01 00:00:30 NaN
8612 2000-01-01 00:01:00 1.0
8613 2000-01-01 00:01:30 NaN
8614 2000-01-01 00:02:00 2.0
8615 Freq: 30S, dtype: float64
8617 Upsample the series into 30 second bins and fill the ``NaN``
8618 values using the ``ffill`` method.
8620 >>> series.resample('30S').ffill()[0:5]
8621 2000-01-01 00:00:00 0
8622 2000-01-01 00:00:30 0
8623 2000-01-01 00:01:00 1
8624 2000-01-01 00:01:30 1
8625 2000-01-01 00:02:00 2
8626 Freq: 30S, dtype: int64
8628 Upsample the series into 30 second bins and fill the
8629 ``NaN`` values using the ``bfill`` method.
8631 >>> series.resample('30S').bfill()[0:5]
8632 2000-01-01 00:00:00 0
8633 2000-01-01 00:00:30 1
8634 2000-01-01 00:01:00 1
8635 2000-01-01 00:01:30 2
8636 2000-01-01 00:02:00 2
8637 Freq: 30S, dtype: int64
8639 Pass a custom function via ``apply``
8641 >>> def custom_resampler(arraylike):
8642 ... return np.sum(arraylike) + 5
8643 ...
8644 >>> series.resample('3T').apply(custom_resampler)
8645 2000-01-01 00:00:00 8
8646 2000-01-01 00:03:00 17
8647 2000-01-01 00:06:00 26
8648 Freq: 3T, dtype: int64
8650 For a Series with a PeriodIndex, the keyword `convention` can be
8651 used to control whether to use the start or end of `rule`.
8653 Resample a year by quarter using 'start' `convention`. Values are
8654 assigned to the first quarter of the period.
8656 >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
8657 ... freq='A',
8658 ... periods=2))
8659 >>> s
8660 2012 1
8661 2013 2
8662 Freq: A-DEC, dtype: int64
8663 >>> s.resample('Q', convention='start').asfreq()
8664 2012Q1 1.0
8665 2012Q2 NaN
8666 2012Q3 NaN
8667 2012Q4 NaN
8668 2013Q1 2.0
8669 2013Q2 NaN
8670 2013Q3 NaN
8671 2013Q4 NaN
8672 Freq: Q-DEC, dtype: float64
8674 Resample quarters by month using 'end' `convention`. Values are
8675 assigned to the last month of the period.
8677 >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01',
8678 ... freq='Q',
8679 ... periods=4))
8680 >>> q
8681 2018Q1 1
8682 2018Q2 2
8683 2018Q3 3
8684 2018Q4 4
8685 Freq: Q-DEC, dtype: int64
8686 >>> q.resample('M', convention='end').asfreq()
8687 2018-03 1.0
8688 2018-04 NaN
8689 2018-05 NaN
8690 2018-06 2.0
8691 2018-07 NaN
8692 2018-08 NaN
8693 2018-09 3.0
8694 2018-10 NaN
8695 2018-11 NaN
8696 2018-12 4.0
8697 Freq: M, dtype: float64
8699 For DataFrame objects, the keyword `on` can be used to specify the
8700 column instead of the index for resampling.
8702 >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
8703 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
8704 >>> df = pd.DataFrame(d)
8705 >>> df['week_starting'] = pd.date_range('01/01/2018',
8706 ... periods=8,
8707 ... freq='W')
8708 >>> df
8709 price volume week_starting
8710 0 10 50 2018-01-07
8711 1 11 60 2018-01-14
8712 2 9 40 2018-01-21
8713 3 13 100 2018-01-28
8714 4 14 50 2018-02-04
8715 5 18 100 2018-02-11
8716 6 17 40 2018-02-18
8717 7 19 50 2018-02-25
8718 >>> df.resample('M', on='week_starting').mean()
8719 price volume
8720 week_starting
8721 2018-01-31 10.75 62.5
8722 2018-02-28 17.00 60.0
8724 For a DataFrame with MultiIndex, the keyword `level` can be used to
8725 specify on which level the resampling needs to take place.
8727 >>> days = pd.date_range('1/1/2000', periods=4, freq='D')
8728 >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
8729 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
8730 >>> df2 = pd.DataFrame(
8731 ... d2,
8732 ... index=pd.MultiIndex.from_product(
8733 ... [days, ['morning', 'afternoon']]
8734 ... )
8735 ... )
8736 >>> df2
8737 price volume
8738 2000-01-01 morning 10 50
8739 afternoon 11 60
8740 2000-01-02 morning 9 40
8741 afternoon 13 100
8742 2000-01-03 morning 14 50
8743 afternoon 18 100
8744 2000-01-04 morning 17 40
8745 afternoon 19 50
8746 >>> df2.resample('D', level=0).sum()
8747 price volume
8748 2000-01-01 21 110
8749 2000-01-02 22 140
8750 2000-01-03 32 150
8751 2000-01-04 36 90
8753 If you want to adjust the start of the bins based on a fixed timestamp:
8755 >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
8756 >>> rng = pd.date_range(start, end, freq='7min')
8757 >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
8758 >>> ts
8759 2000-10-01 23:30:00 0
8760 2000-10-01 23:37:00 3
8761 2000-10-01 23:44:00 6
8762 2000-10-01 23:51:00 9
8763 2000-10-01 23:58:00 12
8764 2000-10-02 00:05:00 15
8765 2000-10-02 00:12:00 18
8766 2000-10-02 00:19:00 21
8767 2000-10-02 00:26:00 24
8768 Freq: 7T, dtype: int64
8770 >>> ts.resample('17min').sum()
8771 2000-10-01 23:14:00 0
8772 2000-10-01 23:31:00 9
8773 2000-10-01 23:48:00 21
8774 2000-10-02 00:05:00 54
8775 2000-10-02 00:22:00 24
8776 Freq: 17T, dtype: int64
8778 >>> ts.resample('17min', origin='epoch').sum()
8779 2000-10-01 23:18:00 0
8780 2000-10-01 23:35:00 18
8781 2000-10-01 23:52:00 27
8782 2000-10-02 00:09:00 39
8783 2000-10-02 00:26:00 24
8784 Freq: 17T, dtype: int64
8786 >>> ts.resample('17min', origin='2000-01-01').sum()
8787 2000-10-01 23:24:00 3
8788 2000-10-01 23:41:00 15
8789 2000-10-01 23:58:00 45
8790 2000-10-02 00:15:00 45
8791 Freq: 17T, dtype: int64
8793 If you want to adjust the start of the bins with an `offset` Timedelta, the two
8794 following lines are equivalent:
8796 >>> ts.resample('17min', origin='start').sum()
8797 2000-10-01 23:30:00 9
8798 2000-10-01 23:47:00 21
8799 2000-10-02 00:04:00 54
8800 2000-10-02 00:21:00 24
8801 Freq: 17T, dtype: int64
8803 >>> ts.resample('17min', offset='23h30min').sum()
8804 2000-10-01 23:30:00 9
8805 2000-10-01 23:47:00 21
8806 2000-10-02 00:04:00 54
8807 2000-10-02 00:21:00 24
8808 Freq: 17T, dtype: int64
8810 If you want to take the largest Timestamp as the end of the bins:
8812 >>> ts.resample('17min', origin='end').sum()
8813 2000-10-01 23:35:00 0
8814 2000-10-01 23:52:00 18
8815 2000-10-02 00:09:00 27
8816 2000-10-02 00:26:00 63
8817 Freq: 17T, dtype: int64
8819 In contrast with the `start_day`, you can use `end_day` to take the ceiling
8820 midnight of the largest Timestamp as the end of the bins and drop the bins
8821 not containing data:
8823 >>> ts.resample('17min', origin='end_day').sum()
8824 2000-10-01 23:38:00 3
8825 2000-10-01 23:55:00 15
8826 2000-10-02 00:12:00 45
8827 2000-10-02 00:29:00 45
8828 Freq: 17T, dtype: int64
8830 To replace the use of the deprecated `base` argument, you can now use `offset`,
8831 in this example it is equivalent to have `base=2`:
8833 >>> ts.resample('17min', offset='2min').sum()
8834 2000-10-01 23:16:00 0
8835 2000-10-01 23:33:00 9
8836 2000-10-01 23:50:00 36
8837 2000-10-02 00:07:00 39
8838 2000-10-02 00:24:00 24
8839 Freq: 17T, dtype: int64
8841 To replace the use of the deprecated `loffset` argument:
8843 >>> from pandas.tseries.frequencies import to_offset
8844 >>> loffset = '19min'
8845 >>> ts_out = ts.resample('17min').sum()
8846 >>> ts_out.index = ts_out.index + to_offset(loffset)
8847 >>> ts_out
8848 2000-10-01 23:33:00 0
8849 2000-10-01 23:50:00 9
8850 2000-10-02 00:07:00 21
8851 2000-10-02 00:24:00 54
8852 2000-10-02 00:41:00 24
8853 Freq: 17T, dtype: int64
8854 """
8855 from pandas.core.resample import get_resampler
8857 axis = self._get_axis_number(axis)
8858 return get_resampler(
8859 self,
8860 freq=rule,
8861 label=label,
8862 closed=closed,
8863 axis=axis,
8864 kind=kind,
8865 loffset=loffset,
8866 convention=convention,
8867 base=base,
8868 key=on,
8869 level=level,
8870 origin=origin,
8871 offset=offset,
8872 group_keys=group_keys,
8873 )
8875 @final
8876 def first(self: NDFrameT, offset) -> NDFrameT:
8877 """
8878 Select initial periods of time series data based on a date offset.
8880 When having a DataFrame with dates as index, this function can
8881 select the first few rows based on a date offset.
8883 Parameters
8884 ----------
8885 offset : str, DateOffset or dateutil.relativedelta
8886 The offset length of the data that will be selected. For instance,
8887 '1M' will display all the rows having their index within the first month.
8889 Returns
8890 -------
8891 Series or DataFrame
8892 A subset of the caller.
8894 Raises
8895 ------
8896 TypeError
8897 If the index is not a :class:`DatetimeIndex`
8899 See Also
8900 --------
8901 last : Select final periods of time series based on a date offset.
8902 at_time : Select values at a particular time of the day.
8903 between_time : Select values between particular times of the day.
8905 Examples
8906 --------
8907 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
8908 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
8909 >>> ts
8910 A
8911 2018-04-09 1
8912 2018-04-11 2
8913 2018-04-13 3
8914 2018-04-15 4
8916 Get the rows for the first 3 days:
8918 >>> ts.first('3D')
8919 A
8920 2018-04-09 1
8921 2018-04-11 2
8923 Notice the data for 3 first calendar days were returned, not the first
8924 3 days observed in the dataset, and therefore data for 2018-04-13 was
8925 not returned.
8926 """
8927 if not isinstance(self.index, DatetimeIndex):
8928 raise TypeError("'first' only supports a DatetimeIndex index")
8930 if len(self.index) == 0:
8931 return self
8933 offset = to_offset(offset)
8934 if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]):
8935 # GH#29623 if first value is end of period, remove offset with n = 1
8936 # before adding the real offset
8937 end_date = end = self.index[0] - offset.base + offset
8938 else:
8939 end_date = end = self.index[0] + offset
8941 # Tick-like, e.g. 3 weeks
8942 if isinstance(offset, Tick) and end_date in self.index:
8943 end = self.index.searchsorted(end_date, side="left")
8944 return self.iloc[:end]
8946 return self.loc[:end]
8948 @final
8949 def last(self: NDFrameT, offset) -> NDFrameT:
8950 """
8951 Select final periods of time series data based on a date offset.
8953 For a DataFrame with a sorted DatetimeIndex, this function
8954 selects the last few rows based on a date offset.
8956 Parameters
8957 ----------
8958 offset : str, DateOffset, dateutil.relativedelta
8959 The offset length of the data that will be selected. For instance,
8960 '3D' will display all the rows having their index within the last 3 days.
8962 Returns
8963 -------
8964 Series or DataFrame
8965 A subset of the caller.
8967 Raises
8968 ------
8969 TypeError
8970 If the index is not a :class:`DatetimeIndex`
8972 See Also
8973 --------
8974 first : Select initial periods of time series based on a date offset.
8975 at_time : Select values at a particular time of the day.
8976 between_time : Select values between particular times of the day.
8978 Examples
8979 --------
8980 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
8981 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
8982 >>> ts
8983 A
8984 2018-04-09 1
8985 2018-04-11 2
8986 2018-04-13 3
8987 2018-04-15 4
8989 Get the rows for the last 3 days:
8991 >>> ts.last('3D')
8992 A
8993 2018-04-13 3
8994 2018-04-15 4
8996 Notice the data for 3 last calendar days were returned, not the last
8997 3 observed days in the dataset, and therefore data for 2018-04-11 was
8998 not returned.
8999 """
9000 if not isinstance(self.index, DatetimeIndex):
9001 raise TypeError("'last' only supports a DatetimeIndex index")
9003 if len(self.index) == 0:
9004 return self
9006 offset = to_offset(offset)
9008 start_date = self.index[-1] - offset
9009 start = self.index.searchsorted(start_date, side="right")
9010 return self.iloc[start:]
    @final
    def rank(
        self: NDFrameT,
        axis=0,
        method: str = "average",
        numeric_only: bool_t | None | lib.NoDefault = lib.no_default,
        na_option: str = "keep",
        ascending: bool_t = True,
        pct: bool_t = False,
    ) -> NDFrameT:
        """
        Compute numerical data ranks (1 through n) along axis.

        By default, equal values are assigned a rank that is the average of the
        ranks of those values.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns'}, default 0
            Index to direct ranking.
            For `Series` this parameter is unused and defaults to 0.
        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):

            * average: average rank of the group
            * min: lowest rank in the group
            * max: highest rank in the group
            * first: ranks assigned in order they appear in the array
            * dense: like 'min', but rank always increases by 1 between groups.

        numeric_only : bool, optional
            For DataFrame objects, rank only numeric columns if set to True.
        na_option : {'keep', 'top', 'bottom'}, default 'keep'
            How to rank NaN values:

            * keep: assign NaN rank to NaN values
            * top: assign lowest rank to NaN values
            * bottom: assign highest rank to NaN values

        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.

        Returns
        -------
        same type as caller
            Return a Series or DataFrame with data ranks as values.

        See Also
        --------
        core.groupby.GroupBy.rank : Rank of values within each group.

        Examples
        --------
        >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
        ...                                    'spider', 'snake'],
        ...                         'Number_legs': [4, 2, 4, 8, np.nan]})
        >>> df
            Animal  Number_legs
        0      cat          4.0
        1  penguin          2.0
        2      dog          4.0
        3   spider          8.0
        4    snake          NaN

        Ties are assigned the mean of the ranks (by default) for the group.

        >>> s = pd.Series(range(5), index=list("abcde"))
        >>> s["d"] = s["b"]
        >>> s.rank()
        a    1.0
        b    2.5
        c    4.0
        d    2.5
        e    5.0
        dtype: float64

        The following example shows how the method behaves with the above
        parameters:

        * default_rank: this is the default behaviour obtained without using
          any parameter.
        * max_rank: setting ``method = 'max'`` the records that have the
          same values are ranked using the highest rank (e.g.: since 'cat'
          and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
        * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
          with NaN values they are placed at the bottom of the ranking.
        * pct_rank: when setting ``pct = True``, the ranking is expressed as
          percentile rank.

        >>> df['default_rank'] = df['Number_legs'].rank()
        >>> df['max_rank'] = df['Number_legs'].rank(method='max')
        >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
        >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
        >>> df
            Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
        0      cat          4.0           2.5       3.0        2.5     0.625
        1  penguin          2.0           1.0       1.0        1.0     0.250
        2      dog          4.0           2.5       3.0        2.5     0.625
        3   spider          8.0           4.0       4.0        4.0     1.000
        4    snake          NaN           NaN       NaN        5.0       NaN
        """
        # Track whether a deprecation warning has already been emitted so the
        # retry path below does not warn a second time.
        warned = False
        if numeric_only is None:
            # GH#45036: an explicit numeric_only=None is deprecated.
            warnings.warn(
                f"'numeric_only=None' in {type(self).__name__}.rank is deprecated "
                "and will raise in a future version. Pass either 'True' or "
                "'False'. 'False' will be the default.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            warned = True
        elif numeric_only is lib.no_default:
            # Sentinel: caller did not pass numeric_only at all. Fall back to
            # the legacy None behavior without emitting the warning above.
            numeric_only = None

        axis = self._get_axis_number(axis)

        if na_option not in {"keep", "top", "bottom"}:
            msg = "na_option must be one of 'keep', 'top', or 'bottom'"
            raise ValueError(msg)

        def ranker(data):
            # Rank the underlying values of `data` and re-wrap the result in
            # an object with the same axes as `data`.
            if data.ndim == 2:
                # i.e. DataFrame, we cast to ndarray
                values = data.values
            else:
                # i.e. Series, can dispatch to EA
                values = data._values

            if isinstance(values, ExtensionArray):
                ranks = values._rank(
                    axis=axis,
                    method=method,
                    ascending=ascending,
                    na_option=na_option,
                    pct=pct,
                )
            else:
                ranks = algos.rank(
                    values,
                    axis=axis,
                    method=method,
                    ascending=ascending,
                    na_option=na_option,
                    pct=pct,
                )

            ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
            return ranks_obj.__finalize__(self, method="rank")

        # if numeric_only is None, and we can't get anything, we try with
        # numeric_only=True
        if numeric_only is None:
            try:
                return ranker(self)
            except TypeError:
                numeric_only = True
                if not warned:
                    # Only warn here if we didn't already issue a warning above
                    # GH#45036
                    warnings.warn(
                        f"Dropping of nuisance columns in {type(self).__name__}.rank "
                        "is deprecated; in a future version this will raise TypeError. "
                        "Select only valid columns before calling rank.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

        if numeric_only:
            if self.ndim == 1 and not is_numeric_dtype(self.dtype):
                # GH#47500: ranking a non-numeric Series with numeric_only=True
                # is deprecated rather than an immediate error.
                warnings.warn(
                    f"Calling Series.rank with numeric_only={numeric_only} and dtype "
                    f"{self.dtype} is deprecated and will raise a TypeError in a "
                    "future version of pandas",
                    category=FutureWarning,
                    stacklevel=find_stack_level(),
                )
            data = self._get_numeric_data()
        else:
            data = self

        return ranker(data)
    @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
    def compare(
        self,
        other,
        align_axis: Axis = 1,
        keep_shape: bool_t = False,
        keep_equal: bool_t = False,
        result_names: Suffixes = ("self", "other"),
    ):
        from pandas.core.reshape.concat import concat

        # Only objects of the exact same type can be compared
        # (Series with Series, DataFrame with DataFrame).
        if type(self) is not type(other):
            cls_self, cls_other = type(self).__name__, type(other).__name__
            raise TypeError(
                f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'"
            )

        # True where the two objects differ; positions where BOTH sides are
        # NA count as equal.
        mask = ~((self == other) | (self.isna() & other.isna()))

        if not keep_equal:
            # Blank out the equal positions so only differences remain.
            self = self.where(mask)
            other = other.where(mask)

        if not keep_shape:
            # Drop rows/columns (elements, for Series) that are entirely equal.
            if isinstance(self, ABCDataFrame):
                cmask = mask.any()
                rmask = mask.any(axis=1)
                self = self.loc[rmask, cmask]
                other = other.loc[rmask, cmask]
            else:
                self = self[mask]
                other = other[mask]
        if not isinstance(result_names, tuple):
            raise TypeError(
                f"Passing 'result_names' as a {type(result_names)} is not "
                "supported. Provide 'result_names' as a tuple instead."
            )

        if align_axis in (1, "columns"):  # This is needed for Series
            axis = 1
        else:
            axis = self._get_axis_number(align_axis)

        # Stack self and other along the chosen axis, keyed by result_names,
        # producing a 'self'/'other' level on that axis.
        diff = concat([self, other], axis=axis, keys=result_names)

        if axis >= self.ndim:
            # No need to reorganize data if stacking on new axis
            # This currently applies for stacking two Series on columns
            return diff

        ax = diff._get_axis(axis)
        ax_names = np.array(ax.names)

        # set index names to positions to avoid confusion
        ax.names = np.arange(len(ax_names))

        # bring self-other to inner level
        order = list(range(1, ax.nlevels)) + [0]
        if isinstance(diff, ABCDataFrame):
            diff = diff.reorder_levels(order, axis=axis)
        else:
            diff = diff.reorder_levels(order)

        # restore the index names in order
        diff._get_axis(axis=axis).names = ax_names[order]

        # reorder axis to keep things organized: interleave the two halves so
        # each label's 'self' and 'other' entries sit next to each other.
        indices = (
            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
        )
        diff = diff.take(indices, axis=axis)

        return diff
    @doc(**_shared_doc_kwargs)
    def align(
        self: NDFrameT,
        other: NDFrameT,
        join: Literal["outer", "inner", "left", "right"] = "outer",
        axis: Axis | None = None,
        level: Level = None,
        copy: bool_t = True,
        fill_value: Hashable = None,
        method: FillnaOptions | None = None,
        limit: int | None = None,
        fill_axis: Axis = 0,
        broadcast_axis: Axis | None = None,
    ) -> NDFrameT:
        """
        Align two objects on their axes with the specified join method.

        Join method is specified for each axis Index.

        Parameters
        ----------
        other : DataFrame or Series
        join : {{'outer', 'inner', 'left', 'right'}}, default 'outer'
        axis : allowed axis of the other object, default None
            Align on index (0), columns (1), or both (None).
        level : int or level name, default None
            Broadcast across a level, matching Index values on the
            passed MultiIndex level.
        copy : bool, default True
            Always returns new objects. If copy=False and no reindexing is
            required then original objects are returned.
        fill_value : scalar, default np.NaN
            Value to use for missing values. Defaults to NaN, but can be any
            "compatible" value.
        method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None
            Method to use for filling holes in reindexed Series:

            - pad / ffill: propagate last valid observation forward to next valid.
            - backfill / bfill: use NEXT valid observation to fill gap.

        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        fill_axis : {axes_single_arg}, default 0
            Filling axis, method and limit.
        broadcast_axis : {axes_single_arg}, default None
            Broadcast values along this axis, if aligning two objects of
            different dimensions.

        Returns
        -------
        (left, right) : ({klass}, type of other)
            Aligned objects.

        Examples
        --------
        >>> df = pd.DataFrame(
        ...     [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2]
        ... )
        >>> other = pd.DataFrame(
        ...     [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]],
        ...     columns=["A", "B", "C", "D"],
        ...     index=[2, 3, 4],
        ... )
        >>> df
           D  B  E  A
        1  1  2  3  4
        2  6  7  8  9
        >>> other
             A    B    C    D
        2   10   20   30   40
        3   60   70   80   90
        4  600  700  800  900

        Align on columns:

        >>> left, right = df.align(other, join="outer", axis=1)
        >>> left
           A  B   C  D  E
        1  4  2 NaN  1  3
        2  9  7 NaN  6  8
        >>> right
             A    B    C    D   E
        2   10   20   30   40 NaN
        3   60   70   80   90 NaN
        4  600  700  800  900 NaN

        We can also align on the index:

        >>> left, right = df.align(other, join="outer", axis=0)
        >>> left
             D    B    E    A
        1  1.0  2.0  3.0  4.0
        2  6.0  7.0  8.0  9.0
        3  NaN  NaN  NaN  NaN
        4  NaN  NaN  NaN  NaN
        >>> right
               A      B      C      D
        1    NaN    NaN    NaN    NaN
        2   10.0   20.0   30.0   40.0
        3   60.0   70.0   80.0   90.0
        4  600.0  700.0  800.0  900.0

        Finally, the default `axis=None` will align on both index and columns:

        >>> left, right = df.align(other, join="outer", axis=None)
        >>> left
             A    B   C    D    E
        1  4.0  2.0 NaN  1.0  3.0
        2  9.0  7.0 NaN  6.0  8.0
        3  NaN  NaN NaN  NaN  NaN
        4  NaN  NaN NaN  NaN  NaN
        >>> right
               A      B      C      D   E
        1    NaN    NaN    NaN    NaN NaN
        2   10.0   20.0   30.0   40.0 NaN
        3   60.0   70.0   80.0   90.0 NaN
        4  600.0  700.0  800.0  900.0 NaN
        """

        method = missing.clean_fill_method(method)

        # When the two objects have different dimensionality and
        # broadcast_axis=1, expand the Series side into a DataFrame (one
        # copy per column of the frame), then align frame-to-frame.
        if broadcast_axis == 1 and self.ndim != other.ndim:
            if isinstance(self, ABCSeries):
                # this means other is a DataFrame, and we need to broadcast
                # self
                cons = self._constructor_expanddim
                df = cons(
                    {c: self for c in other.columns}, **other._construct_axes_dict()
                )
                return df._align_frame(
                    other,
                    join=join,
                    axis=axis,
                    level=level,
                    copy=copy,
                    fill_value=fill_value,
                    method=method,
                    limit=limit,
                    fill_axis=fill_axis,
                )
            elif isinstance(other, ABCSeries):
                # this means self is a DataFrame, and we need to broadcast
                # other
                cons = other._constructor_expanddim
                df = cons(
                    {c: other for c in self.columns}, **self._construct_axes_dict()
                )
                return self._align_frame(
                    df,
                    join=join,
                    axis=axis,
                    level=level,
                    copy=copy,
                    fill_value=fill_value,
                    method=method,
                    limit=limit,
                    fill_axis=fill_axis,
                )

        if axis is not None:
            axis = self._get_axis_number(axis)
        # Dispatch on the type of `other`: frame-frame vs frame/series-series.
        if isinstance(other, ABCDataFrame):
            return self._align_frame(
                other,
                join=join,
                axis=axis,
                level=level,
                copy=copy,
                fill_value=fill_value,
                method=method,
                limit=limit,
                fill_axis=fill_axis,
            )
        elif isinstance(other, ABCSeries):
            return self._align_series(
                other,
                join=join,
                axis=axis,
                level=level,
                copy=copy,
                fill_value=fill_value,
                method=method,
                limit=limit,
                fill_axis=fill_axis,
            )
        else:  # pragma: no cover
            raise TypeError(f"unsupported type: {type(other)}")
    @final
    def _align_frame(
        self,
        other,
        join="outer",
        axis=None,
        level=None,
        copy: bool_t = True,
        fill_value=None,
        method=None,
        limit=None,
        fill_axis=0,
    ):
        """
        Align self (Series or DataFrame) with a DataFrame ``other``.

        Joins the requested axis labels of the two objects, reindexes both
        sides onto the joined labels, optionally fills the holes introduced
        by the reindex, and returns a ``(left, right)`` pair.
        """
        # defaults: None indexers mean "axis already aligned, no reindex needed"
        join_index, join_columns = None, None
        ilidx, iridx = None, None
        clidx, cridx = None, None

        is_series = isinstance(self, ABCSeries)

        # Join row labels only when they actually differ.
        if (axis is None or axis == 0) and not self.index.equals(other.index):
            join_index, ilidx, iridx = self.index.join(
                other.index, how=join, level=level, return_indexers=True
            )

        # Join columns likewise; a Series has no columns to join.
        if (
            (axis is None or axis == 1)
            and not is_series
            and not self.columns.equals(other.columns)
        ):
            join_columns, clidx, cridx = self.columns.join(
                other.columns, how=join, level=level, return_indexers=True
            )

        if is_series:
            reindexers = {0: [join_index, ilidx]}
        else:
            reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}

        left = self._reindex_with_indexers(
            reindexers, copy=copy, fill_value=fill_value, allow_dups=True
        )
        # other must be always DataFrame
        right = other._reindex_with_indexers(
            {0: [join_index, iridx], 1: [join_columns, cridx]},
            copy=copy,
            fill_value=fill_value,
            allow_dups=True,
        )

        if method is not None:
            _left = left.fillna(method=method, axis=fill_axis, limit=limit)
            assert _left is not None  # needed for mypy
            left = _left
            right = right.fillna(method=method, axis=fill_axis, limit=limit)

        # if DatetimeIndex have different tz, convert to UTC
        left, right = _align_as_utc(left, right, join_index)

        return (
            left.__finalize__(self),
            right.__finalize__(other),
        )
    @final
    def _align_series(
        self,
        other,
        join="outer",
        axis=None,
        level=None,
        copy: bool_t = True,
        fill_value=None,
        method=None,
        limit=None,
        fill_axis=0,
    ):
        """
        Align self (Series or DataFrame) with a Series ``other``.

        For axis 0 (or Series/Series) the indexes are joined and both sides
        reindexed; for a DataFrame aligned on axis 1 the frame's columns are
        joined against the Series' index at the block-manager level.
        Returns a ``(left, right)`` pair.
        """

        is_series = isinstance(self, ABCSeries)

        # A DataFrame aligned against a Series must say which axis to use.
        if (not is_series and axis is None) or axis not in [None, 0, 1]:
            raise ValueError("Must specify axis=0 or 1")

        if is_series and axis == 1:
            raise ValueError("cannot align series to a series other than axis 0")

        # series/series compat, other must always be a Series
        if not axis:

            # equal
            if self.index.equals(other.index):
                join_index, lidx, ridx = None, None, None
            else:
                join_index, lidx, ridx = self.index.join(
                    other.index, how=join, level=level, return_indexers=True
                )

            if is_series:
                left = self._reindex_indexer(join_index, lidx, copy)
            elif lidx is None or join_index is None:
                # Already aligned; honor the copy flag without reindexing.
                left = self.copy() if copy else self
            else:
                left = self._constructor(
                    self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy)
                )

            right = other._reindex_indexer(join_index, ridx, copy)

        else:

            # one has > 1 ndim: join the frame's columns with the Series index
            fdata = self._mgr
            join_index = self.axes[1]
            lidx, ridx = None, None
            if not join_index.equals(other.index):
                join_index, lidx, ridx = join_index.join(
                    other.index, how=join, level=level, return_indexers=True
                )

            if lidx is not None:
                # Reindex at the manager level along the corresponding
                # block-manager axis.
                bm_axis = self._get_block_manager_axis(1)
                fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)

            if copy and fdata is self._mgr:
                fdata = fdata.copy()

            left = self._constructor(fdata)

            if ridx is None:
                right = other
            else:
                right = other.reindex(join_index, level=level)

        # fill
        fill_na = notna(fill_value) or (method is not None)
        if fill_na:
            left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis)
            right = right.fillna(fill_value, method=method, limit=limit)

        # if DatetimeIndex have different tz, convert to UTC
        if is_series or (not is_series and axis == 0):
            left, right = _align_as_utc(left, right, join_index)

        return (
            left.__finalize__(self),
            right.__finalize__(other),
        )
    @final
    def _where(
        self,
        cond,
        other=lib.no_default,
        inplace=False,
        axis=None,
        level=None,
    ):
        """
        Equivalent to public method `where`, except that `other` is not
        applied as a function even if callable. Used in __setitem__.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if axis is not None:
            axis = self._get_axis_number(axis)

        # align the cond to same shape as myself
        cond = com.apply_if_callable(cond, self)
        if isinstance(cond, NDFrame):
            cond, _ = cond.align(self, join="right", broadcast_axis=1, copy=False)
        else:
            if not hasattr(cond, "shape"):
                cond = np.asanyarray(cond)
            if cond.shape != self.shape:
                raise ValueError("Array conditional must be same shape as self")
            cond = self._constructor(cond, **self._construct_axes_dict())

        # make sure we are boolean; NA positions in the condition become
        # True for the inplace/putmask path and False otherwise.
        fill_value = bool(inplace)
        cond = cond.fillna(fill_value)

        msg = "Boolean array expected for the condition, not {dtype}"

        if not cond.empty:
            if not isinstance(cond, ABCDataFrame):
                # This is a single-dimensional object.
                if not is_bool_dtype(cond):
                    raise ValueError(msg.format(dtype=cond.dtype))
            else:
                for dt in cond.dtypes:
                    if not is_bool_dtype(dt):
                        raise ValueError(msg.format(dtype=dt))
        else:
            # GH#21947 we have an empty DataFrame/Series, could be object-dtype
            cond = cond.astype(bool)

        # putmask (inplace) writes where the mask is True, while `where`
        # keeps values where cond is True — hence the inversion for inplace.
        cond = -cond if inplace else cond
        cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False)

        # try to align with other
        if isinstance(other, NDFrame):

            # align with me
            if other.ndim <= self.ndim:

                _, other = self.align(
                    other,
                    join="left",
                    axis=axis,
                    level=level,
                    fill_value=None,
                    copy=False,
                )

                # if we are NOT aligned, raise as we cannot where index
                if axis is None and not other._indexed_same(self):
                    raise InvalidIndexError

                elif other.ndim < self.ndim:
                    # TODO(EA2D): avoid object-dtype cast in EA case GH#38729
                    other = other._values
                    if axis == 0:
                        other = np.reshape(other, (-1, 1))
                    elif axis == 1:
                        other = np.reshape(other, (1, -1))

                    other = np.broadcast_to(other, self.shape)

            # slice me out of the other
            else:
                raise NotImplementedError(
                    "cannot align with a higher dimensional NDFrame"
                )

        elif not isinstance(other, (MultiIndex, NDFrame)):
            # mainly just catching Index here
            other = extract_array(other, extract_numpy=True)

        if isinstance(other, (np.ndarray, ExtensionArray)):

            if other.shape != self.shape:
                if self.ndim != 1:
                    # In the ndim == 1 case we may have
                    # other length 1, which we treat as scalar (GH#2745, GH#4192)
                    # or len(other) == icond.sum(), which we treat like
                    # __setitem__ (GH#3235)
                    raise ValueError(
                        "other must be the same shape as self when an ndarray"
                    )

            # we are the same shape, so create an actual object for alignment
            else:
                other = self._constructor(other, **self._construct_axes_dict())

        if axis is None:
            axis = 0

        # Same-ndim operands are aligned by label; a lower-dim `other` only
        # needs alignment when broadcasting along the column axis.
        if self.ndim == getattr(other, "ndim", 0):
            align = True
        else:
            align = self._get_axis_number(axis) == 1

        if inplace:
            # we may have different type blocks come out of putmask, so
            # reconstruct the block manager

            self._check_inplace_setting(other)
            new_data = self._mgr.putmask(mask=cond, new=other, align=align)
            result = self._constructor(new_data)
            return self._update_inplace(result)

        else:
            new_data = self._mgr.where(
                other=other,
                cond=cond,
                align=align,
            )
            result = self._constructor(new_data)
            return result.__finalize__(self)
    @overload
    def where(
        self: NDFrameT,
        cond,
        other=...,
        *,
        inplace: Literal[False] = ...,
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT:
        # Typing overload: inplace=False (the default) returns a new object
        # of the caller's type.
        ...
    @overload
    def where(
        self,
        cond,
        other=...,
        *,
        inplace: Literal[True],
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> None:
        # Typing overload: inplace=True mutates self and returns None.
        ...
    @overload
    def where(
        self: NDFrameT,
        cond,
        other=...,
        *,
        inplace: bool_t = ...,
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT | None:
        # Typing overload: inplace not statically known, so the return is
        # either a new object or None.
        ...
    @deprecate_kwarg(old_arg_name="errors", new_arg_name=None)
    @deprecate_nonkeyword_arguments(
        version=None, allowed_args=["self", "cond", "other"]
    )
    @doc(
        klass=_shared_doc_kwargs["klass"],
        cond="True",
        cond_rev="False",
        name="where",
        name_other="mask",
    )
    def where(
        self: NDFrameT,
        cond,
        other=np.nan,
        inplace: bool_t = False,
        axis: Axis | None = None,
        level: Level = None,
        errors: IgnoreRaise | lib.NoDefault = "raise",
        try_cast: bool_t | lib.NoDefault = lib.no_default,
    ) -> NDFrameT | None:
        """
        Replace values where the condition is {cond_rev}.

        Parameters
        ----------
        cond : bool {klass}, array-like, or callable
            Where `cond` is {cond}, keep the original value. Where
            {cond_rev}, replace with corresponding value from `other`.
            If `cond` is callable, it is computed on the {klass} and
            should return boolean {klass} or array. The callable must
            not change input {klass} (though pandas doesn't check it).
        other : scalar, {klass}, or callable
            Entries where `cond` is {cond_rev} are replaced with
            corresponding value from `other`.
            If other is callable, it is computed on the {klass} and
            should return scalar or {klass}. The callable must not
            change input {klass} (though pandas doesn't check it).
        inplace : bool, default False
            Whether to perform the operation in place on the data.
        axis : int, default None
            Alignment axis if needed. For `Series` this parameter is
            unused and defaults to 0.
        level : int, default None
            Alignment level if needed.
        errors : str, {{'raise', 'ignore'}}, default 'raise'
            Note that currently this parameter won't affect
            the results and will always coerce to a suitable dtype.

            - 'raise' : allow exceptions to be raised.
            - 'ignore' : suppress exceptions. On error return original object.

            .. deprecated:: 1.5.0
                This argument had no effect.

        try_cast : bool, default None
            Try to cast the result back to the input type (if possible).

            .. deprecated:: 1.3.0
                Manually cast back if necessary.

        Returns
        -------
        Same type as caller or None if ``inplace=True``.

        See Also
        --------
        :func:`DataFrame.{name_other}` : Return an object of same shape as
            self.

        Notes
        -----
        The {name} method is an application of the if-then idiom. For each
        element in the calling DataFrame, if ``cond`` is ``{cond}`` the
        element is used; otherwise the corresponding element from the DataFrame
        ``other`` is used. If the axis of ``other`` does not align with axis of
        ``cond`` {klass}, the misaligned index positions will be filled with
        {cond_rev}.

        The signature for :func:`DataFrame.where` differs from
        :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
        ``np.where(m, df1, df2)``.

        For further details and examples see the ``{name}`` documentation in
        :ref:`indexing <indexing.where_mask>`.

        The dtype of the object takes precedence. The fill value is casted to
        the object's dtype, if this can be done losslessly.

        Examples
        --------
        >>> s = pd.Series(range(5))
        >>> s.where(s > 0)
        0    NaN
        1    1.0
        2    2.0
        3    3.0
        4    4.0
        dtype: float64
        >>> s.mask(s > 0)
        0    0.0
        1    NaN
        2    NaN
        3    NaN
        4    NaN
        dtype: float64

        >>> s = pd.Series(range(5))
        >>> t = pd.Series([True, False])
        >>> s.where(t, 99)
        0     0
        1    99
        2    99
        3    99
        4    99
        dtype: int64
        >>> s.mask(t, 99)
        0    99
        1     1
        2    99
        3    99
        4    99
        dtype: int64

        >>> s.where(s > 1, 10)
        0    10
        1    10
        2     2
        3     3
        4     4
        dtype: int64
        >>> s.mask(s > 1, 10)
        0     0
        1     1
        2    10
        3    10
        4    10
        dtype: int64

        >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
        >>> df
           A  B
        0  0  1
        1  2  3
        2  4  5
        3  6  7
        4  8  9
        >>> m = df % 3 == 0
        >>> df.where(m, -df)
           A  B
        0  0 -1
        1 -2  3
        2 -4 -5
        3  6 -7
        4 -8  9
        >>> df.where(m, -df) == np.where(m, df, -df)
              A     B
        0  True  True
        1  True  True
        2  True  True
        3  True  True
        4  True  True
        >>> df.where(m, -df) == df.mask(~m, -df)
              A     B
        0  True  True
        1  True  True
        2  True  True
        3  True  True
        4  True  True
        """
        # Unlike _where, the public API evaluates a callable `other` on self.
        other = com.apply_if_callable(other, self)

        if try_cast is not lib.no_default:
            # try_cast is deprecated; emitting this warning is its only effect.
            warnings.warn(
                "try_cast keyword is deprecated and will be removed in a "
                "future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        # All real work happens in the shared _where helper.
        return self._where(cond, other, inplace, axis, level)
    # Typing overloads for ``mask``: the return type depends on ``inplace``.
    # ``inplace=False`` returns a new object of the caller's type,
    # ``inplace=True`` mutates in place and returns ``None``; the final
    # overload covers a runtime-determined ``inplace`` flag.
    @overload
    def mask(
        self: NDFrameT,
        cond,
        other=...,
        *,
        inplace: Literal[False] = ...,
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT:
        ...

    @overload
    def mask(
        self,
        cond,
        other=...,
        *,
        inplace: Literal[True],
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> None:
        ...

    @overload
    def mask(
        self: NDFrameT,
        cond,
        other=...,
        *,
        inplace: bool_t = ...,
        axis: Axis | None = ...,
        level: Level = ...,
        errors: IgnoreRaise | lib.NoDefault = ...,
        try_cast: bool_t | lib.NoDefault = ...,
    ) -> NDFrameT | None:
        ...
10012 @deprecate_kwarg(old_arg_name="errors", new_arg_name=None)
10013 @deprecate_nonkeyword_arguments(
10014 version=None, allowed_args=["self", "cond", "other"]
10015 )
10016 @doc(
10017 where,
10018 klass=_shared_doc_kwargs["klass"],
10019 cond="False",
10020 cond_rev="True",
10021 name="mask",
10022 name_other="where",
10023 )
10024 def mask(
10025 self: NDFrameT,
10026 cond,
10027 other=np.nan,
10028 inplace: bool_t = False,
10029 axis: Axis | None = None,
10030 level: Level = None,
10031 errors: IgnoreRaise | lib.NoDefault = "raise",
10032 try_cast: bool_t | lib.NoDefault = lib.no_default,
10033 ) -> NDFrameT | None:
10035 inplace = validate_bool_kwarg(inplace, "inplace")
10036 cond = com.apply_if_callable(cond, self)
10038 if try_cast is not lib.no_default:
10039 warnings.warn(
10040 "try_cast keyword is deprecated and will be removed in a "
10041 "future version.",
10042 FutureWarning,
10043 stacklevel=find_stack_level(),
10044 )
10046 # see gh-21891
10047 if not hasattr(cond, "__invert__"):
10048 cond = np.array(cond)
10050 return self.where(
10051 ~cond,
10052 other=other,
10053 inplace=inplace,
10054 axis=axis,
10055 level=level,
10056 )
10058 @doc(klass=_shared_doc_kwargs["klass"])
10059 def shift(
10060 self: NDFrameT,
10061 periods: int = 1,
10062 freq=None,
10063 axis: Axis = 0,
10064 fill_value: Hashable = None,
10065 ) -> NDFrameT:
10066 """
10067 Shift index by desired number of periods with an optional time `freq`.
10069 When `freq` is not passed, shift the index without realigning the data.
10070 If `freq` is passed (in this case, the index must be date or datetime,
10071 or it will raise a `NotImplementedError`), the index will be
10072 increased using the periods and the `freq`. `freq` can be inferred
10073 when specified as "infer" as long as either freq or inferred_freq
10074 attribute is set in the index.
10076 Parameters
10077 ----------
10078 periods : int
10079 Number of periods to shift. Can be positive or negative.
10080 freq : DateOffset, tseries.offsets, timedelta, or str, optional
10081 Offset to use from the tseries module or time rule (e.g. 'EOM').
10082 If `freq` is specified then the index values are shifted but the
10083 data is not realigned. That is, use `freq` if you would like to
10084 extend the index when shifting and preserve the original data.
10085 If `freq` is specified as "infer" then it will be inferred from
10086 the freq or inferred_freq attributes of the index. If neither of
10087 those attributes exist, a ValueError is thrown.
10088 axis : {{0 or 'index', 1 or 'columns', None}}, default None
10089 Shift direction. For `Series` this parameter is unused and defaults to 0.
10090 fill_value : object, optional
10091 The scalar value to use for newly introduced missing values.
10092 the default depends on the dtype of `self`.
10093 For numeric data, ``np.nan`` is used.
10094 For datetime, timedelta, or period data, etc. :attr:`NaT` is used.
10095 For extension dtypes, ``self.dtype.na_value`` is used.
10097 .. versionchanged:: 1.1.0
10099 Returns
10100 -------
10101 {klass}
10102 Copy of input object, shifted.
10104 See Also
10105 --------
10106 Index.shift : Shift values of Index.
10107 DatetimeIndex.shift : Shift values of DatetimeIndex.
10108 PeriodIndex.shift : Shift values of PeriodIndex.
10109 tshift : Shift the time index, using the index's frequency if
10110 available.
10112 Examples
10113 --------
10114 >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45],
10115 ... "Col2": [13, 23, 18, 33, 48],
10116 ... "Col3": [17, 27, 22, 37, 52]}},
10117 ... index=pd.date_range("2020-01-01", "2020-01-05"))
10118 >>> df
10119 Col1 Col2 Col3
10120 2020-01-01 10 13 17
10121 2020-01-02 20 23 27
10122 2020-01-03 15 18 22
10123 2020-01-04 30 33 37
10124 2020-01-05 45 48 52
10126 >>> df.shift(periods=3)
10127 Col1 Col2 Col3
10128 2020-01-01 NaN NaN NaN
10129 2020-01-02 NaN NaN NaN
10130 2020-01-03 NaN NaN NaN
10131 2020-01-04 10.0 13.0 17.0
10132 2020-01-05 20.0 23.0 27.0
10134 >>> df.shift(periods=1, axis="columns")
10135 Col1 Col2 Col3
10136 2020-01-01 NaN 10 13
10137 2020-01-02 NaN 20 23
10138 2020-01-03 NaN 15 18
10139 2020-01-04 NaN 30 33
10140 2020-01-05 NaN 45 48
10142 >>> df.shift(periods=3, fill_value=0)
10143 Col1 Col2 Col3
10144 2020-01-01 0 0 0
10145 2020-01-02 0 0 0
10146 2020-01-03 0 0 0
10147 2020-01-04 10 13 17
10148 2020-01-05 20 23 27
10150 >>> df.shift(periods=3, freq="D")
10151 Col1 Col2 Col3
10152 2020-01-04 10 13 17
10153 2020-01-05 20 23 27
10154 2020-01-06 15 18 22
10155 2020-01-07 30 33 37
10156 2020-01-08 45 48 52
10158 >>> df.shift(periods=3, freq="infer")
10159 Col1 Col2 Col3
10160 2020-01-04 10 13 17
10161 2020-01-05 20 23 27
10162 2020-01-06 15 18 22
10163 2020-01-07 30 33 37
10164 2020-01-08 45 48 52
10165 """
10166 if periods == 0:
10167 return self.copy()
10169 if freq is None:
10170 # when freq is None, data is shifted, index is not
10171 axis = self._get_axis_number(axis)
10172 new_data = self._mgr.shift(
10173 periods=periods, axis=axis, fill_value=fill_value
10174 )
10175 return self._constructor(new_data).__finalize__(self, method="shift")
10177 # when freq is given, index is shifted, data is not
10178 index = self._get_axis(axis)
10180 if freq == "infer":
10181 freq = getattr(index, "freq", None)
10183 if freq is None:
10184 freq = getattr(index, "inferred_freq", None)
10186 if freq is None:
10187 msg = "Freq was not set in the index hence cannot be inferred"
10188 raise ValueError(msg)
10190 elif isinstance(freq, str):
10191 freq = to_offset(freq)
10193 if isinstance(index, PeriodIndex):
10194 orig_freq = to_offset(index.freq)
10195 if freq != orig_freq:
10196 assert orig_freq is not None # for mypy
10197 raise ValueError(
10198 f"Given freq {freq.rule_code} does not match "
10199 f"PeriodIndex freq {orig_freq.rule_code}"
10200 )
10201 new_ax = index.shift(periods)
10202 else:
10203 new_ax = index.shift(periods, freq)
10205 result = self.set_axis(new_ax, axis=axis)
10206 return result.__finalize__(self, method="shift")
10208 @final
10209 def slice_shift(self: NDFrameT, periods: int = 1, axis=0) -> NDFrameT:
10210 """
10211 Equivalent to `shift` without copying data.
10213 .. deprecated:: 1.2.0
10214 slice_shift is deprecated,
10215 use DataFrame/Series.shift instead.
10217 The shifted data will not include the dropped periods and the
10218 shifted axis will be smaller than the original.
10220 Parameters
10221 ----------
10222 periods : int
10223 Number of periods to move, can be positive or negative.
10224 axis : {0 or 'index', 1 or 'columns', None}, default 0
10225 For `Series` this parameter is unused and defaults to 0.
10227 Returns
10228 -------
10229 shifted : same type as caller
10231 Notes
10232 -----
10233 While the `slice_shift` is faster than `shift`, you may pay for it
10234 later during alignment.
10235 """
10237 msg = (
10238 "The 'slice_shift' method is deprecated "
10239 "and will be removed in a future version. "
10240 "You can use DataFrame/Series.shift instead."
10241 )
10242 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
10244 if periods == 0:
10245 return self
10247 if periods > 0:
10248 vslicer = slice(None, -periods)
10249 islicer = slice(periods, None)
10250 else:
10251 vslicer = slice(-periods, None)
10252 islicer = slice(None, periods)
10254 new_obj = self._slice(vslicer, axis=axis)
10255 shifted_axis = self._get_axis(axis)[islicer]
10256 new_obj = new_obj.set_axis(shifted_axis, axis=axis, copy=False)
10257 return new_obj.__finalize__(self, method="slice_shift")
10259 @final
10260 def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFrameT:
10261 """
10262 Shift the time index, using the index's frequency if available.
10264 .. deprecated:: 1.1.0
10265 Use `shift` instead.
10267 Parameters
10268 ----------
10269 periods : int
10270 Number of periods to move, can be positive or negative.
10271 freq : DateOffset, timedelta, or str, default None
10272 Increment to use from the tseries module
10273 or time rule expressed as a string (e.g. 'EOM').
10274 axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0
10275 Corresponds to the axis that contains the Index.
10276 For `Series` this parameter is unused and defaults to 0.
10278 Returns
10279 -------
10280 shifted : Series/DataFrame
10282 Notes
10283 -----
10284 If freq is not specified then tries to use the freq or inferred_freq
10285 attributes of the index. If neither of those attributes exist, a
10286 ValueError is thrown
10287 """
10288 warnings.warn(
10289 (
10290 "tshift is deprecated and will be removed in a future version. "
10291 "Please use shift instead."
10292 ),
10293 FutureWarning,
10294 stacklevel=find_stack_level(),
10295 )
10297 if freq is None:
10298 freq = "infer"
10300 return self.shift(periods, freq, axis)
10302 def truncate(
10303 self: NDFrameT, before=None, after=None, axis=None, copy: bool_t = True
10304 ) -> NDFrameT:
10305 """
10306 Truncate a Series or DataFrame before and after some index value.
10308 This is a useful shorthand for boolean indexing based on index
10309 values above or below certain thresholds.
10311 Parameters
10312 ----------
10313 before : date, str, int
10314 Truncate all rows before this index value.
10315 after : date, str, int
10316 Truncate all rows after this index value.
10317 axis : {0 or 'index', 1 or 'columns'}, optional
10318 Axis to truncate. Truncates the index (rows) by default.
10319 For `Series` this parameter is unused and defaults to 0.
10320 copy : bool, default is True,
10321 Return a copy of the truncated section.
10323 Returns
10324 -------
10325 type of caller
10326 The truncated Series or DataFrame.
10328 See Also
10329 --------
10330 DataFrame.loc : Select a subset of a DataFrame by label.
10331 DataFrame.iloc : Select a subset of a DataFrame by position.
10333 Notes
10334 -----
10335 If the index being truncated contains only datetime values,
10336 `before` and `after` may be specified as strings instead of
10337 Timestamps.
10339 Examples
10340 --------
10341 >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
10342 ... 'B': ['f', 'g', 'h', 'i', 'j'],
10343 ... 'C': ['k', 'l', 'm', 'n', 'o']},
10344 ... index=[1, 2, 3, 4, 5])
10345 >>> df
10346 A B C
10347 1 a f k
10348 2 b g l
10349 3 c h m
10350 4 d i n
10351 5 e j o
10353 >>> df.truncate(before=2, after=4)
10354 A B C
10355 2 b g l
10356 3 c h m
10357 4 d i n
10359 The columns of a DataFrame can be truncated.
10361 >>> df.truncate(before="A", after="B", axis="columns")
10362 A B
10363 1 a f
10364 2 b g
10365 3 c h
10366 4 d i
10367 5 e j
10369 For Series, only rows can be truncated.
10371 >>> df['A'].truncate(before=2, after=4)
10372 2 b
10373 3 c
10374 4 d
10375 Name: A, dtype: object
10377 The index values in ``truncate`` can be datetimes or string
10378 dates.
10380 >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')
10381 >>> df = pd.DataFrame(index=dates, data={'A': 1})
10382 >>> df.tail()
10383 A
10384 2016-01-31 23:59:56 1
10385 2016-01-31 23:59:57 1
10386 2016-01-31 23:59:58 1
10387 2016-01-31 23:59:59 1
10388 2016-02-01 00:00:00 1
10390 >>> df.truncate(before=pd.Timestamp('2016-01-05'),
10391 ... after=pd.Timestamp('2016-01-10')).tail()
10392 A
10393 2016-01-09 23:59:56 1
10394 2016-01-09 23:59:57 1
10395 2016-01-09 23:59:58 1
10396 2016-01-09 23:59:59 1
10397 2016-01-10 00:00:00 1
10399 Because the index is a DatetimeIndex containing only dates, we can
10400 specify `before` and `after` as strings. They will be coerced to
10401 Timestamps before truncation.
10403 >>> df.truncate('2016-01-05', '2016-01-10').tail()
10404 A
10405 2016-01-09 23:59:56 1
10406 2016-01-09 23:59:57 1
10407 2016-01-09 23:59:58 1
10408 2016-01-09 23:59:59 1
10409 2016-01-10 00:00:00 1
10411 Note that ``truncate`` assumes a 0 value for any unspecified time
10412 component (midnight). This differs from partial string slicing, which
10413 returns any partially matching dates.
10415 >>> df.loc['2016-01-05':'2016-01-10', :].tail()
10416 A
10417 2016-01-10 23:59:55 1
10418 2016-01-10 23:59:56 1
10419 2016-01-10 23:59:57 1
10420 2016-01-10 23:59:58 1
10421 2016-01-10 23:59:59 1
10422 """
10423 if axis is None:
10424 axis = self._stat_axis_number
10425 axis = self._get_axis_number(axis)
10426 ax = self._get_axis(axis)
10428 # GH 17935
10429 # Check that index is sorted
10430 if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
10431 raise ValueError("truncate requires a sorted index")
10433 # if we have a date index, convert to dates, otherwise
10434 # treat like a slice
10435 if ax._is_all_dates:
10436 from pandas.core.tools.datetimes import to_datetime
10438 before = to_datetime(before)
10439 after = to_datetime(after)
10441 if before is not None and after is not None and before > after:
10442 raise ValueError(f"Truncate: {after} must be after {before}")
10444 if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1:
10445 before, after = after, before
10447 slicer = [slice(None, None)] * self._AXIS_LEN
10448 slicer[axis] = slice(before, after)
10449 result = self.loc[tuple(slicer)]
10451 if isinstance(ax, MultiIndex):
10452 setattr(result, self._get_axis_name(axis), ax.truncate(before, after))
10454 if copy:
10455 result = result.copy()
10457 return result
10459 @final
10460 @doc(klass=_shared_doc_kwargs["klass"])
10461 def tz_convert(
10462 self: NDFrameT, tz, axis=0, level=None, copy: bool_t = True
10463 ) -> NDFrameT:
10464 """
10465 Convert tz-aware axis to target time zone.
10467 Parameters
10468 ----------
10469 tz : str or tzinfo object
10470 axis : the axis to convert
10471 level : int, str, default None
10472 If axis is a MultiIndex, convert a specific level. Otherwise
10473 must be None.
10474 copy : bool, default True
10475 Also make a copy of the underlying data.
10477 Returns
10478 -------
10479 {klass}
10480 Object with time zone converted axis.
10482 Raises
10483 ------
10484 TypeError
10485 If the axis is tz-naive.
10486 """
10487 axis = self._get_axis_number(axis)
10488 ax = self._get_axis(axis)
10490 def _tz_convert(ax, tz):
10491 if not hasattr(ax, "tz_convert"):
10492 if len(ax) > 0:
10493 ax_name = self._get_axis_name(axis)
10494 raise TypeError(
10495 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
10496 )
10497 else:
10498 ax = DatetimeIndex([], tz=tz)
10499 else:
10500 ax = ax.tz_convert(tz)
10501 return ax
10503 # if a level is given it must be a MultiIndex level or
10504 # equivalent to the axis name
10505 if isinstance(ax, MultiIndex):
10506 level = ax._get_level_number(level)
10507 new_level = _tz_convert(ax.levels[level], tz)
10508 ax = ax.set_levels(new_level, level=level)
10509 else:
10510 if level not in (None, 0, ax.name):
10511 raise ValueError(f"The level {level} is not valid")
10512 ax = _tz_convert(ax, tz)
10514 result = self.copy(deep=copy)
10515 result = result.set_axis(ax, axis=axis, copy=False)
10516 return result.__finalize__(self, method="tz_convert")
10518 @final
10519 @doc(klass=_shared_doc_kwargs["klass"])
10520 def tz_localize(
10521 self: NDFrameT,
10522 tz,
10523 axis=0,
10524 level=None,
10525 copy: bool_t = True,
10526 ambiguous="raise",
10527 nonexistent: str = "raise",
10528 ) -> NDFrameT:
10529 """
10530 Localize tz-naive index of a Series or DataFrame to target time zone.
10532 This operation localizes the Index. To localize the values in a
10533 timezone-naive Series, use :meth:`Series.dt.tz_localize`.
10535 Parameters
10536 ----------
10537 tz : str or tzinfo
10538 axis : the axis to localize
10539 level : int, str, default None
10540 If axis ia a MultiIndex, localize a specific level. Otherwise
10541 must be None.
10542 copy : bool, default True
10543 Also make a copy of the underlying data.
10544 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
10545 When clocks moved backward due to DST, ambiguous times may arise.
10546 For example in Central European Time (UTC+01), when going from
10547 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
10548 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
10549 `ambiguous` parameter dictates how ambiguous times should be
10550 handled.
10552 - 'infer' will attempt to infer fall dst-transition hours based on
10553 order
10554 - bool-ndarray where True signifies a DST time, False designates
10555 a non-DST time (note that this flag is only applicable for
10556 ambiguous times)
10557 - 'NaT' will return NaT where there are ambiguous times
10558 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
10559 times.
10560 nonexistent : str, default 'raise'
10561 A nonexistent time does not exist in a particular timezone
10562 where clocks moved forward due to DST. Valid values are:
10564 - 'shift_forward' will shift the nonexistent time forward to the
10565 closest existing time
10566 - 'shift_backward' will shift the nonexistent time backward to the
10567 closest existing time
10568 - 'NaT' will return NaT where there are nonexistent times
10569 - timedelta objects will shift nonexistent times by the timedelta
10570 - 'raise' will raise an NonExistentTimeError if there are
10571 nonexistent times.
10573 Returns
10574 -------
10575 {klass}
10576 Same type as the input.
10578 Raises
10579 ------
10580 TypeError
10581 If the TimeSeries is tz-aware and tz is not None.
10583 Examples
10584 --------
10585 Localize local times:
10587 >>> s = pd.Series([1],
10588 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']))
10589 >>> s.tz_localize('CET')
10590 2018-09-15 01:30:00+02:00 1
10591 dtype: int64
10593 Be careful with DST changes. When there is sequential data, pandas
10594 can infer the DST time:
10596 >>> s = pd.Series(range(7),
10597 ... index=pd.DatetimeIndex(['2018-10-28 01:30:00',
10598 ... '2018-10-28 02:00:00',
10599 ... '2018-10-28 02:30:00',
10600 ... '2018-10-28 02:00:00',
10601 ... '2018-10-28 02:30:00',
10602 ... '2018-10-28 03:00:00',
10603 ... '2018-10-28 03:30:00']))
10604 >>> s.tz_localize('CET', ambiguous='infer')
10605 2018-10-28 01:30:00+02:00 0
10606 2018-10-28 02:00:00+02:00 1
10607 2018-10-28 02:30:00+02:00 2
10608 2018-10-28 02:00:00+01:00 3
10609 2018-10-28 02:30:00+01:00 4
10610 2018-10-28 03:00:00+01:00 5
10611 2018-10-28 03:30:00+01:00 6
10612 dtype: int64
10614 In some cases, inferring the DST is impossible. In such cases, you can
10615 pass an ndarray to the ambiguous parameter to set the DST explicitly
10617 >>> s = pd.Series(range(3),
10618 ... index=pd.DatetimeIndex(['2018-10-28 01:20:00',
10619 ... '2018-10-28 02:36:00',
10620 ... '2018-10-28 03:46:00']))
10621 >>> s.tz_localize('CET', ambiguous=np.array([True, True, False]))
10622 2018-10-28 01:20:00+02:00 0
10623 2018-10-28 02:36:00+02:00 1
10624 2018-10-28 03:46:00+01:00 2
10625 dtype: int64
10627 If the DST transition causes nonexistent times, you can shift these
10628 dates forward or backward with a timedelta object or `'shift_forward'`
10629 or `'shift_backward'`.
10631 >>> s = pd.Series(range(2),
10632 ... index=pd.DatetimeIndex(['2015-03-29 02:30:00',
10633 ... '2015-03-29 03:30:00']))
10634 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
10635 2015-03-29 03:00:00+02:00 0
10636 2015-03-29 03:30:00+02:00 1
10637 dtype: int64
10638 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
10639 2015-03-29 01:59:59.999999999+01:00 0
10640 2015-03-29 03:30:00+02:00 1
10641 dtype: int64
10642 >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
10643 2015-03-29 03:30:00+02:00 0
10644 2015-03-29 03:30:00+02:00 1
10645 dtype: int64
10646 """
10647 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
10648 if nonexistent not in nonexistent_options and not isinstance(
10649 nonexistent, timedelta
10650 ):
10651 raise ValueError(
10652 "The nonexistent argument must be one of 'raise', "
10653 "'NaT', 'shift_forward', 'shift_backward' or "
10654 "a timedelta object"
10655 )
10657 axis = self._get_axis_number(axis)
10658 ax = self._get_axis(axis)
10660 def _tz_localize(ax, tz, ambiguous, nonexistent):
10661 if not hasattr(ax, "tz_localize"):
10662 if len(ax) > 0:
10663 ax_name = self._get_axis_name(axis)
10664 raise TypeError(
10665 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
10666 )
10667 else:
10668 ax = DatetimeIndex([], tz=tz)
10669 else:
10670 ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent)
10671 return ax
10673 # if a level is given it must be a MultiIndex level or
10674 # equivalent to the axis name
10675 if isinstance(ax, MultiIndex):
10676 level = ax._get_level_number(level)
10677 new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent)
10678 ax = ax.set_levels(new_level, level=level)
10679 else:
10680 if level not in (None, 0, ax.name):
10681 raise ValueError(f"The level {level} is not valid")
10682 ax = _tz_localize(ax, tz, ambiguous, nonexistent)
10684 result = self.copy(deep=copy)
10685 result = result.set_axis(ax, axis=axis, copy=False)
10686 return result.__finalize__(self, method="tz_localize")
10688 # ----------------------------------------------------------------------
10689 # Numeric Methods
10691 @final
10692 def describe(
10693 self: NDFrameT,
10694 percentiles=None,
10695 include=None,
10696 exclude=None,
10697 datetime_is_numeric: bool_t = False,
10698 ) -> NDFrameT:
10699 """
10700 Generate descriptive statistics.
10702 Descriptive statistics include those that summarize the central
10703 tendency, dispersion and shape of a
10704 dataset's distribution, excluding ``NaN`` values.
10706 Analyzes both numeric and object series, as well
10707 as ``DataFrame`` column sets of mixed data types. The output
10708 will vary depending on what is provided. Refer to the notes
10709 below for more detail.
10711 Parameters
10712 ----------
10713 percentiles : list-like of numbers, optional
10714 The percentiles to include in the output. All should
10715 fall between 0 and 1. The default is
10716 ``[.25, .5, .75]``, which returns the 25th, 50th, and
10717 75th percentiles.
10718 include : 'all', list-like of dtypes or None (default), optional
10719 A white list of data types to include in the result. Ignored
10720 for ``Series``. Here are the options:
10722 - 'all' : All columns of the input will be included in the output.
10723 - A list-like of dtypes : Limits the results to the
10724 provided data types.
10725 To limit the result to numeric types submit
10726 ``numpy.number``. To limit it instead to object columns submit
10727 the ``numpy.object`` data type. Strings
10728 can also be used in the style of
10729 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
10730 select pandas categorical columns, use ``'category'``
10731 - None (default) : The result will include all numeric columns.
10732 exclude : list-like of dtypes or None (default), optional,
10733 A black list of data types to omit from the result. Ignored
10734 for ``Series``. Here are the options:
10736 - A list-like of dtypes : Excludes the provided data types
10737 from the result. To exclude numeric types submit
10738 ``numpy.number``. To exclude object columns submit the data
10739 type ``numpy.object``. Strings can also be used in the style of
10740 ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
10741 exclude pandas categorical columns, use ``'category'``
10742 - None (default) : The result will exclude nothing.
10743 datetime_is_numeric : bool, default False
10744 Whether to treat datetime dtypes as numeric. This affects statistics
10745 calculated for the column. For DataFrame input, this also
10746 controls whether datetime columns are included by default.
10748 .. versionadded:: 1.1.0
10750 Returns
10751 -------
10752 Series or DataFrame
10753 Summary statistics of the Series or Dataframe provided.
10755 See Also
10756 --------
10757 DataFrame.count: Count number of non-NA/null observations.
10758 DataFrame.max: Maximum of the values in the object.
10759 DataFrame.min: Minimum of the values in the object.
10760 DataFrame.mean: Mean of the values.
10761 DataFrame.std: Standard deviation of the observations.
10762 DataFrame.select_dtypes: Subset of a DataFrame including/excluding
10763 columns based on their dtype.
10765 Notes
10766 -----
10767 For numeric data, the result's index will include ``count``,
10768 ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
10769 upper percentiles. By default the lower percentile is ``25`` and the
10770 upper percentile is ``75``. The ``50`` percentile is the
10771 same as the median.
10773 For object data (e.g. strings or timestamps), the result's index
10774 will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
10775 is the most common value. The ``freq`` is the most common value's
10776 frequency. Timestamps also include the ``first`` and ``last`` items.
10778 If multiple object values have the highest count, then the
10779 ``count`` and ``top`` results will be arbitrarily chosen from
10780 among those with the highest count.
10782 For mixed data types provided via a ``DataFrame``, the default is to
10783 return only an analysis of numeric columns. If the dataframe consists
10784 only of object and categorical data without any numeric columns, the
10785 default is to return an analysis of both the object and categorical
10786 columns. If ``include='all'`` is provided as an option, the result
10787 will include a union of attributes of each type.
10789 The `include` and `exclude` parameters can be used to limit
10790 which columns in a ``DataFrame`` are analyzed for the output.
10791 The parameters are ignored when analyzing a ``Series``.
10793 Examples
10794 --------
10795 Describing a numeric ``Series``.
10797 >>> s = pd.Series([1, 2, 3])
10798 >>> s.describe()
10799 count 3.0
10800 mean 2.0
10801 std 1.0
10802 min 1.0
10803 25% 1.5
10804 50% 2.0
10805 75% 2.5
10806 max 3.0
10807 dtype: float64
10809 Describing a categorical ``Series``.
10811 >>> s = pd.Series(['a', 'a', 'b', 'c'])
10812 >>> s.describe()
10813 count 4
10814 unique 3
10815 top a
10816 freq 2
10817 dtype: object
10819 Describing a timestamp ``Series``.
10821 >>> s = pd.Series([
10822 ... np.datetime64("2000-01-01"),
10823 ... np.datetime64("2010-01-01"),
10824 ... np.datetime64("2010-01-01")
10825 ... ])
10826 >>> s.describe(datetime_is_numeric=True)
10827 count 3
10828 mean 2006-09-01 08:00:00
10829 min 2000-01-01 00:00:00
10830 25% 2004-12-31 12:00:00
10831 50% 2010-01-01 00:00:00
10832 75% 2010-01-01 00:00:00
10833 max 2010-01-01 00:00:00
10834 dtype: object
10836 Describing a ``DataFrame``. By default only numeric fields
10837 are returned.
10839 >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']),
10840 ... 'numeric': [1, 2, 3],
10841 ... 'object': ['a', 'b', 'c']
10842 ... })
10843 >>> df.describe()
10844 numeric
10845 count 3.0
10846 mean 2.0
10847 std 1.0
10848 min 1.0
10849 25% 1.5
10850 50% 2.0
10851 75% 2.5
10852 max 3.0
10854 Describing all columns of a ``DataFrame`` regardless of data type.
10856 >>> df.describe(include='all') # doctest: +SKIP
10857 categorical numeric object
10858 count 3 3.0 3
10859 unique 3 NaN 3
10860 top f NaN a
10861 freq 1 NaN 1
10862 mean NaN 2.0 NaN
10863 std NaN 1.0 NaN
10864 min NaN 1.0 NaN
10865 25% NaN 1.5 NaN
10866 50% NaN 2.0 NaN
10867 75% NaN 2.5 NaN
10868 max NaN 3.0 NaN
10870 Describing a column from a ``DataFrame`` by accessing it as
10871 an attribute.
10873 >>> df.numeric.describe()
10874 count 3.0
10875 mean 2.0
10876 std 1.0
10877 min 1.0
10878 25% 1.5
10879 50% 2.0
10880 75% 2.5
10881 max 3.0
10882 Name: numeric, dtype: float64
10884 Including only numeric columns in a ``DataFrame`` description.
10886 >>> df.describe(include=[np.number])
10887 numeric
10888 count 3.0
10889 mean 2.0
10890 std 1.0
10891 min 1.0
10892 25% 1.5
10893 50% 2.0
10894 75% 2.5
10895 max 3.0
10897 Including only string columns in a ``DataFrame`` description.
10899 >>> df.describe(include=[object]) # doctest: +SKIP
10900 object
10901 count 3
10902 unique 3
10903 top a
10904 freq 1
10906 Including only categorical columns from a ``DataFrame`` description.
10908 >>> df.describe(include=['category'])
10909 categorical
10910 count 3
10911 unique 3
10912 top d
10913 freq 1
10915 Excluding numeric columns from a ``DataFrame`` description.
10917 >>> df.describe(exclude=[np.number]) # doctest: +SKIP
10918 categorical object
10919 count 3 3
10920 unique 3 3
10921 top f a
10922 freq 1 1
10924 Excluding object columns from a ``DataFrame`` description.
10926 >>> df.describe(exclude=[object]) # doctest: +SKIP
10927 categorical numeric
10928 count 3 3.0
10929 unique 3 NaN
10930 top f NaN
10931 freq 1 NaN
10932 mean NaN 2.0
10933 std NaN 1.0
10934 min NaN 1.0
10935 25% NaN 1.5
10936 50% NaN 2.0
10937 75% NaN 2.5
10938 max NaN 3.0
10939 """
10940 return describe_ndframe(
10941 obj=self,
10942 include=include,
10943 exclude=exclude,
10944 datetime_is_numeric=datetime_is_numeric,
10945 percentiles=percentiles,
10946 )
    @final
    def pct_change(
        self: NDFrameT,
        periods=1,
        fill_method="pad",
        limit=None,
        freq=None,
        **kwargs,
    ) -> NDFrameT:
        """
        Percentage change between the current and a prior element.

        Computes the percentage change from the immediately previous row by
        default. This is useful in comparing the percentage of change in a time
        series of elements.

        Parameters
        ----------
        periods : int, default 1
            Periods to shift for forming percent change.
        fill_method : str, default 'pad'
            How to handle NAs **before** computing percent changes.
        limit : int, default None
            The number of consecutive NAs to fill before stopping.
        freq : DateOffset, timedelta, or str, optional
            Increment to use from time series API (e.g. 'M' or BDay()).
        **kwargs
            Additional keyword arguments are passed into
            `DataFrame.shift` or `Series.shift`.

        Returns
        -------
        chg : Series or DataFrame
            The same type as the calling object.

        See Also
        --------
        Series.diff : Compute the difference of two elements in a Series.
        DataFrame.diff : Compute the difference of two elements in a DataFrame.
        Series.shift : Shift the index by some number of periods.
        DataFrame.shift : Shift the index by some number of periods.

        Examples
        --------
        **Series**

        >>> s = pd.Series([90, 91, 85])
        >>> s
        0    90
        1    91
        2    85
        dtype: int64

        >>> s.pct_change()
        0         NaN
        1    0.011111
        2   -0.065934
        dtype: float64

        >>> s.pct_change(periods=2)
        0         NaN
        1         NaN
        2   -0.055556
        dtype: float64

        See the percentage change in a Series where filling NAs with last
        valid observation forward to next valid.

        >>> s = pd.Series([90, 91, None, 85])
        >>> s
        0    90.0
        1    91.0
        2     NaN
        3    85.0
        dtype: float64

        >>> s.pct_change(fill_method='ffill')
        0         NaN
        1    0.011111
        2    0.000000
        3   -0.065934
        dtype: float64

        **DataFrame**

        Percentage change in French franc, Deutsche Mark, and Italian lira from
        1980-01-01 to 1980-03-01.

        >>> df = pd.DataFrame({
        ...     'FR': [4.0405, 4.0963, 4.3149],
        ...     'GR': [1.7246, 1.7482, 1.8519],
        ...     'IT': [804.74, 810.01, 860.13]},
        ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
        >>> df
                        FR      GR      IT
        1980-01-01  4.0405  1.7246  804.74
        1980-02-01  4.0963  1.7482  810.01
        1980-03-01  4.3149  1.8519  860.13

        >>> df.pct_change()
                          FR        GR        IT
        1980-01-01       NaN       NaN       NaN
        1980-02-01  0.013810  0.013684  0.006549
        1980-03-01  0.053365  0.059318  0.061876

        Percentage of change in GOOG and APPL stock volume. Shows computing
        the percentage change between columns.

        >>> df = pd.DataFrame({
        ...     '2016': [1769950, 30586265],
        ...     '2015': [1500923, 40912316],
        ...     '2014': [1371819, 41403351]},
        ...     index=['GOOG', 'APPL'])
        >>> df
                  2016      2015      2014
        GOOG   1769950   1500923   1371819
        APPL  30586265  40912316  41403351

        >>> df.pct_change(axis='columns', periods=-1)
                  2016      2015  2014
        GOOG  0.179241  0.094112   NaN
        APPL -0.252395 -0.011860   NaN
        """
        # "axis" travels in **kwargs (it is forwarded to shift below), so pop it
        # here to resolve which axis to fill/shift along.
        axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name))
        if fill_method is None:
            data = self
        else:
            # Fill NAs first so consecutive valid observations are compared.
            _data = self.fillna(method=fill_method, axis=axis, limit=limit)
            assert _data is not None  # needed for mypy
            data = _data

        shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs)
        # Unsupported left operand type for / ("NDFrameT")
        rs = data / shifted - 1  # type: ignore[operator]
        if freq is not None:
            # Shift method is implemented differently when freq is not None
            # We want to restore the original index
            rs = rs.loc[~rs.index.duplicated()]
            rs = rs.reindex_like(data)
        return rs.__finalize__(self, method="pct_change")
11089 @final
11090 def _agg_by_level(
11091 self,
11092 name: str,
11093 axis: Axis = 0,
11094 level: Level = 0,
11095 skipna: bool_t = True,
11096 **kwargs,
11097 ):
11098 if axis is None:
11099 raise ValueError("Must specify 'axis' when aggregating by level.")
11100 grouped = self.groupby(level=level, axis=axis, sort=False)
11101 if hasattr(grouped, name) and skipna:
11102 return getattr(grouped, name)(**kwargs)
11103 axis = self._get_axis_number(axis)
11104 method = getattr(type(self), name)
11105 applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs)
11106 return grouped.aggregate(applyf)
    @final
    def _logical_func(
        self,
        name: str,
        func,
        axis: Axis = 0,
        bool_only: bool_t | None = None,
        skipna: bool_t = True,
        level: Level | None = None,
        **kwargs,
    ) -> Series | bool_t:
        """
        Shared implementation backing ``any`` and ``all``.

        Parameters
        ----------
        name : str
            Reduction name ("any" or "all"), used for validation messages and
            for dispatch in ``_agg_by_level``.
        func : callable
            The nanops reduction to apply (e.g. ``nanops.nanany``).
        axis, bool_only, skipna, level, **kwargs
            Forwarded from the public ``any``/``all`` signatures.
        """
        nv.validate_logical_func((), kwargs, fname=name)
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        if level is not None:
            # level= is deprecated; delegate to the groupby-based fallback.
            warnings.warn(
                "Using the level keyword in DataFrame and Series aggregations is "
                "deprecated and will be removed in a future version. Use groupby "
                "instead. df.any(level=1) should use df.groupby(level=1).any()",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            if bool_only is not None:
                raise NotImplementedError(
                    "Option bool_only is not implemented with option level."
                )
            return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)

        if self.ndim > 1 and axis is None:
            # Reduce along one dimension then the other, to simplify DataFrame._reduce
            res = self._logical_func(
                name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
            )
            return res._logical_func(name, func, skipna=skipna, **kwargs)

        if (
            self.ndim > 1
            and axis == 1
            and len(self._mgr.arrays) > 1
            # TODO(EA2D): special-case not needed
            and all(x.ndim == 2 for x in self._mgr.arrays)
            and bool_only is not None
            and not kwargs
        ):
            # Fastpath avoiding potentially expensive transpose
            obj = self
            if bool_only:
                obj = self._get_bool_data()
            return obj._reduce_axis1(name, func, skipna=skipna)

        return self._reduce(
            func,
            name=name,
            axis=axis,
            skipna=skipna,
            numeric_only=bool_only,
            filter_type="bool",
        )
11166 def any(
11167 self,
11168 axis: Axis = 0,
11169 bool_only: bool_t | None = None,
11170 skipna: bool_t = True,
11171 level: Level | None = None,
11172 **kwargs,
11173 ) -> DataFrame | Series | bool_t:
11174 return self._logical_func(
11175 "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs
11176 )
11178 def all(
11179 self,
11180 axis: Axis = 0,
11181 bool_only: bool_t | None = None,
11182 skipna: bool_t = True,
11183 level: Level | None = None,
11184 **kwargs,
11185 ) -> Series | bool_t:
11186 return self._logical_func(
11187 "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs
11188 )
    @final
    def _accum_func(
        self,
        name: str,
        func,
        axis: Axis | None = None,
        skipna: bool_t = True,
        *args,
        **kwargs,
    ):
        """
        Shared implementation backing cummin/cummax/cumsum/cumprod.

        ``func`` is the accumulation ufunc (e.g. ``np.cumsum``); ``*args`` /
        ``**kwargs`` exist only for numpy-signature compatibility and are
        validated away by ``nv.validate_cum_func_with_skipna``.
        """
        skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name)
        if axis is None:
            axis = self._stat_axis_number
        else:
            axis = self._get_axis_number(axis)

        if axis == 1:
            # Accumulate along rows by transposing, recursing on axis=0,
            # then transposing back.
            return self.T._accum_func(
                name, func, axis=0, skipna=skipna, *args, **kwargs
            ).T

        def block_accum_func(blk_values):
            # Block values are stored transposed relative to the frame;
            # un-transpose, accumulate, and re-transpose. Plain ndarrays and
            # EA blocks without .T pass through unchanged.
            values = blk_values.T if hasattr(blk_values, "T") else blk_values

            result = nanops.na_accum_func(values, func, skipna=skipna)

            result = result.T if hasattr(result, "T") else result
            return result

        # Apply per-block through the manager to preserve dtypes/layout.
        result = self._mgr.apply(block_accum_func)

        return self._constructor(result).__finalize__(self, method=name)
11223 def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
11224 return self._accum_func(
11225 "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs
11226 )
11228 def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
11229 return self._accum_func(
11230 "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs
11231 )
11233 def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
11234 return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs)
11236 def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
11237 return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs)
    @final
    def _stat_function_ddof(
        self,
        name: str,
        func,
        axis: Axis | None = None,
        skipna: bool_t = True,
        level: Level | None = None,
        ddof: int = 1,
        numeric_only: bool_t | None = None,
        **kwargs,
    ) -> Series | float:
        """
        Shared implementation backing sem/var/std (reductions that take a
        delta-degrees-of-freedom argument).
        """
        nv.validate_stat_ddof_func((), kwargs, fname=name)
        validate_bool_kwarg(skipna, "skipna", none_allowed=False)
        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            # level= is deprecated; delegate to the groupby-based fallback.
            warnings.warn(
                "Using the level keyword in DataFrame and Series aggregations is "
                "deprecated and will be removed in a future version. Use groupby "
                "instead. df.var(level=1) should use df.groupby(level=1).var().",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return self._agg_by_level(
                name, axis=axis, level=level, skipna=skipna, ddof=ddof
            )
        return self._reduce(
            func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
        )
11270 def sem(
11271 self,
11272 axis: Axis | None = None,
11273 skipna: bool_t = True,
11274 level: Level | None = None,
11275 ddof: int = 1,
11276 numeric_only: bool_t | None = None,
11277 **kwargs,
11278 ) -> Series | float:
11279 return self._stat_function_ddof(
11280 "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs
11281 )
11283 def var(
11284 self,
11285 axis: Axis | None = None,
11286 skipna: bool_t = True,
11287 level: Level | None = None,
11288 ddof: int = 1,
11289 numeric_only: bool_t | None = None,
11290 **kwargs,
11291 ) -> Series | float:
11292 return self._stat_function_ddof(
11293 "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs
11294 )
11296 def std(
11297 self,
11298 axis: Axis | None = None,
11299 skipna: bool_t = True,
11300 level: Level | None = None,
11301 ddof: int = 1,
11302 numeric_only: bool_t | None = None,
11303 **kwargs,
11304 ) -> Series | float:
11305 return self._stat_function_ddof(
11306 "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs
11307 )
    @final
    def _stat_function(
        self,
        name: str,
        func,
        axis: Axis | None | lib.NoDefault = None,
        skipna: bool_t = True,
        level: Level | None = None,
        numeric_only: bool_t | None = None,
        **kwargs,
    ):
        """
        Shared implementation backing min/max/mean/median/skew/kurt.

        ``axis`` may be ``lib.no_default`` so we can distinguish "caller did
        not pass axis" from an explicit ``axis=None`` for deprecation
        warnings.
        """
        if name == "median":
            nv.validate_median((), kwargs)
        else:
            nv.validate_stat_func((), kwargs, fname=name)

        validate_bool_kwarg(skipna, "skipna", none_allowed=False)

        if axis is None and level is None and self.ndim > 1:
            # user must have explicitly passed axis=None
            # GH#21597
            warnings.warn(
                f"In a future version, DataFrame.{name}(axis=None) will return a "
                f"scalar {name} over the entire DataFrame. To retain the old "
                f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        if axis is lib.no_default:
            # No explicit axis: fall through to the default below.
            axis = None

        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            # level= is deprecated; delegate to the groupby-based fallback.
            warnings.warn(
                "Using the level keyword in DataFrame and Series aggregations is "
                "deprecated and will be removed in a future version. Use groupby "
                "instead. df.median(level=1) should use df.groupby(level=1).median().",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return self._agg_by_level(
                name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only
            )
        return self._reduce(
            func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
        )
11357 def min(
11358 self,
11359 axis: Axis | None | lib.NoDefault = lib.no_default,
11360 skipna: bool_t = True,
11361 level: Level | None = None,
11362 numeric_only: bool_t | None = None,
11363 **kwargs,
11364 ):
11365 return self._stat_function(
11366 "min",
11367 nanops.nanmin,
11368 axis,
11369 skipna,
11370 level,
11371 numeric_only,
11372 **kwargs,
11373 )
11375 def max(
11376 self,
11377 axis: Axis | None | lib.NoDefault = lib.no_default,
11378 skipna: bool_t = True,
11379 level: Level | None = None,
11380 numeric_only: bool_t | None = None,
11381 **kwargs,
11382 ):
11383 return self._stat_function(
11384 "max",
11385 nanops.nanmax,
11386 axis,
11387 skipna,
11388 level,
11389 numeric_only,
11390 **kwargs,
11391 )
11393 def mean(
11394 self,
11395 axis: Axis | None | lib.NoDefault = lib.no_default,
11396 skipna: bool_t = True,
11397 level: Level | None = None,
11398 numeric_only: bool_t | None = None,
11399 **kwargs,
11400 ) -> Series | float:
11401 return self._stat_function(
11402 "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs
11403 )
11405 def median(
11406 self,
11407 axis: Axis | None | lib.NoDefault = lib.no_default,
11408 skipna: bool_t = True,
11409 level: Level | None = None,
11410 numeric_only: bool_t | None = None,
11411 **kwargs,
11412 ) -> Series | float:
11413 return self._stat_function(
11414 "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs
11415 )
11417 def skew(
11418 self,
11419 axis: Axis | None | lib.NoDefault = lib.no_default,
11420 skipna: bool_t = True,
11421 level: Level | None = None,
11422 numeric_only: bool_t | None = None,
11423 **kwargs,
11424 ) -> Series | float:
11425 return self._stat_function(
11426 "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs
11427 )
11429 def kurt(
11430 self,
11431 axis: Axis | None | lib.NoDefault = lib.no_default,
11432 skipna: bool_t = True,
11433 level: Level | None = None,
11434 numeric_only: bool_t | None = None,
11435 **kwargs,
11436 ) -> Series | float:
11437 return self._stat_function(
11438 "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs
11439 )
11441 kurtosis = kurt
    @final
    def _min_count_stat_function(
        self,
        name: str,
        func,
        axis: Axis | None = None,
        skipna: bool_t = True,
        level: Level | None = None,
        numeric_only: bool_t | None = None,
        min_count: int = 0,
        **kwargs,
    ):
        """
        Shared implementation backing sum/prod (reductions that take a
        ``min_count`` argument).
        """
        if name == "sum":
            nv.validate_sum((), kwargs)
        elif name == "prod":
            nv.validate_prod((), kwargs)
        else:
            nv.validate_stat_func((), kwargs, fname=name)

        validate_bool_kwarg(skipna, "skipna", none_allowed=False)

        if axis is None:
            axis = self._stat_axis_number
        if level is not None:
            # level= is deprecated; delegate to the groupby-based fallback.
            warnings.warn(
                "Using the level keyword in DataFrame and Series aggregations is "
                "deprecated and will be removed in a future version. Use groupby "
                "instead. df.sum(level=1) should use df.groupby(level=1).sum().",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return self._agg_by_level(
                name,
                axis=axis,
                level=level,
                skipna=skipna,
                min_count=min_count,
                numeric_only=numeric_only,
            )

        return self._reduce(
            func,
            name=name,
            axis=axis,
            skipna=skipna,
            numeric_only=numeric_only,
            min_count=min_count,
        )
11492 def sum(
11493 self,
11494 axis: Axis | None = None,
11495 skipna: bool_t = True,
11496 level: Level | None = None,
11497 numeric_only: bool_t | None = None,
11498 min_count=0,
11499 **kwargs,
11500 ):
11501 return self._min_count_stat_function(
11502 "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs
11503 )
11505 def prod(
11506 self,
11507 axis: Axis | None = None,
11508 skipna: bool_t = True,
11509 level: Level | None = None,
11510 numeric_only: bool_t | None = None,
11511 min_count: int = 0,
11512 **kwargs,
11513 ):
11514 return self._min_count_stat_function(
11515 "prod",
11516 nanops.nanprod,
11517 axis,
11518 skipna,
11519 level,
11520 numeric_only,
11521 min_count,
11522 **kwargs,
11523 )
11525 product = prod
11527 def mad(
11528 self,
11529 axis: Axis | None = None,
11530 skipna: bool_t = True,
11531 level: Level | None = None,
11532 ) -> Series | float:
11533 """
11534 {desc}
11536 .. deprecated:: 1.5.0
11537 mad is deprecated.
11539 Parameters
11540 ----------
11541 axis : {axis_descr}
11542 Axis for the function to be applied on.
11543 For `Series` this parameter is unused and defaults to 0.
11544 skipna : bool, default True
11545 Exclude NA/null values when computing the result.
11546 level : int or level name, default None
11547 If the axis is a MultiIndex (hierarchical), count along a
11548 particular level, collapsing into a {name1}.
11550 Returns
11551 -------
11552 {name1} or {name2} (if level specified)\
11553 {see_also}\
11554 {examples}
11555 """
11556 msg = (
11557 "The 'mad' method is deprecated and will be removed in a future version. "
11558 "To compute the same result, you may do `(df - df.mean()).abs().mean()`."
11559 )
11560 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
11562 if not is_bool(skipna):
11563 warnings.warn(
11564 "Passing None for skipna is deprecated and will raise in a future"
11565 "version. Pass True instead. Only boolean values will be allowed "
11566 "in the future.",
11567 FutureWarning,
11568 stacklevel=find_stack_level(),
11569 )
11570 skipna = True
11571 if axis is None:
11572 axis = self._stat_axis_number
11573 if level is not None:
11574 warnings.warn(
11575 "Using the level keyword in DataFrame and Series aggregations is "
11576 "deprecated and will be removed in a future version. Use groupby "
11577 "instead. df.mad(level=1) should use df.groupby(level=1).mad()",
11578 FutureWarning,
11579 stacklevel=find_stack_level(),
11580 )
11581 return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna)
11583 data = self._get_numeric_data()
11584 if axis == 0:
11585 # error: Unsupported operand types for - ("NDFrame" and "float")
11586 demeaned = data - data.mean(axis=0) # type: ignore[operator]
11587 else:
11588 demeaned = data.sub(data.mean(axis=1), axis=0)
11589 return np.abs(demeaned).mean(axis=axis, skipna=skipna)
    @classmethod
    def _add_numeric_operations(cls):
        """
        Add the operations to the cls; evaluate the doc strings again
        """
        # Rendered doc fragments depend on the concrete subclass
        # (Series vs DataFrame), so each wrapper is re-created here with
        # class-specific @doc arguments and then installed via setattr.
        axis_descr, name1, name2 = _doc_params(cls)

        @deprecate_nonkeyword_arguments(
            version=None,
            allowed_args=["self"],
            name="DataFrame.any and Series.any",
        )
        @doc(
            _bool_doc,
            desc=_any_desc,
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            see_also=_any_see_also,
            examples=_any_examples,
            empty_value=False,
        )
        def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
            return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs)

        setattr(cls, "any", any)

        @doc(
            _bool_doc,
            desc=_all_desc,
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            see_also=_all_see_also,
            examples=_all_examples,
            empty_value=True,
        )
        def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs):
            return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs)

        setattr(cls, "all", all)

        # error: Argument 1 to "doc" has incompatible type "Optional[str]"; expected
        # "Union[str, Callable[..., Any]]"
        @doc(
            NDFrame.mad.__doc__,  # type: ignore[arg-type]
            desc="Return the mean absolute deviation of the values "
            "over the requested axis.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            see_also="",
            examples="",
        )
        def mad(self, axis=None, skipna=True, level=None):
            return NDFrame.mad(self, axis, skipna, level)

        setattr(cls, "mad", mad)

        @doc(
            _num_ddof_doc,
            desc="Return unbiased standard error of the mean over requested "
            "axis.\n\nNormalized by N-1 by default. This can be changed "
            "using the ddof argument",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            notes="",
            examples="",
        )
        def sem(
            self,
            axis=None,
            skipna=True,
            level=None,
            ddof=1,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs)

        setattr(cls, "sem", sem)

        @doc(
            _num_ddof_doc,
            desc="Return unbiased variance over requested axis.\n\nNormalized by "
            "N-1 by default. This can be changed using the ddof argument.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            notes="",
            examples=_var_examples,
        )
        def var(
            self,
            axis=None,
            skipna=True,
            level=None,
            ddof=1,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs)

        setattr(cls, "var", var)

        @doc(
            _num_ddof_doc,
            desc="Return sample standard deviation over requested axis."
            "\n\nNormalized by N-1 by default. This can be changed using the "
            "ddof argument.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            notes=_std_notes,
            examples=_std_examples,
        )
        def std(
            self,
            axis=None,
            skipna=True,
            level=None,
            ddof=1,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs)

        setattr(cls, "std", std)

        @doc(
            _cnum_doc,
            desc="minimum",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            accum_func_name="min",
            examples=_cummin_examples,
        )
        def cummin(self, axis=None, skipna=True, *args, **kwargs):
            return NDFrame.cummin(self, axis, skipna, *args, **kwargs)

        setattr(cls, "cummin", cummin)

        @doc(
            _cnum_doc,
            desc="maximum",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            accum_func_name="max",
            examples=_cummax_examples,
        )
        def cummax(self, axis=None, skipna=True, *args, **kwargs):
            return NDFrame.cummax(self, axis, skipna, *args, **kwargs)

        setattr(cls, "cummax", cummax)

        @doc(
            _cnum_doc,
            desc="sum",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            accum_func_name="sum",
            examples=_cumsum_examples,
        )
        def cumsum(self, axis=None, skipna=True, *args, **kwargs):
            return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)

        setattr(cls, "cumsum", cumsum)

        @doc(
            _cnum_doc,
            desc="product",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            accum_func_name="prod",
            examples=_cumprod_examples,
        )
        def cumprod(self, axis=None, skipna=True, *args, **kwargs):
            return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)

        setattr(cls, "cumprod", cumprod)

        @doc(
            _num_doc,
            desc="Return the sum of the values over the requested axis.\n\n"
            "This is equivalent to the method ``numpy.sum``.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count=_min_count_stub,
            see_also=_stat_func_see_also,
            examples=_sum_examples,
        )
        def sum(
            self,
            axis=None,
            skipna=True,
            level=None,
            numeric_only=None,
            min_count=0,
            **kwargs,
        ):
            return NDFrame.sum(
                self, axis, skipna, level, numeric_only, min_count, **kwargs
            )

        setattr(cls, "sum", sum)

        @doc(
            _num_doc,
            desc="Return the product of the values over the requested axis.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count=_min_count_stub,
            see_also=_stat_func_see_also,
            examples=_prod_examples,
        )
        def prod(
            self,
            axis=None,
            skipna=True,
            level=None,
            numeric_only=None,
            min_count=0,
            **kwargs,
        ):
            return NDFrame.prod(
                self, axis, skipna, level, numeric_only, min_count, **kwargs
            )

        setattr(cls, "prod", prod)
        cls.product = prod

        @doc(
            _num_doc,
            desc="Return the mean of the values over the requested axis.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def mean(
            self,
            axis: int | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "mean", mean)

        @doc(
            _num_doc,
            desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def skew(
            self,
            axis: int | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "skew", skew)

        @doc(
            _num_doc,
            desc="Return unbiased kurtosis over requested axis.\n\n"
            "Kurtosis obtained using Fisher's definition of\n"
            "kurtosis (kurtosis of normal == 0.0). Normalized "
            "by N-1.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def kurt(
            self,
            axis: Axis | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "kurt", kurt)
        cls.kurtosis = kurt

        @doc(
            _num_doc,
            desc="Return the median of the values over the requested axis.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def median(
            self,
            axis: int | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "median", median)

        @doc(
            _num_doc,
            desc="Return the maximum of the values over the requested axis.\n\n"
            "If you want the *index* of the maximum, use ``idxmax``. This is "
            "the equivalent of the ``numpy.ndarray`` method ``argmax``.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also=_stat_func_see_also,
            examples=_max_examples,
        )
        def max(
            self,
            axis: int | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "max", max)

        @doc(
            _num_doc,
            desc="Return the minimum of the values over the requested axis.\n\n"
            "If you want the *index* of the minimum, use ``idxmin``. This is "
            "the equivalent of the ``numpy.ndarray`` method ``argmin``.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also=_stat_func_see_also,
            examples=_min_examples,
        )
        def min(
            self,
            axis: int | None | lib.NoDefault = lib.no_default,
            skipna=True,
            level=None,
            numeric_only=None,
            **kwargs,
        ):
            return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs)

        setattr(cls, "min", min)
11969 @final
11970 @doc(Rolling)
11971 def rolling(
11972 self,
11973 window: int | timedelta | BaseOffset | BaseIndexer,
11974 min_periods: int | None = None,
11975 center: bool_t = False,
11976 win_type: str | None = None,
11977 on: str | None = None,
11978 axis: Axis = 0,
11979 closed: str | None = None,
11980 step: int | None = None,
11981 method: str = "single",
11982 ) -> Window | Rolling:
11983 axis = self._get_axis_number(axis)
11985 if win_type is not None:
11986 return Window(
11987 self,
11988 window=window,
11989 min_periods=min_periods,
11990 center=center,
11991 win_type=win_type,
11992 on=on,
11993 axis=axis,
11994 closed=closed,
11995 step=step,
11996 method=method,
11997 )
11999 return Rolling(
12000 self,
12001 window=window,
12002 min_periods=min_periods,
12003 center=center,
12004 win_type=win_type,
12005 on=on,
12006 axis=axis,
12007 closed=closed,
12008 step=step,
12009 method=method,
12010 )
12012 @final
12013 @doc(Expanding)
12014 def expanding(
12015 self,
12016 min_periods: int = 1,
12017 center: bool_t | None = None,
12018 axis: Axis = 0,
12019 method: str = "single",
12020 ) -> Expanding:
12021 axis = self._get_axis_number(axis)
12022 if center is not None:
12023 warnings.warn(
12024 "The `center` argument on `expanding` will be removed in the future.",
12025 FutureWarning,
12026 stacklevel=find_stack_level(),
12027 )
12028 else:
12029 center = False
12031 return Expanding(
12032 self, min_periods=min_periods, center=center, axis=axis, method=method
12033 )
    @final
    @doc(ExponentialMovingWindow)
    def ewm(
        self,
        com: float | None = None,
        span: float | None = None,
        halflife: float | TimedeltaConvertibleTypes | None = None,
        alpha: float | None = None,
        min_periods: int | None = 0,
        adjust: bool_t = True,
        ignore_na: bool_t = False,
        axis: Axis = 0,
        times: str | np.ndarray | DataFrame | Series | None = None,
        method: str = "single",
    ) -> ExponentialMovingWindow:
        # Thin constructor wrapper: resolve the axis, then hand every
        # parameter through unchanged. Validation of the mutually exclusive
        # decay specifications (com/span/halflife/alpha) is done by
        # ExponentialMovingWindow itself.
        axis = self._get_axis_number(axis)
        return ExponentialMovingWindow(
            self,
            com=com,
            span=span,
            halflife=halflife,
            alpha=alpha,
            min_periods=min_periods,
            adjust=adjust,
            ignore_na=ignore_na,
            axis=axis,
            times=times,
            method=method,
        )
12065 # ----------------------------------------------------------------------
12066 # Arithmetic Methods
    @final
    def _inplace_method(self, other, op):
        """
        Wrap arithmetic method to operate inplace.

        Computes ``op(self, other)`` and writes the result back into
        ``self``, returning ``self`` so the augmented-assignment rebinds to
        the same object.
        """
        result = op(self, other)

        if (
            self.ndim == 1
            and result._indexed_same(self)
            and is_dtype_equal(result.dtype, self.dtype)
        ):
            # GH#36498 this inplace op can _actually_ be inplace.
            self._values[:] = result._values
            return self

        # Delete cacher
        self._reset_cacher()

        # this makes sure that we are aligned like the input
        # we are updating inplace so we want to ignore is_copy
        self._update_inplace(
            result.reindex_like(self, copy=False), verify_is_copy=False
        )
        return self
12094 def __iadd__(self: NDFrameT, other) -> NDFrameT:
12095 # error: Unsupported left operand type for + ("Type[NDFrame]")
12096 return self._inplace_method(other, type(self).__add__) # type: ignore[operator]
12098 def __isub__(self: NDFrameT, other) -> NDFrameT:
12099 # error: Unsupported left operand type for - ("Type[NDFrame]")
12100 return self._inplace_method(other, type(self).__sub__) # type: ignore[operator]
12102 def __imul__(self: NDFrameT, other) -> NDFrameT:
12103 # error: Unsupported left operand type for * ("Type[NDFrame]")
12104 return self._inplace_method(other, type(self).__mul__) # type: ignore[operator]
12106 def __itruediv__(self: NDFrameT, other) -> NDFrameT:
12107 # error: Unsupported left operand type for / ("Type[NDFrame]")
12108 return self._inplace_method(
12109 other, type(self).__truediv__ # type: ignore[operator]
12110 )
12112 def __ifloordiv__(self: NDFrameT, other) -> NDFrameT:
12113 # error: Unsupported left operand type for // ("Type[NDFrame]")
12114 return self._inplace_method(
12115 other, type(self).__floordiv__ # type: ignore[operator]
12116 )
12118 def __imod__(self: NDFrameT, other) -> NDFrameT:
12119 # error: Unsupported left operand type for % ("Type[NDFrame]")
12120 return self._inplace_method(other, type(self).__mod__) # type: ignore[operator]
12122 def __ipow__(self: NDFrameT, other) -> NDFrameT:
12123 # error: Unsupported left operand type for ** ("Type[NDFrame]")
12124 return self._inplace_method(other, type(self).__pow__) # type: ignore[operator]
12126 def __iand__(self: NDFrameT, other) -> NDFrameT:
12127 # error: Unsupported left operand type for & ("Type[NDFrame]")
12128 return self._inplace_method(other, type(self).__and__) # type: ignore[operator]
12130 def __ior__(self: NDFrameT, other) -> NDFrameT:
12131 # error: Unsupported left operand type for | ("Type[NDFrame]")
12132 return self._inplace_method(other, type(self).__or__) # type: ignore[operator]
12134 def __ixor__(self: NDFrameT, other) -> NDFrameT:
12135 # error: Unsupported left operand type for ^ ("Type[NDFrame]")
12136 return self._inplace_method(other, type(self).__xor__) # type: ignore[operator]
12138 # ----------------------------------------------------------------------
12139 # Misc methods
12141 @final
12142 def _find_valid_index(self, *, how: str) -> Hashable | None:
12143 """
12144 Retrieves the index of the first valid value.
12146 Parameters
12147 ----------
12148 how : {'first', 'last'}
12149 Use this parameter to change between the first or last valid index.
12151 Returns
12152 -------
12153 idx_first_valid : type of index
12154 """
12155 idxpos = find_valid_index(self._values, how=how)
12156 if idxpos is None:
12157 return None
12158 return self.index[idxpos]
    @final
    @doc(position="first", klass=_shared_doc_kwargs["klass"])
    def first_valid_index(self) -> Hashable | None:
        """
        Return index for {position} non-NA value or None, if no non-NA value is found.

        Returns
        -------
        scalar : type of index

        Notes
        -----
        If all elements are NA/null, returns None.
        Also returns None for empty {klass}.
        """
        # The docstring doubles as a template: @doc substitutes {position} and
        # {klass}, and last_valid_index reuses this same text.
        return self._find_valid_index(how="first")
    @final
    @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"])
    def last_valid_index(self) -> Hashable | None:
        # Docstring is inherited from first_valid_index via @doc, with
        # {position} rendered as "last".
        return self._find_valid_index(how="last")
12183def _doc_params(cls):
12184 """Return a tuple of the doc params."""
12185 axis_descr = (
12186 f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}"
12187 )
12188 name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar"
12189 name2 = cls.__name__
12190 return axis_descr, name, name2
# ---------------------------------------------------------------------------
# Shared docstring templates. These module-level strings are filled in with
# str.format (via the @doc machinery) to build the per-method docstrings of
# the Series/DataFrame reduction methods.
# ---------------------------------------------------------------------------

# Template for the plain numeric reductions (sum, mean, ...). {min_count} is
# only populated for the ops that accept a min_count argument.
_num_doc = """
{desc}

Parameters
----------
axis : {axis_descr}
    Axis for the function to be applied on.
    For `Series` this parameter is unused and defaults to 0.
skipna : bool, default True
    Exclude NA/null values when computing the result.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a {name1}.

    .. deprecated:: 1.3.0
        The level keyword is deprecated. Use groupby instead.
numeric_only : bool, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.

    .. deprecated:: 1.5.0
        Specifying ``numeric_only=None`` is deprecated. The default value will be
        ``False`` in a future version of pandas.

{min_count}\
**kwargs
    Additional keyword arguments to be passed to the function.

Returns
-------
{name1} or {name2} (if level specified)\
{see_also}\
{examples}
"""

# Template for reductions that take a ddof argument (std, var, sem).
_num_ddof_doc = """
{desc}

Parameters
----------
axis : {axis_descr}
    For `Series` this parameter is unused and defaults to 0.
skipna : bool, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a {name1}.

    .. deprecated:: 1.3.0
        The level keyword is deprecated. Use groupby instead.
ddof : int, default 1
    Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
    where N represents the number of elements.
numeric_only : bool, default None
    Include only float, int, boolean columns. If None, will attempt to use
    everything, then use only numeric data. Not implemented for Series.

    .. deprecated:: 1.5.0
        Specifying ``numeric_only=None`` is deprecated. The default value will be
        ``False`` in a future version of pandas.

Returns
-------
{name1} or {name2} (if level specified) \
{notes}\
{examples}
"""

# Notes section injected into the std docstring via {notes}.
_std_notes = """

Notes
-----
To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the
default `ddof=1`)"""
# Examples section injected into the std docstring via {examples}.
_std_examples = """

Examples
--------
>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
...                    'age': [21, 25, 62, 43],
...                    'height': [1.61, 1.87, 1.49, 2.01]}
...                   ).set_index('person_id')
>>> df
           age  height
person_id
0           21    1.61
1           25    1.87
2           62    1.49
3           43    2.01

The standard deviation of the columns can be found as follows:

>>> df.std()
age       18.786076
height     0.237417

Alternatively, `ddof=0` can be set to normalize by N instead of N-1:

>>> df.std(ddof=0)
age       16.269219
height     0.205609"""

# Examples section injected into the var docstring via {examples}.
_var_examples = """

Examples
--------
>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
...                    'age': [21, 25, 62, 43],
...                    'height': [1.61, 1.87, 1.49, 2.01]}
...                   ).set_index('person_id')
>>> df
           age  height
person_id
0           21    1.61
1           25    1.87
2           62    1.49
3           43    2.01

>>> df.var()
age       352.916667
height      0.056367

Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1:

>>> df.var(ddof=0)
age       264.687500
height      0.042275"""

# Template for the boolean reductions any/all; {empty_value} is the identity
# element of the operation (True for all, False for any).
_bool_doc = """
{desc}

Parameters
----------
axis : {{0 or 'index', 1 or 'columns', None}}, default 0
    Indicate which axis or axes should be reduced. For `Series` this parameter
    is unused and defaults to 0.

    * 0 / 'index' : reduce the index, return a Series whose index is the
      original column labels.
    * 1 / 'columns' : reduce the columns, return a Series whose index is the
      original index.
    * None : reduce all axes, return a scalar.

bool_only : bool, default None
    Include only boolean columns. If None, will attempt to use everything,
    then use only boolean data. Not implemented for Series.
skipna : bool, default True
    Exclude NA/null values. If the entire row/column is NA and skipna is
    True, then the result will be {empty_value}, as for an empty row/column.
    If skipna is False, then NA are treated as True, because these are not
    equal to zero.
level : int or level name, default None
    If the axis is a MultiIndex (hierarchical), count along a
    particular level, collapsing into a {name1}.

    .. deprecated:: 1.3.0
        The level keyword is deprecated. Use groupby instead.
**kwargs : any, default None
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
{name1} or {name2}
    If level is specified, then, {name2} is returned; otherwise, {name1}
    is returned.

{see_also}
{examples}"""
# Description section for the `all` docstring ({desc} of _bool_doc).
_all_desc = """\
Return whether all elements are True, potentially over an axis.

Returns True unless there at least one element within a series or
along a Dataframe axis that is False or equivalent (e.g. zero or
empty)."""

# Examples section for the `all` docstring ({examples} of _bool_doc).
_all_examples = """\
Examples
--------
**Series**

>>> pd.Series([True, True]).all()
True
>>> pd.Series([True, False]).all()
False
>>> pd.Series([], dtype="float64").all()
True
>>> pd.Series([np.nan]).all()
True
>>> pd.Series([np.nan]).all(skipna=False)
True

**DataFrames**

Create a dataframe from a dictionary.

>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})
>>> df
   col1   col2
0  True   True
1  True  False

Default behaviour checks if values in each column all return True.

>>> df.all()
col1     True
col2    False
dtype: bool

Specify ``axis='columns'`` to check if values in each row all return True.

>>> df.all(axis='columns')
0     True
1    False
dtype: bool

Or ``axis=None`` for whether every value is True.

>>> df.all(axis=None)
False
"""

# See Also section for the `all` docstring ({see_also} of _bool_doc).
_all_see_also = """\
See Also
--------
Series.all : Return True if all elements are True.
DataFrame.any : Return True if one (or more) elements are True.
"""

# Template for the cumulative accumulations (cummin/cummax/cumsum/cumprod).
_cnum_doc = """
Return cumulative {desc} over a DataFrame or Series axis.

Returns a DataFrame or Series of the same size containing the cumulative
{desc}.

Parameters
----------
axis : {{0 or 'index', 1 or 'columns'}}, default 0
    The index or the name of the axis. 0 is equivalent to None or 'index'.
    For `Series` this parameter is unused and defaults to 0.
skipna : bool, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA.
*args, **kwargs
    Additional keywords have no effect but might be accepted for
    compatibility with NumPy.

Returns
-------
{name1} or {name2}
    Return cumulative {desc} of {name1} or {name2}.

See Also
--------
core.window.expanding.Expanding.{accum_func_name} : Similar functionality
    but ignores ``NaN`` values.
{name2}.{accum_func_name} : Return the {desc} over
    {name2} axis.
{name2}.cummax : Return cumulative maximum over {name2} axis.
{name2}.cummin : Return cumulative minimum over {name2} axis.
{name2}.cumsum : Return cumulative sum over {name2} axis.
{name2}.cumprod : Return cumulative product over {name2} axis.

{examples}"""
# Examples sections for the cumulative accumulations; each is injected into
# _cnum_doc via {examples} for the corresponding method.
_cummin_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummin()
0    2.0
1    NaN
2    2.0
3   -1.0
4   -1.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummin(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the minimum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummin()
     A    B
0  2.0  1.0
1  2.0  NaN
2  1.0  0.0

To iterate over columns and find the minimum in each row,
use ``axis=1``

>>> df.cummin(axis=1)
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0
"""

_cumsum_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumsum()
0    2.0
1    NaN
2    7.0
3    6.0
4    6.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumsum(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the sum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumsum()
     A    B
0  2.0  1.0
1  5.0  NaN
2  6.0  1.0

To iterate over columns and find the sum in each row,
use ``axis=1``

>>> df.cumsum(axis=1)
     A    B
0  2.0  3.0
1  3.0  NaN
2  1.0  1.0
"""

_cumprod_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cumprod()
0     2.0
1     NaN
2    10.0
3   -10.0
4    -0.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cumprod(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the product
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cumprod()
     A    B
0  2.0  1.0
1  6.0  NaN
2  6.0  0.0

To iterate over columns and find the product in each row,
use ``axis=1``

>>> df.cumprod(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  0.0
"""

_cummax_examples = """\
Examples
--------
**Series**

>>> s = pd.Series([2, np.nan, 5, -1, 0])
>>> s
0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

By default, NA values are ignored.

>>> s.cummax()
0    2.0
1    NaN
2    5.0
3    5.0
4    5.0
dtype: float64

To include NA values in the operation, use ``skipna=False``

>>> s.cummax(skipna=False)
0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

**DataFrame**

>>> df = pd.DataFrame([[2.0, 1.0],
...                    [3.0, np.nan],
...                    [1.0, 0.0]],
...                   columns=list('AB'))
>>> df
     A    B
0  2.0  1.0
1  3.0  NaN
2  1.0  0.0

By default, iterates over rows and finds the maximum
in each column. This is equivalent to ``axis=None`` or ``axis='index'``.

>>> df.cummax()
     A    B
0  2.0  1.0
1  3.0  NaN
2  3.0  1.0

To iterate over columns and find the maximum in each row,
use ``axis=1``

>>> df.cummax(axis=1)
     A    B
0  2.0  2.0
1  3.0  NaN
2  1.0  1.0
"""
# See Also section for the `any` docstring ({see_also} of _bool_doc).
_any_see_also = """\
See Also
--------
numpy.any : Numpy version of this method.
Series.any : Return whether any element is True.
Series.all : Return whether all elements are True.
DataFrame.any : Return whether any element is True over requested axis.
DataFrame.all : Return whether all elements are True over requested axis.
"""

# Description section for the `any` docstring ({desc} of _bool_doc).
_any_desc = """\
Return whether any element is True, potentially over an axis.

Returns False unless there is at least one element within a series or
along a Dataframe axis that is True or equivalent (e.g. non-zero or
non-empty)."""

# Examples section for the `any` docstring ({examples} of _bool_doc).
_any_examples = """\
Examples
--------
**Series**

For Series input, the output is a scalar indicating whether any element
is True.

>>> pd.Series([False, False]).any()
False
>>> pd.Series([True, False]).any()
True
>>> pd.Series([], dtype="float64").any()
False
>>> pd.Series([np.nan]).any()
False
>>> pd.Series([np.nan]).any(skipna=False)
True

**DataFrame**

Whether each column contains at least one True element (the default).

>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})
>>> df
   A  B  C
0  1  0  0
1  2  2  0

>>> df.any()
A     True
B     True
C    False
dtype: bool

Aggregating over the columns.

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
>>> df
       A  B
0   True  1
1  False  2

>>> df.any(axis='columns')
0    True
1    True
dtype: bool

>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
>>> df
       A  B
0   True  1
1  False  0

>>> df.any(axis='columns')
0    True
1    False
dtype: bool

Aggregating over the entire DataFrame with ``axis=None``.

>>> df.any(axis=None)
True

`any` for an empty DataFrame is an empty Series.

>>> pd.DataFrame([]).any()
Series([], dtype: bool)
"""
# Common example skeleton for sum/min/max; specialized below via .format().
_shared_docs[
    "stat_func_example"
] = """

Examples
--------
>>> idx = pd.MultiIndex.from_arrays([
...     ['warm', 'warm', 'cold', 'cold'],
...     ['dog', 'falcon', 'fish', 'spider']],
...     names=['blooded', 'animal'])
>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx)
>>> s
blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

>>> s.{stat_func}()
{default_output}"""

_sum_examples = _shared_docs["stat_func_example"].format(
    stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8
)

# Extra min_count-specific examples appended only for sum.
_sum_examples += """

By default, the sum of an empty or all-NA Series is ``0``.

>>> pd.Series([], dtype="float64").sum()  # min_count=0 is the default
0.0

This can be controlled with the ``min_count`` parameter. For example, if
you'd like the sum of an empty series to be NaN, pass ``min_count=1``.

>>> pd.Series([], dtype="float64").sum(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).sum()
0.0

>>> pd.Series([np.nan]).sum(min_count=1)
nan"""

_max_examples: str = _shared_docs["stat_func_example"].format(
    stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8
)

_min_examples: str = _shared_docs["stat_func_example"].format(
    stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0
)

# See Also section shared by the min/max/sum docstrings.
_stat_func_see_also = """

See Also
--------
Series.sum : Return the sum.
Series.min : Return the minimum.
Series.max : Return the maximum.
Series.idxmin : Return the index of the minimum.
Series.idxmax : Return the index of the maximum.
DataFrame.sum : Return the sum over the requested axis.
DataFrame.min : Return the minimum over the requested axis.
DataFrame.max : Return the maximum over the requested axis.
DataFrame.idxmin : Return the index of the minimum over the requested axis.
DataFrame.idxmax : Return the index of the maximum over the requested axis."""

# Examples section for prod (the product's empty identity is 1).
_prod_examples = """

Examples
--------
By default, the product of an empty or all-NA Series is ``1``

>>> pd.Series([], dtype="float64").prod()
1.0

This can be controlled with the ``min_count`` parameter

>>> pd.Series([], dtype="float64").prod(min_count=1)
nan

Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
empty series identically.

>>> pd.Series([np.nan]).prod()
1.0

>>> pd.Series([np.nan]).prod(min_count=1)
nan"""

# Parameter stub spliced into _num_doc's {min_count} slot for sum/prod.
_min_count_stub = """\
min_count : int, default 0
    The required number of valid values to perform the operation. If fewer than
    ``min_count`` non-NA values are present the result will be NA.
"""
12909def _align_as_utc(
12910 left: NDFrameT, right: NDFrameT, join_index: Index | None
12911) -> tuple[NDFrameT, NDFrameT]:
12912 """
12913 If we are aligning timezone-aware DatetimeIndexes and the timezones
12914 do not match, convert both to UTC.
12915 """
12916 if is_datetime64tz_dtype(left.index.dtype):
12917 if left.index.tz != right.index.tz:
12918 if join_index is not None:
12919 # GH#33671 ensure we don't change the index on
12920 # our original Series (NB: by default deep=False)
12921 left = left.copy()
12922 right = right.copy()
12923 left.index = join_index
12924 right.index = join_index
12926 return left, right