Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/generic.py: 19%

2323 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1# pyright: reportPropertyTypeMismatch=false 

2from __future__ import annotations 

3 

4import collections 

5from datetime import timedelta 

6import functools 

7import gc 

8import json 

9import operator 

10import pickle 

11import re 

12from typing import ( 

13 TYPE_CHECKING, 

14 Any, 

15 Callable, 

16 ClassVar, 

17 Hashable, 

18 Literal, 

19 Mapping, 

20 NoReturn, 

21 Sequence, 

22 Type, 

23 cast, 

24 final, 

25 overload, 

26) 

27import warnings 

28import weakref 

29 

30import numpy as np 

31 

32from pandas._config import config 

33 

34from pandas._libs import lib 

35from pandas._libs.tslibs import ( 

36 Period, 

37 Tick, 

38 Timestamp, 

39 to_offset, 

40) 

41from pandas._typing import ( 

42 AnyArrayLike, 

43 ArrayLike, 

44 Axis, 

45 ColspaceArgType, 

46 CompressionOptions, 

47 Dtype, 

48 DtypeArg, 

49 DtypeObj, 

50 FilePath, 

51 FillnaOptions, 

52 FloatFormatType, 

53 FormattersType, 

54 Frequency, 

55 IgnoreRaise, 

56 IndexKeyFunc, 

57 IndexLabel, 

58 IntervalClosedType, 

59 JSONSerializable, 

60 Level, 

61 Manager, 

62 NaPosition, 

63 NDFrameT, 

64 RandomState, 

65 Renamer, 

66 SortKind, 

67 StorageOptions, 

68 Suffixes, 

69 T, 

70 TimedeltaConvertibleTypes, 

71 TimestampConvertibleTypes, 

72 ValueKeyFunc, 

73 WriteBuffer, 

74 npt, 

75) 

76from pandas.compat._optional import import_optional_dependency 

77from pandas.compat.numpy import function as nv 

78from pandas.errors import ( 

79 AbstractMethodError, 

80 InvalidIndexError, 

81 SettingWithCopyError, 

82 SettingWithCopyWarning, 

83) 

84from pandas.util._decorators import ( 

85 deprecate_kwarg, 

86 deprecate_nonkeyword_arguments, 

87 doc, 

88 rewrite_axis_style_signature, 

89) 

90from pandas.util._exceptions import find_stack_level 

91from pandas.util._validators import ( 

92 validate_ascending, 

93 validate_bool_kwarg, 

94 validate_fillna_kwargs, 

95 validate_inclusive, 

96) 

97 

98from pandas.core.dtypes.common import ( 

99 ensure_object, 

100 ensure_platform_int, 

101 ensure_str, 

102 is_bool, 

103 is_bool_dtype, 

104 is_datetime64_any_dtype, 

105 is_datetime64tz_dtype, 

106 is_dict_like, 

107 is_dtype_equal, 

108 is_extension_array_dtype, 

109 is_float, 

110 is_list_like, 

111 is_number, 

112 is_numeric_dtype, 

113 is_re_compilable, 

114 is_scalar, 

115 is_timedelta64_dtype, 

116 pandas_dtype, 

117) 

118from pandas.core.dtypes.generic import ( 

119 ABCDataFrame, 

120 ABCSeries, 

121) 

122from pandas.core.dtypes.inference import ( 

123 is_hashable, 

124 is_nested_list_like, 

125) 

126from pandas.core.dtypes.missing import ( 

127 isna, 

128 notna, 

129) 

130 

131from pandas.core import ( 

132 algorithms as algos, 

133 arraylike, 

134 common as com, 

135 indexing, 

136 missing, 

137 nanops, 

138 sample, 

139) 

140from pandas.core.array_algos.replace import should_use_regex 

141from pandas.core.arrays import ExtensionArray 

142from pandas.core.base import PandasObject 

143from pandas.core.construction import ( 

144 create_series_with_explicit_dtype, 

145 extract_array, 

146) 

147from pandas.core.describe import describe_ndframe 

148from pandas.core.flags import Flags 

149from pandas.core.indexes.api import ( 

150 DatetimeIndex, 

151 Index, 

152 MultiIndex, 

153 PeriodIndex, 

154 RangeIndex, 

155 default_index, 

156 ensure_index, 

157) 

158from pandas.core.internals import ( 

159 ArrayManager, 

160 BlockManager, 

161 SingleArrayManager, 

162) 

163from pandas.core.internals.construction import mgr_to_mgr 

164from pandas.core.missing import find_valid_index 

165from pandas.core.ops import align_method_FRAME 

166from pandas.core.reshape.concat import concat 

167from pandas.core.shared_docs import _shared_docs 

168from pandas.core.sorting import get_indexer_indexer 

169from pandas.core.window import ( 

170 Expanding, 

171 ExponentialMovingWindow, 

172 Rolling, 

173 Window, 

174) 

175 

176from pandas.io.formats import format as fmt 

177from pandas.io.formats.format import ( 

178 DataFrameFormatter, 

179 DataFrameRenderer, 

180) 

181from pandas.io.formats.printing import pprint_thing 

182 

183if TYPE_CHECKING: 183 ↛ 185line 183 didn't jump to line 185, because the condition on line 183 was never true

184 

185 from pandas._libs.tslibs import BaseOffset 

186 

187 from pandas.core.frame import DataFrame 

188 from pandas.core.indexers.objects import BaseIndexer 

189 from pandas.core.resample import Resampler 

190 from pandas.core.series import Series 

191 

192 from pandas.io.pytables import HDFStore 

193 

194 

195# goal is to be able to define the docs close to function, while still being 

196# able to share 

197_shared_docs = {**_shared_docs} 

198_shared_doc_kwargs = { 

199 "axes": "keywords for axes", 

200 "klass": "Series/DataFrame", 

201 "axes_single_arg": "int or labels for object", 

202 "args_transpose": "axes to permute (int or label for object)", 

203 "inplace": """ 

204 inplace : bool, default False 

205 If True, performs operation inplace and returns None.""", 

206 "optional_by": """ 

207 by : str or list of str 

208 Name or list of names to sort by""", 

209 "replace_iloc": """ 

210 This differs from updating with ``.loc`` or ``.iloc``, which require 

211 you to specify a location to update with some value.""", 

212} 

213 

214 

215bool_t = bool # Need alias because NDFrame has def bool: 

216 

217 

218class NDFrame(PandasObject, indexing.IndexingMixin): 

219 """ 

220 N-dimensional analogue of DataFrame. Store multi-dimensional in a 

221 size-mutable, labeled data structure 

222 

223 Parameters 

224 ---------- 

225 data : BlockManager 

226 axes : list 

227 copy : bool, default False 

228 """ 

229 

230 _internal_names: list[str] = [ 

231 "_mgr", 

232 "_cacher", 

233 "_item_cache", 

234 "_cache", 

235 "_is_copy", 

236 "_subtyp", 

237 "_name", 

238 "_default_kind", 

239 "_default_fill_value", 

240 "_metadata", 

241 "__array_struct__", 

242 "__array_interface__", 

243 "_flags", 

244 ] 

245 _internal_names_set: set[str] = set(_internal_names) 

246 _accessors: set[str] = set() 

247 _hidden_attrs: frozenset[str] = frozenset( 

248 ["_AXIS_NAMES", "_AXIS_NUMBERS", "get_values", "tshift"] 

249 ) 

250 _metadata: list[str] = [] 

251 _is_copy: weakref.ReferenceType[NDFrame] | None = None 

252 _mgr: Manager 

253 _attrs: dict[Hashable, Any] 

254 _typ: str 

255 

256 # ---------------------------------------------------------------------- 

257 # Constructors 

258 

259 def __init__( 

260 self, 

261 data: Manager, 

262 copy: bool_t = False, 

263 attrs: Mapping[Hashable, Any] | None = None, 

264 ) -> None: 

265 # copy kwarg is retained for mypy compat, is not used 

266 

267 object.__setattr__(self, "_is_copy", None) 

268 object.__setattr__(self, "_mgr", data) 

269 object.__setattr__(self, "_item_cache", {}) 

270 if attrs is None: 

271 attrs = {} 

272 else: 

273 attrs = dict(attrs) 

274 object.__setattr__(self, "_attrs", attrs) 

275 object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) 

276 

277 @classmethod 

278 def _init_mgr( 

279 cls, 

280 mgr: Manager, 

281 axes, 

282 dtype: Dtype | None = None, 

283 copy: bool_t = False, 

284 ) -> Manager: 

285 """passed a manager and a axes dict""" 

286 for a, axe in axes.items(): 

287 if axe is not None: 

288 axe = ensure_index(axe) 

289 bm_axis = cls._get_block_manager_axis(a) 

290 mgr = mgr.reindex_axis(axe, axis=bm_axis) 

291 

292 # make a copy if explicitly requested 

293 if copy: 

294 mgr = mgr.copy() 

295 if dtype is not None: 

296 # avoid further copies if we can 

297 if ( 

298 isinstance(mgr, BlockManager) 

299 and len(mgr.blocks) == 1 

300 and is_dtype_equal(mgr.blocks[0].values.dtype, dtype) 

301 ): 

302 pass 

303 else: 

304 mgr = mgr.astype(dtype=dtype) 

305 return mgr 

306 

307 def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT: 

308 """ 

309 Private helper function to create a DataFrame with specific manager. 

310 

311 Parameters 

312 ---------- 

313 typ : {"block", "array"} 

314 copy : bool, default True 

315 Only controls whether the conversion from Block->ArrayManager 

316 copies the 1D arrays (to ensure proper/contiguous memory layout). 

317 

318 Returns 

319 ------- 

320 DataFrame 

321 New DataFrame using specified manager type. Is not guaranteed 

322 to be a copy or not. 

323 """ 

324 new_mgr: Manager 

325 new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy) 

326 # fastpath of passing a manager doesn't check the option/manager class 

327 return self._constructor(new_mgr).__finalize__(self) 

328 

329 # ---------------------------------------------------------------------- 

330 # attrs and flags 

331 

332 @property 

333 def attrs(self) -> dict[Hashable, Any]: 

334 """ 

335 Dictionary of global attributes of this dataset. 

336 

337 .. warning:: 

338 

339 attrs is experimental and may change without warning. 

340 

341 See Also 

342 -------- 

343 DataFrame.flags : Global flags applying to this object. 

344 """ 

345 if self._attrs is None: 

346 self._attrs = {} 

347 return self._attrs 

348 

349 @attrs.setter 

350 def attrs(self, value: Mapping[Hashable, Any]) -> None: 

351 self._attrs = dict(value) 

352 

353 @final 

354 @property 

355 def flags(self) -> Flags: 

356 """ 

357 Get the properties associated with this pandas object. 

358 

359 The available flags are 

360 

361 * :attr:`Flags.allows_duplicate_labels` 

362 

363 See Also 

364 -------- 

365 Flags : Flags that apply to pandas objects. 

366 DataFrame.attrs : Global metadata applying to this dataset. 

367 

368 Notes 

369 ----- 

370 "Flags" differ from "metadata". Flags reflect properties of the 

371 pandas object (the Series or DataFrame). Metadata refer to properties 

372 of the dataset, and should be stored in :attr:`DataFrame.attrs`. 

373 

374 Examples 

375 -------- 

376 >>> df = pd.DataFrame({"A": [1, 2]}) 

377 >>> df.flags 

378 <Flags(allows_duplicate_labels=True)> 

379 

380 Flags can be get or set using ``.`` 

381 

382 >>> df.flags.allows_duplicate_labels 

383 True 

384 >>> df.flags.allows_duplicate_labels = False 

385 

386 Or by slicing with a key 

387 

388 >>> df.flags["allows_duplicate_labels"] 

389 False 

390 >>> df.flags["allows_duplicate_labels"] = True 

391 """ 

392 return self._flags 

393 

394 @final 

395 def set_flags( 

396 self: NDFrameT, 

397 *, 

398 copy: bool_t = False, 

399 allows_duplicate_labels: bool_t | None = None, 

400 ) -> NDFrameT: 

401 """ 

402 Return a new object with updated flags. 

403 

404 Parameters 

405 ---------- 

406 allows_duplicate_labels : bool, optional 

407 Whether the returned object allows duplicate labels. 

408 

409 Returns 

410 ------- 

411 Series or DataFrame 

412 The same type as the caller. 

413 

414 See Also 

415 -------- 

416 DataFrame.attrs : Global metadata applying to this dataset. 

417 DataFrame.flags : Global flags applying to this object. 

418 

419 Notes 

420 ----- 

421 This method returns a new object that's a view on the same data 

422 as the input. Mutating the input or the output values will be reflected 

423 in the other. 

424 

425 This method is intended to be used in method chains. 

426 

427 "Flags" differ from "metadata". Flags reflect properties of the 

428 pandas object (the Series or DataFrame). Metadata refer to properties 

429 of the dataset, and should be stored in :attr:`DataFrame.attrs`. 

430 

431 Examples 

432 -------- 

433 >>> df = pd.DataFrame({"A": [1, 2]}) 

434 >>> df.flags.allows_duplicate_labels 

435 True 

436 >>> df2 = df.set_flags(allows_duplicate_labels=False) 

437 >>> df2.flags.allows_duplicate_labels 

438 False 

439 """ 

440 df = self.copy(deep=copy) 

441 if allows_duplicate_labels is not None: 

442 df.flags["allows_duplicate_labels"] = allows_duplicate_labels 

443 return df 

444 

445 @final 

446 @classmethod 

447 def _validate_dtype(cls, dtype) -> DtypeObj | None: 

448 """validate the passed dtype""" 

449 if dtype is not None: 

450 dtype = pandas_dtype(dtype) 

451 

452 # a compound dtype 

453 if dtype.kind == "V": 

454 raise NotImplementedError( 

455 "compound dtypes are not implemented " 

456 f"in the {cls.__name__} constructor" 

457 ) 

458 

459 return dtype 

460 

461 # ---------------------------------------------------------------------- 

462 # Construction 

463 

464 @property 

465 def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]: 

466 """ 

467 Used when a manipulation result has the same dimensions as the 

468 original. 

469 """ 

470 raise AbstractMethodError(self) 

471 

472 # ---------------------------------------------------------------------- 

473 # Internals 

474 

475 @final 

476 @property 

477 def _data(self): 

478 # GH#33054 retained because some downstream packages uses this, 

479 # e.g. fastparquet 

480 return self._mgr 

481 

482 # ---------------------------------------------------------------------- 

483 # Axis 

484 _stat_axis_number = 0 

485 _stat_axis_name = "index" 

486 _AXIS_ORDERS: list[str] 

487 _AXIS_TO_AXIS_NUMBER: dict[Axis, int] = {0: 0, "index": 0, "rows": 0} 

488 _info_axis_number: int 

489 _info_axis_name: str 

490 _AXIS_LEN: int 

491 

492 @property 

493 def _AXIS_NUMBERS(self) -> dict[str, int]: 

494 """.. deprecated:: 1.1.0""" 

495 warnings.warn( 

496 "_AXIS_NUMBERS has been deprecated.", 

497 FutureWarning, 

498 stacklevel=find_stack_level(), 

499 ) 

500 return {"index": 0} 

501 

502 @property 

503 def _AXIS_NAMES(self) -> dict[int, str]: 

504 """.. deprecated:: 1.1.0""" 

505 level = self.ndim + 1 

506 warnings.warn( 

507 "_AXIS_NAMES has been deprecated.", FutureWarning, stacklevel=level 

508 ) 

509 return {0: "index"} 

510 

511 @final 

512 def _construct_axes_dict(self, axes=None, **kwargs): 

513 """Return an axes dictionary for myself.""" 

514 d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} 

515 d.update(kwargs) 

516 return d 

517 

518 @final 

519 @classmethod 

520 def _construct_axes_from_arguments( 

521 cls, args, kwargs, require_all: bool_t = False, sentinel=None 

522 ): 

523 """ 

524 Construct and returns axes if supplied in args/kwargs. 

525 

526 If require_all, raise if all axis arguments are not supplied 

527 return a tuple of (axes, kwargs). 

528 

529 sentinel specifies the default parameter when an axis is not 

530 supplied; useful to distinguish when a user explicitly passes None 

531 in scenarios where None has special meaning. 

532 """ 

533 # construct the args 

534 args = list(args) 

535 for a in cls._AXIS_ORDERS: 

536 

537 # look for a argument by position 

538 if a not in kwargs: 

539 try: 

540 kwargs[a] = args.pop(0) 

541 except IndexError as err: 

542 if require_all: 

543 raise TypeError( 

544 "not enough/duplicate arguments specified!" 

545 ) from err 

546 

547 axes = {a: kwargs.pop(a, sentinel) for a in cls._AXIS_ORDERS} 

548 return axes, kwargs 

549 

550 @final 

551 @classmethod 

552 def _get_axis_number(cls, axis: Axis) -> int: 

553 try: 

554 return cls._AXIS_TO_AXIS_NUMBER[axis] 

555 except KeyError: 

556 raise ValueError(f"No axis named {axis} for object type {cls.__name__}") 

557 

558 @final 

559 @classmethod 

560 def _get_axis_name(cls, axis: Axis) -> str: 

561 axis_number = cls._get_axis_number(axis) 

562 return cls._AXIS_ORDERS[axis_number] 

563 

564 @final 

565 def _get_axis(self, axis: Axis) -> Index: 

566 axis_number = self._get_axis_number(axis) 

567 assert axis_number in {0, 1} 

568 return self.index if axis_number == 0 else self.columns 

569 

570 @final 

571 @classmethod 

572 def _get_block_manager_axis(cls, axis: Axis) -> int: 

573 """Map the axis to the block_manager axis.""" 

574 axis = cls._get_axis_number(axis) 

575 ndim = cls._AXIS_LEN 

576 if ndim == 2: 

577 # i.e. DataFrame 

578 return 1 - axis 

579 return axis 

580 

581 @final 

582 def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]: 

583 # index or columns 

584 axis_index = getattr(self, axis) 

585 d = {} 

586 prefix = axis[0] 

587 

588 for i, name in enumerate(axis_index.names): 

589 if name is not None: 

590 key = level = name 

591 else: 

592 # prefix with 'i' or 'c' depending on the input axis 

593 # e.g., you must do ilevel_0 for the 0th level of an unnamed 

594 # multiiindex 

595 key = f"{prefix}level_{i}" 

596 level = i 

597 

598 level_values = axis_index.get_level_values(level) 

599 s = level_values.to_series() 

600 s.index = axis_index 

601 d[key] = s 

602 

603 # put the index/columns itself in the dict 

604 if isinstance(axis_index, MultiIndex): 

605 dindex = axis_index 

606 else: 

607 dindex = axis_index.to_series() 

608 

609 d[axis] = dindex 

610 return d 

611 

612 @final 

613 def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]: 

614 from pandas.core.computation.parsing import clean_column_name 

615 

616 d: dict[str, Series | MultiIndex] = {} 

617 for axis_name in self._AXIS_ORDERS: 

618 d.update(self._get_axis_resolvers(axis_name)) 

619 

620 return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} 

621 

622 @final 

623 def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]: 

624 """ 

625 Return the special character free column resolvers of a dataframe. 

626 

627 Column names with special characters are 'cleaned up' so that they can 

628 be referred to by backtick quoting. 

629 Used in :meth:`DataFrame.eval`. 

630 """ 

631 from pandas.core.computation.parsing import clean_column_name 

632 

633 if isinstance(self, ABCSeries): 

634 return {clean_column_name(self.name): self} 

635 

636 return { 

637 clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) 

638 } 

639 

640 @property 

641 def _info_axis(self) -> Index: 

642 return getattr(self, self._info_axis_name) 

643 

644 @property 

645 def _stat_axis(self) -> Index: 

646 return getattr(self, self._stat_axis_name) 

647 

648 @property 

649 def shape(self) -> tuple[int, ...]: 

650 """ 

651 Return a tuple of axis dimensions 

652 """ 

653 return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) 

654 

655 @property 

656 def axes(self) -> list[Index]: 

657 """ 

658 Return index label(s) of the internal NDFrame 

659 """ 

660 # we do it this way because if we have reversed axes, then 

661 # the block manager shows then reversed 

662 return [self._get_axis(a) for a in self._AXIS_ORDERS] 

663 

664 @property 

665 def ndim(self) -> int: 

666 """ 

667 Return an int representing the number of axes / array dimensions. 

668 

669 Return 1 if Series. Otherwise return 2 if DataFrame. 

670 

671 See Also 

672 -------- 

673 ndarray.ndim : Number of array dimensions. 

674 

675 Examples 

676 -------- 

677 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

678 >>> s.ndim 

679 1 

680 

681 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

682 >>> df.ndim 

683 2 

684 """ 

685 return self._mgr.ndim 

686 

687 @property 

688 def size(self) -> int: 

689 """ 

690 Return an int representing the number of elements in this object. 

691 

692 Return the number of rows if Series. Otherwise return the number of 

693 rows times number of columns if DataFrame. 

694 

695 See Also 

696 -------- 

697 ndarray.size : Number of elements in the array. 

698 

699 Examples 

700 -------- 

701 >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) 

702 >>> s.size 

703 3 

704 

705 >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) 

706 >>> df.size 

707 4 

708 """ 

709 # error: Incompatible return value type (got "signedinteger[_64Bit]", 

710 # expected "int") [return-value] 

711 return np.prod(self.shape) # type: ignore[return-value] 

712 

713 @overload 

714 def set_axis( 

715 self: NDFrameT, 

716 labels, 

717 *, 

718 axis: Axis = ..., 

719 inplace: Literal[False] | lib.NoDefault = ..., 

720 copy: bool_t | lib.NoDefault = ..., 

721 ) -> NDFrameT: 

722 ... 

723 

724 @overload 

725 def set_axis( 

726 self, 

727 labels, 

728 *, 

729 axis: Axis = ..., 

730 inplace: Literal[True], 

731 copy: bool_t | lib.NoDefault = ..., 

732 ) -> None: 

733 ... 

734 

735 @overload 

736 def set_axis( 

737 self: NDFrameT, 

738 labels, 

739 *, 

740 axis: Axis = ..., 

741 inplace: bool_t | lib.NoDefault = ..., 

742 copy: bool_t | lib.NoDefault = ..., 

743 ) -> NDFrameT | None: 

744 ... 

745 

746 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) 

747 def set_axis( 

748 self: NDFrameT, 

749 labels, 

750 axis: Axis = 0, 

751 inplace: bool_t | lib.NoDefault = lib.no_default, 

752 *, 

753 copy: bool_t | lib.NoDefault = lib.no_default, 

754 ) -> NDFrameT | None: 

755 """ 

756 Assign desired index to given axis. 

757 

758 Indexes for%(extended_summary_sub)s row labels can be changed by assigning 

759 a list-like or Index. 

760 

761 Parameters 

762 ---------- 

763 labels : list-like, Index 

764 The values for the new index. 

765 

766 axis : %(axes_single_arg)s, default 0 

767 The axis to update. The value 0 identifies the rows. For `Series` 

768 this parameter is unused and defaults to 0. 

769 

770 inplace : bool, default False 

771 Whether to return a new %(klass)s instance. 

772 

773 .. deprecated:: 1.5.0 

774 

775 copy : bool, default True 

776 Whether to make a copy of the underlying data. 

777 

778 .. versionadded:: 1.5.0 

779 

780 Returns 

781 ------- 

782 renamed : %(klass)s or None 

783 An object of type %(klass)s or None if ``inplace=True``. 

784 

785 See Also 

786 -------- 

787 %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s. 

788 """ 

789 if inplace is not lib.no_default: 

790 warnings.warn( 

791 f"{type(self).__name__}.set_axis 'inplace' keyword is deprecated " 

792 "and will be removed in a future version. Use " 

793 "`obj = obj.set_axis(..., copy=False)` instead", 

794 FutureWarning, 

795 stacklevel=find_stack_level(), 

796 ) 

797 else: 

798 inplace = False 

799 

800 if inplace: 

801 if copy is True: 

802 raise ValueError("Cannot specify both inplace=True and copy=True") 

803 copy = False 

804 elif copy is lib.no_default: 

805 copy = True 

806 

807 self._check_inplace_and_allows_duplicate_labels(inplace) 

808 return self._set_axis_nocheck(labels, axis, inplace, copy=copy) 

809 

810 @final 

811 def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): 

812 if inplace: 

813 setattr(self, self._get_axis_name(axis), labels) 

814 else: 

815 # With copy=False, we create a new object but don't copy the 

816 # underlying data. 

817 obj = self.copy(deep=copy) 

818 setattr(obj, obj._get_axis_name(axis), labels) 

819 return obj 

820 

821 def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None: 

822 labels = ensure_index(labels) 

823 self._mgr.set_axis(axis, labels) 

824 self._clear_item_cache() 

825 

826 @final 

827 def swapaxes( 

828 self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t = True 

829 ) -> NDFrameT: 

830 """ 

831 Interchange axes and swap values axes appropriately. 

832 

833 Returns 

834 ------- 

835 y : same as input 

836 """ 

837 i = self._get_axis_number(axis1) 

838 j = self._get_axis_number(axis2) 

839 

840 if i == j: 

841 if copy: 

842 return self.copy() 

843 return self 

844 

845 mapping = {i: j, j: i} 

846 

847 new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) 

848 new_values = self.values.swapaxes(i, j) 

849 if copy: 

850 new_values = new_values.copy() 

851 

852 return self._constructor( 

853 new_values, 

854 *new_axes, 

855 ).__finalize__(self, method="swapaxes") 

856 

857 @final 

858 @doc(klass=_shared_doc_kwargs["klass"]) 

859 def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: 

860 """ 

861 Return {klass} with requested index / column level(s) removed. 

862 

863 Parameters 

864 ---------- 

865 level : int, str, or list-like 

866 If a string is given, must be the name of a level 

867 If list-like, elements must be names or positional indexes 

868 of levels. 

869 

870 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

871 Axis along which the level(s) is removed: 

872 

873 * 0 or 'index': remove level(s) in column. 

874 * 1 or 'columns': remove level(s) in row. 

875 

876 For `Series` this parameter is unused and defaults to 0. 

877 

878 Returns 

879 ------- 

880 {klass} 

881 {klass} with requested index / column level(s) removed. 

882 

883 Examples 

884 -------- 

885 >>> df = pd.DataFrame([ 

886 ... [1, 2, 3, 4], 

887 ... [5, 6, 7, 8], 

888 ... [9, 10, 11, 12] 

889 ... ]).set_index([0, 1]).rename_axis(['a', 'b']) 

890 

891 >>> df.columns = pd.MultiIndex.from_tuples([ 

892 ... ('c', 'e'), ('d', 'f') 

893 ... ], names=['level_1', 'level_2']) 

894 

895 >>> df 

896 level_1 c d 

897 level_2 e f 

898 a b 

899 1 2 3 4 

900 5 6 7 8 

901 9 10 11 12 

902 

903 >>> df.droplevel('a') 

904 level_1 c d 

905 level_2 e f 

906 b 

907 2 3 4 

908 6 7 8 

909 10 11 12 

910 

911 >>> df.droplevel('level_2', axis=1) 

912 level_1 c d 

913 a b 

914 1 2 3 4 

915 5 6 7 8 

916 9 10 11 12 

917 """ 

918 labels = self._get_axis(axis) 

919 new_labels = labels.droplevel(level) 

920 return self.set_axis(new_labels, axis=axis) 

921 

922 def pop(self, item: Hashable) -> Series | Any: 

923 result = self[item] 

924 del self[item] 

925 

926 return result 

927 

928 @final 

929 def squeeze(self, axis=None): 

930 """ 

931 Squeeze 1 dimensional axis objects into scalars. 

932 

933 Series or DataFrames with a single element are squeezed to a scalar. 

934 DataFrames with a single column or a single row are squeezed to a 

935 Series. Otherwise the object is unchanged. 

936 

937 This method is most useful when you don't know if your 

938 object is a Series or DataFrame, but you do know it has just a single 

939 column. In that case you can safely call `squeeze` to ensure you have a 

940 Series. 

941 

942 Parameters 

943 ---------- 

944 axis : {0 or 'index', 1 or 'columns', None}, default None 

945 A specific axis to squeeze. By default, all length-1 axes are 

946 squeezed. For `Series` this parameter is unused and defaults to `None`. 

947 

948 Returns 

949 ------- 

950 DataFrame, Series, or scalar 

951 The projection after squeezing `axis` or all the axes. 

952 

953 See Also 

954 -------- 

955 Series.iloc : Integer-location based indexing for selecting scalars. 

956 DataFrame.iloc : Integer-location based indexing for selecting Series. 

957 Series.to_frame : Inverse of DataFrame.squeeze for a 

958 single-column DataFrame. 

959 

960 Examples 

961 -------- 

962 >>> primes = pd.Series([2, 3, 5, 7]) 

963 

964 Slicing might produce a Series with a single value: 

965 

966 >>> even_primes = primes[primes % 2 == 0] 

967 >>> even_primes 

968 0 2 

969 dtype: int64 

970 

971 >>> even_primes.squeeze() 

972 2 

973 

974 Squeezing objects with more than one value in every axis does nothing: 

975 

976 >>> odd_primes = primes[primes % 2 == 1] 

977 >>> odd_primes 

978 1 3 

979 2 5 

980 3 7 

981 dtype: int64 

982 

983 >>> odd_primes.squeeze() 

984 1 3 

985 2 5 

986 3 7 

987 dtype: int64 

988 

989 Squeezing is even more effective when used with DataFrames. 

990 

991 >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) 

992 >>> df 

993 a b 

994 0 1 2 

995 1 3 4 

996 

997 Slicing a single column will produce a DataFrame with the columns 

998 having only one value: 

999 

1000 >>> df_a = df[['a']] 

1001 >>> df_a 

1002 a 

1003 0 1 

1004 1 3 

1005 

1006 So the columns can be squeezed down, resulting in a Series: 

1007 

1008 >>> df_a.squeeze('columns') 

1009 0 1 

1010 1 3 

1011 Name: a, dtype: int64 

1012 

1013 Slicing a single row from a single column will produce a single 

1014 scalar DataFrame: 

1015 

1016 >>> df_0a = df.loc[df.index < 1, ['a']] 

1017 >>> df_0a 

1018 a 

1019 0 1 

1020 

1021 Squeezing the rows produces a single scalar Series: 

1022 

1023 >>> df_0a.squeeze('rows') 

1024 a 1 

1025 Name: 0, dtype: int64 

1026 

1027 Squeezing all axes will project directly into a scalar: 

1028 

1029 >>> df_0a.squeeze() 

1030 1 

1031 """ 

1032 axis = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),) 

1033 return self.iloc[ 

1034 tuple( 

1035 0 if i in axis and len(a) == 1 else slice(None) 

1036 for i, a in enumerate(self.axes) 

1037 ) 

1038 ] 

1039 

1040 # ---------------------------------------------------------------------- 

1041 # Rename 

1042 

1043 def _rename( 

1044 self: NDFrameT, 

1045 mapper: Renamer | None = None, 

1046 *, 

1047 index: Renamer | None = None, 

1048 columns: Renamer | None = None, 

1049 axis: Axis | None = None, 

1050 copy: bool_t | None = None, 

1051 inplace: bool_t = False, 

1052 level: Level | None = None, 

1053 errors: str = "ignore", 

1054 ) -> NDFrameT | None: 

1055 # called by Series.rename and DataFrame.rename 

1056 

1057 if mapper is None and index is None and columns is None: 

1058 raise TypeError("must pass an index to rename") 

1059 

1060 if index is not None or columns is not None: 

1061 if axis is not None: 

1062 raise TypeError( 

1063 "Cannot specify both 'axis' and any of 'index' or 'columns'" 

1064 ) 

1065 elif mapper is not None: 

1066 raise TypeError( 

1067 "Cannot specify both 'mapper' and any of 'index' or 'columns'" 

1068 ) 

1069 else: 

1070 # use the mapper argument 

1071 if axis and self._get_axis_number(axis) == 1: 

1072 columns = mapper 

1073 else: 

1074 index = mapper 

1075 

1076 self._check_inplace_and_allows_duplicate_labels(inplace) 

1077 result = self if inplace else self.copy(deep=copy) 

1078 

1079 for axis_no, replacements in enumerate((index, columns)): 

1080 if replacements is None: 

1081 continue 

1082 

1083 ax = self._get_axis(axis_no) 

1084 f = com.get_rename_function(replacements) 

1085 

1086 if level is not None: 

1087 level = ax._get_level_number(level) 

1088 

1089 # GH 13473 

1090 if not callable(replacements): 

1091 if ax._is_multi and level is not None: 

1092 indexer = ax.get_level_values(level).get_indexer_for(replacements) 

1093 else: 

1094 indexer = ax.get_indexer_for(replacements) 

1095 

1096 if errors == "raise" and len(indexer[indexer == -1]): 

1097 missing_labels = [ 

1098 label 

1099 for index, label in enumerate(replacements) 

1100 if indexer[index] == -1 

1101 ] 

1102 raise KeyError(f"{missing_labels} not found in axis") 

1103 

1104 new_index = ax._transform_index(f, level=level) 

1105 result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) 

1106 result._clear_item_cache() 

1107 

1108 if inplace: 

1109 self._update_inplace(result) 

1110 return None 

1111 else: 

1112 return result.__finalize__(self, method="rename") 

1113 

1114 @overload 

1115 def rename_axis( 

1116 self: NDFrameT, 

1117 mapper: IndexLabel | lib.NoDefault = ..., 

1118 *, 

1119 inplace: Literal[False] = ..., 

1120 **kwargs, 

1121 ) -> NDFrameT: 

1122 ... 

1123 

1124 @overload 

1125 def rename_axis( 

1126 self, 

1127 mapper: IndexLabel | lib.NoDefault = ..., 

1128 *, 

1129 inplace: Literal[True], 

1130 **kwargs, 

1131 ) -> None: 

1132 ... 

1133 

1134 @overload 

1135 def rename_axis( 

1136 self: NDFrameT, 

1137 mapper: IndexLabel | lib.NoDefault = ..., 

1138 *, 

1139 inplace: bool_t = ..., 

1140 **kwargs, 

1141 ) -> NDFrameT | None: 

1142 ... 

1143 

1144 @rewrite_axis_style_signature("mapper", [("copy", True)]) 

1145 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "mapper"]) 

1146 def rename_axis( 

1147 self: NDFrameT, 

1148 mapper: IndexLabel | lib.NoDefault = lib.no_default, 

1149 inplace: bool_t = False, 

1150 **kwargs, 

1151 ) -> NDFrameT | None: 

1152 """ 

1153 Set the name of the axis for the index or columns. 

1154 

1155 Parameters 

1156 ---------- 

1157 mapper : scalar, list-like, optional 

1158 Value to set the axis name attribute. 

1159 index, columns : scalar, list-like, dict-like or function, optional 

1160 A scalar, list-like, dict-like or functions transformations to 

1161 apply to that axis' values. 

1162 Note that the ``columns`` parameter is not allowed if the 

1163 object is a Series. This parameter only apply for DataFrame 

1164 type objects. 

1165 

1166 Use either ``mapper`` and ``axis`` to 

1167 specify the axis to target with ``mapper``, or ``index`` 

1168 and/or ``columns``. 

1169 axis : {0 or 'index', 1 or 'columns'}, default 0 

1170 The axis to rename. For `Series` this parameter is unused and defaults to 0. 

1171 copy : bool, default True 

1172 Also copy underlying data. 

1173 inplace : bool, default False 

1174 Modifies the object directly, instead of creating a new Series 

1175 or DataFrame. 

1176 

1177 Returns 

1178 ------- 

1179 Series, DataFrame, or None 

1180 The same type as the caller or None if ``inplace=True``. 

1181 

1182 See Also 

1183 -------- 

1184 Series.rename : Alter Series index labels or name. 

1185 DataFrame.rename : Alter DataFrame index labels or name. 

1186 Index.rename : Set new names on index. 

1187 

1188 Notes 

1189 ----- 

1190 ``DataFrame.rename_axis`` supports two calling conventions 

1191 

1192 * ``(index=index_mapper, columns=columns_mapper, ...)`` 

1193 * ``(mapper, axis={'index', 'columns'}, ...)`` 

1194 

1195 The first calling convention will only modify the names of 

1196 the index and/or the names of the Index object that is the columns. 

1197 In this case, the parameter ``copy`` is ignored. 

1198 

1199 The second calling convention will modify the names of the 

1200 corresponding index if mapper is a list or a scalar. 

1201 However, if mapper is dict-like or a function, it will use the 

1202 deprecated behavior of modifying the axis *labels*. 

1203 

1204 We *highly* recommend using keyword arguments to clarify your 

1205 intent. 

1206 

1207 Examples 

1208 -------- 

1209 **Series** 

1210 

1211 >>> s = pd.Series(["dog", "cat", "monkey"]) 

1212 >>> s 

1213 0 dog 

1214 1 cat 

1215 2 monkey 

1216 dtype: object 

1217 >>> s.rename_axis("animal") 

1218 animal 

1219 0 dog 

1220 1 cat 

1221 2 monkey 

1222 dtype: object 

1223 

1224 **DataFrame** 

1225 

1226 >>> df = pd.DataFrame({"num_legs": [4, 4, 2], 

1227 ... "num_arms": [0, 0, 2]}, 

1228 ... ["dog", "cat", "monkey"]) 

1229 >>> df 

1230 num_legs num_arms 

1231 dog 4 0 

1232 cat 4 0 

1233 monkey 2 2 

1234 >>> df = df.rename_axis("animal") 

1235 >>> df 

1236 num_legs num_arms 

1237 animal 

1238 dog 4 0 

1239 cat 4 0 

1240 monkey 2 2 

1241 >>> df = df.rename_axis("limbs", axis="columns") 

1242 >>> df 

1243 limbs num_legs num_arms 

1244 animal 

1245 dog 4 0 

1246 cat 4 0 

1247 monkey 2 2 

1248 

1249 **MultiIndex** 

1250 

1251 >>> df.index = pd.MultiIndex.from_product([['mammal'], 

1252 ... ['dog', 'cat', 'monkey']], 

1253 ... names=['type', 'name']) 

1254 >>> df 

1255 limbs num_legs num_arms 

1256 type name 

1257 mammal dog 4 0 

1258 cat 4 0 

1259 monkey 2 2 

1260 

1261 >>> df.rename_axis(index={'type': 'class'}) 

1262 limbs num_legs num_arms 

1263 class name 

1264 mammal dog 4 0 

1265 cat 4 0 

1266 monkey 2 2 

1267 

1268 >>> df.rename_axis(columns=str.upper) 

1269 LIMBS num_legs num_arms 

1270 type name 

1271 mammal dog 4 0 

1272 cat 4 0 

1273 monkey 2 2 

1274 """ 

1275 kwargs["inplace"] = inplace 

1276 axes, kwargs = self._construct_axes_from_arguments( 

1277 (), kwargs, sentinel=lib.no_default 

1278 ) 

1279 copy = kwargs.pop("copy", True) 

1280 inplace = kwargs.pop("inplace", False) 

1281 axis = kwargs.pop("axis", 0) 

1282 if axis is not None: 

1283 axis = self._get_axis_number(axis) 

1284 

1285 if kwargs: 

1286 raise TypeError( 

1287 "rename_axis() got an unexpected keyword " 

1288 f'argument "{list(kwargs.keys())[0]}"' 

1289 ) 

1290 

1291 inplace = validate_bool_kwarg(inplace, "inplace") 

1292 

1293 if mapper is not lib.no_default: 

1294 # Use v0.23 behavior if a scalar or list 

1295 non_mapper = is_scalar(mapper) or ( 

1296 is_list_like(mapper) and not is_dict_like(mapper) 

1297 ) 

1298 if non_mapper: 

1299 return self._set_axis_name(mapper, axis=axis, inplace=inplace) 

1300 else: 

1301 raise ValueError("Use `.rename` to alter labels with a mapper.") 

1302 else: 

1303 # Use new behavior. Means that index and/or columns 

1304 # is specified 

1305 result = self if inplace else self.copy(deep=copy) 

1306 

1307 for axis in range(self._AXIS_LEN): 

1308 v = axes.get(self._get_axis_name(axis)) 

1309 if v is lib.no_default: 

1310 continue 

1311 non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) 

1312 if non_mapper: 

1313 newnames = v 

1314 else: 

1315 f = com.get_rename_function(v) 

1316 curnames = self._get_axis(axis).names 

1317 newnames = [f(name) for name in curnames] 

1318 result._set_axis_name(newnames, axis=axis, inplace=True) 

1319 if not inplace: 

1320 return result 

1321 return None 

1322 

1323 @final 

1324 def _set_axis_name(self, name, axis=0, inplace=False): 

1325 """ 

1326 Set the name(s) of the axis. 

1327 

1328 Parameters 

1329 ---------- 

1330 name : str or list of str 

1331 Name(s) to set. 

1332 axis : {0 or 'index', 1 or 'columns'}, default 0 

1333 The axis to set the label. The value 0 or 'index' specifies index, 

1334 and the value 1 or 'columns' specifies columns. 

1335 inplace : bool, default False 

1336 If `True`, do operation inplace and return None. 

1337 

1338 Returns 

1339 ------- 

1340 Series, DataFrame, or None 

1341 The same type as the caller or `None` if `inplace` is `True`. 

1342 

1343 See Also 

1344 -------- 

1345 DataFrame.rename : Alter the axis labels of :class:`DataFrame`. 

1346 Series.rename : Alter the index labels or set the index name 

1347 of :class:`Series`. 

1348 Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. 

1349 

1350 Examples 

1351 -------- 

1352 >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, 

1353 ... ["dog", "cat", "monkey"]) 

1354 >>> df 

1355 num_legs 

1356 dog 4 

1357 cat 4 

1358 monkey 2 

1359 >>> df._set_axis_name("animal") 

1360 num_legs 

1361 animal 

1362 dog 4 

1363 cat 4 

1364 monkey 2 

1365 >>> df.index = pd.MultiIndex.from_product( 

1366 ... [["mammal"], ['dog', 'cat', 'monkey']]) 

1367 >>> df._set_axis_name(["type", "name"]) 

1368 num_legs 

1369 type name 

1370 mammal dog 4 

1371 cat 4 

1372 monkey 2 

1373 """ 

1374 axis = self._get_axis_number(axis) 

1375 idx = self._get_axis(axis).set_names(name) 

1376 

1377 inplace = validate_bool_kwarg(inplace, "inplace") 

1378 renamed = self if inplace else self.copy() 

1379 if axis == 0: 

1380 renamed.index = idx 

1381 else: 

1382 renamed.columns = idx 

1383 

1384 if not inplace: 

1385 return renamed 

1386 

1387 # ---------------------------------------------------------------------- 

1388 # Comparison Methods 

1389 

1390 @final 

1391 def _indexed_same(self, other) -> bool_t: 

1392 return all( 

1393 self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS 

1394 ) 

1395 

1396 @final 

1397 def equals(self, other: object) -> bool_t: 

1398 """ 

1399 Test whether two objects contain the same elements. 

1400 

1401 This function allows two Series or DataFrames to be compared against 

1402 each other to see if they have the same shape and elements. NaNs in 

1403 the same location are considered equal. 

1404 

1405 The row/column index do not need to have the same type, as long 

1406 as the values are considered equal. Corresponding columns must be of 

1407 the same dtype. 

1408 

1409 Parameters 

1410 ---------- 

1411 other : Series or DataFrame 

1412 The other Series or DataFrame to be compared with the first. 

1413 

1414 Returns 

1415 ------- 

1416 bool 

1417 True if all elements are the same in both objects, False 

1418 otherwise. 

1419 

1420 See Also 

1421 -------- 

1422 Series.eq : Compare two Series objects of the same length 

1423 and return a Series where each element is True if the element 

1424 in each Series is equal, False otherwise. 

1425 DataFrame.eq : Compare two DataFrame objects of the same shape and 

1426 return a DataFrame where each element is True if the respective 

1427 element in each DataFrame is equal, False otherwise. 

1428 testing.assert_series_equal : Raises an AssertionError if left and 

1429 right are not equal. Provides an easy interface to ignore 

1430 inequality in dtypes, indexes and precision among others. 

1431 testing.assert_frame_equal : Like assert_series_equal, but targets 

1432 DataFrames. 

1433 numpy.array_equal : Return True if two arrays have the same shape 

1434 and elements, False otherwise. 

1435 

1436 Examples 

1437 -------- 

1438 >>> df = pd.DataFrame({1: [10], 2: [20]}) 

1439 >>> df 

1440 1 2 

1441 0 10 20 

1442 

1443 DataFrames df and exactly_equal have the same types and values for 

1444 their elements and column labels, which will return True. 

1445 

1446 >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) 

1447 >>> exactly_equal 

1448 1 2 

1449 0 10 20 

1450 >>> df.equals(exactly_equal) 

1451 True 

1452 

1453 DataFrames df and different_column_type have the same element 

1454 types and values, but have different types for the column labels, 

1455 which will still return True. 

1456 

1457 >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) 

1458 >>> different_column_type 

1459 1.0 2.0 

1460 0 10 20 

1461 >>> df.equals(different_column_type) 

1462 True 

1463 

1464 DataFrames df and different_data_type have different types for the 

1465 same values for their elements, and will return False even though 

1466 their column labels are the same values and types. 

1467 

1468 >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) 

1469 >>> different_data_type 

1470 1 2 

1471 0 10.0 20.0 

1472 >>> df.equals(different_data_type) 

1473 False 

1474 """ 

1475 if not (isinstance(other, type(self)) or isinstance(self, type(other))): 

1476 return False 

1477 other = cast(NDFrame, other) 

1478 return self._mgr.equals(other._mgr) 

1479 

1480 # ------------------------------------------------------------------------- 

1481 # Unary Methods 

1482 

1483 @final 

1484 def __neg__(self: NDFrameT) -> NDFrameT: 

1485 def blk_func(values: ArrayLike): 

1486 if is_bool_dtype(values.dtype): 

1487 # error: Argument 1 to "inv" has incompatible type "Union 

1488 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1489 # "_SupportsInversion[ndarray[Any, dtype[bool_]]]" 

1490 return operator.inv(values) # type: ignore[arg-type] 

1491 else: 

1492 # error: Argument 1 to "neg" has incompatible type "Union 

1493 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1494 # "_SupportsNeg[ndarray[Any, dtype[Any]]]" 

1495 return operator.neg(values) # type: ignore[arg-type] 

1496 

1497 new_data = self._mgr.apply(blk_func) 

1498 res = self._constructor(new_data) 

1499 return res.__finalize__(self, method="__neg__") 

1500 

1501 @final 

1502 def __pos__(self: NDFrameT) -> NDFrameT: 

1503 def blk_func(values: ArrayLike): 

1504 if is_bool_dtype(values.dtype): 

1505 return values.copy() 

1506 else: 

1507 # error: Argument 1 to "pos" has incompatible type "Union 

1508 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1509 # "_SupportsPos[ndarray[Any, dtype[Any]]]" 

1510 return operator.pos(values) # type: ignore[arg-type] 

1511 

1512 new_data = self._mgr.apply(blk_func) 

1513 res = self._constructor(new_data) 

1514 return res.__finalize__(self, method="__pos__") 

1515 

1516 @final 

1517 def __invert__(self: NDFrameT) -> NDFrameT: 

1518 if not self.size: 

1519 # inv fails with 0 len 

1520 return self 

1521 

1522 new_data = self._mgr.apply(operator.invert) 

1523 return self._constructor(new_data).__finalize__(self, method="__invert__") 

1524 

1525 @final 

1526 def __nonzero__(self) -> NoReturn: 

1527 raise ValueError( 

1528 f"The truth value of a {type(self).__name__} is ambiguous. " 

1529 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

1530 ) 

1531 

1532 __bool__ = __nonzero__ 

1533 

1534 @final 

1535 def bool(self) -> bool_t: 

1536 """ 

1537 Return the bool of a single element Series or DataFrame. 

1538 

1539 This must be a boolean scalar value, either True or False. It will raise a 

1540 ValueError if the Series or DataFrame does not have exactly 1 element, or that 

1541 element is not boolean (integer values 0 and 1 will also raise an exception). 

1542 

1543 Returns 

1544 ------- 

1545 bool 

1546 The value in the Series or DataFrame. 

1547 

1548 See Also 

1549 -------- 

1550 Series.astype : Change the data type of a Series, including to boolean. 

1551 DataFrame.astype : Change the data type of a DataFrame, including to boolean. 

1552 numpy.bool_ : NumPy boolean data type, used by pandas for boolean values. 

1553 

1554 Examples 

1555 -------- 

1556 The method will only work for single element objects with a boolean value: 

1557 

1558 >>> pd.Series([True]).bool() 

1559 True 

1560 >>> pd.Series([False]).bool() 

1561 False 

1562 

1563 >>> pd.DataFrame({'col': [True]}).bool() 

1564 True 

1565 >>> pd.DataFrame({'col': [False]}).bool() 

1566 False 

1567 """ 

1568 v = self.squeeze() 

1569 if isinstance(v, (bool, np.bool_)): 

1570 return bool(v) 

1571 elif is_scalar(v): 

1572 raise ValueError( 

1573 "bool cannot act on a non-boolean single element " 

1574 f"{type(self).__name__}" 

1575 ) 

1576 

1577 self.__nonzero__() 

1578 # for mypy (__nonzero__ raises) 

1579 return True 

1580 

1581 @final 

1582 def abs(self: NDFrameT) -> NDFrameT: 

1583 """ 

1584 Return a Series/DataFrame with absolute numeric value of each element. 

1585 

1586 This function only applies to elements that are all numeric. 

1587 

1588 Returns 

1589 ------- 

1590 abs 

1591 Series/DataFrame containing the absolute value of each element. 

1592 

1593 See Also 

1594 -------- 

1595 numpy.absolute : Calculate the absolute value element-wise. 

1596 

1597 Notes 

1598 ----- 

1599 For ``complex`` inputs, ``1.2 + 1j``, the absolute value is 

1600 :math:`\\sqrt{ a^2 + b^2 }`. 

1601 

1602 Examples 

1603 -------- 

1604 Absolute numeric values in a Series. 

1605 

1606 >>> s = pd.Series([-1.10, 2, -3.33, 4]) 

1607 >>> s.abs() 

1608 0 1.10 

1609 1 2.00 

1610 2 3.33 

1611 3 4.00 

1612 dtype: float64 

1613 

1614 Absolute numeric values in a Series with complex numbers. 

1615 

1616 >>> s = pd.Series([1.2 + 1j]) 

1617 >>> s.abs() 

1618 0 1.56205 

1619 dtype: float64 

1620 

1621 Absolute numeric values in a Series with a Timedelta element. 

1622 

1623 >>> s = pd.Series([pd.Timedelta('1 days')]) 

1624 >>> s.abs() 

1625 0 1 days 

1626 dtype: timedelta64[ns] 

1627 

1628 Select rows with data closest to certain value using argsort (from 

1629 `StackOverflow <https://stackoverflow.com/a/17758115>`__). 

1630 

1631 >>> df = pd.DataFrame({ 

1632 ... 'a': [4, 5, 6, 7], 

1633 ... 'b': [10, 20, 30, 40], 

1634 ... 'c': [100, 50, -30, -50] 

1635 ... }) 

1636 >>> df 

1637 a b c 

1638 0 4 10 100 

1639 1 5 20 50 

1640 2 6 30 -30 

1641 3 7 40 -50 

1642 >>> df.loc[(df.c - 43).abs().argsort()] 

1643 a b c 

1644 1 5 20 50 

1645 0 4 10 100 

1646 2 6 30 -30 

1647 3 7 40 -50 

1648 """ 

1649 res_mgr = self._mgr.apply(np.abs) 

1650 return self._constructor(res_mgr).__finalize__(self, name="abs") 

1651 

1652 @final 

1653 def __abs__(self: NDFrameT) -> NDFrameT: 

1654 return self.abs() 

1655 

1656 @final 

1657 def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT: 

1658 return self.round(decimals).__finalize__(self, method="__round__") 

1659 

1660 # ------------------------------------------------------------------------- 

1661 # Label or Level Combination Helpers 

1662 # 

1663 # A collection of helper methods for DataFrame/Series operations that 

1664 # accept a combination of column/index labels and levels. All such 

1665 # operations should utilize/extend these methods when possible so that we 

1666 # have consistent precedence and validation logic throughout the library. 

1667 

1668 @final 

1669 def _is_level_reference(self, key: Level, axis=0) -> bool_t: 

1670 """ 

1671 Test whether a key is a level reference for a given axis. 

1672 

1673 To be considered a level reference, `key` must be a string that: 

1674 - (axis=0): Matches the name of an index level and does NOT match 

1675 a column label. 

1676 - (axis=1): Matches the name of a column level and does NOT match 

1677 an index label. 

1678 

1679 Parameters 

1680 ---------- 

1681 key : Hashable 

1682 Potential level name for the given axis 

1683 axis : int, default 0 

1684 Axis that levels are associated with (0 for index, 1 for columns) 

1685 

1686 Returns 

1687 ------- 

1688 is_level : bool 

1689 """ 

1690 axis = self._get_axis_number(axis) 

1691 

1692 return ( 

1693 key is not None 

1694 and is_hashable(key) 

1695 and key in self.axes[axis].names 

1696 and not self._is_label_reference(key, axis=axis) 

1697 ) 

1698 

1699 @final 

1700 def _is_label_reference(self, key: Level, axis=0) -> bool_t: 

1701 """ 

1702 Test whether a key is a label reference for a given axis. 

1703 

1704 To be considered a label reference, `key` must be a string that: 

1705 - (axis=0): Matches a column label 

1706 - (axis=1): Matches an index label 

1707 

1708 Parameters 

1709 ---------- 

1710 key : Hashable 

1711 Potential label name, i.e. Index entry. 

1712 axis : int, default 0 

1713 Axis perpendicular to the axis that labels are associated with 

1714 (0 means search for column labels, 1 means search for index labels) 

1715 

1716 Returns 

1717 ------- 

1718 is_label: bool 

1719 """ 

1720 axis = self._get_axis_number(axis) 

1721 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) 

1722 

1723 return ( 

1724 key is not None 

1725 and is_hashable(key) 

1726 and any(key in self.axes[ax] for ax in other_axes) 

1727 ) 

1728 

1729 @final 

1730 def _is_label_or_level_reference(self, key: Level, axis: int = 0) -> bool_t: 

1731 """ 

1732 Test whether a key is a label or level reference for a given axis. 

1733 

1734 To be considered either a label or a level reference, `key` must be a 

1735 string that: 

1736 - (axis=0): Matches a column label or an index level 

1737 - (axis=1): Matches an index label or a column level 

1738 

1739 Parameters 

1740 ---------- 

1741 key : Hashable 

1742 Potential label or level name 

1743 axis : int, default 0 

1744 Axis that levels are associated with (0 for index, 1 for columns) 

1745 

1746 Returns 

1747 ------- 

1748 bool 

1749 """ 

1750 return self._is_level_reference(key, axis=axis) or self._is_label_reference( 

1751 key, axis=axis 

1752 ) 

1753 

1754 @final 

1755 def _check_label_or_level_ambiguity(self, key: Level, axis: int = 0) -> None: 

1756 """ 

1757 Check whether `key` is ambiguous. 

1758 

1759 By ambiguous, we mean that it matches both a level of the input 

1760 `axis` and a label of the other axis. 

1761 

1762 Parameters 

1763 ---------- 

1764 key : Hashable 

1765 Label or level name. 

1766 axis : int, default 0 

1767 Axis that levels are associated with (0 for index, 1 for columns). 

1768 

1769 Raises 

1770 ------ 

1771 ValueError: `key` is ambiguous 

1772 """ 

1773 

1774 axis = self._get_axis_number(axis) 

1775 other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) 

1776 

1777 if ( 

1778 key is not None 

1779 and is_hashable(key) 

1780 and key in self.axes[axis].names 

1781 and any(key in self.axes[ax] for ax in other_axes) 

1782 ): 

1783 

1784 # Build an informative and grammatical warning 

1785 level_article, level_type = ( 

1786 ("an", "index") if axis == 0 else ("a", "column") 

1787 ) 

1788 

1789 label_article, label_type = ( 

1790 ("a", "column") if axis == 0 else ("an", "index") 

1791 ) 

1792 

1793 msg = ( 

1794 f"'{key}' is both {level_article} {level_type} level and " 

1795 f"{label_article} {label_type} label, which is ambiguous." 

1796 ) 

1797 raise ValueError(msg) 

1798 

1799 @final 

1800 def _get_label_or_level_values(self, key: Level, axis: int = 0) -> ArrayLike: 

1801 """ 

1802 Return a 1-D array of values associated with `key`, a label or level 

1803 from the given `axis`. 

1804 

1805 Retrieval logic: 

1806 - (axis=0): Return column values if `key` matches a column label. 

1807 Otherwise return index level values if `key` matches an index 

1808 level. 

1809 - (axis=1): Return row values if `key` matches an index label. 

1810 Otherwise return column level values if 'key' matches a column 

1811 level 

1812 

1813 Parameters 

1814 ---------- 

1815 key : Hashable 

1816 Label or level name. 

1817 axis : int, default 0 

1818 Axis that levels are associated with (0 for index, 1 for columns) 

1819 

1820 Returns 

1821 ------- 

1822 np.ndarray or ExtensionArray 

1823 

1824 Raises 

1825 ------ 

1826 KeyError 

1827 if `key` matches neither a label nor a level 

1828 ValueError 

1829 if `key` matches multiple labels 

1830 FutureWarning 

1831 if `key` is ambiguous. This will become an ambiguity error in a 

1832 future version 

1833 """ 

1834 axis = self._get_axis_number(axis) 

1835 other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] 

1836 

1837 if self._is_label_reference(key, axis=axis): 

1838 self._check_label_or_level_ambiguity(key, axis=axis) 

1839 values = self.xs(key, axis=other_axes[0])._values 

1840 elif self._is_level_reference(key, axis=axis): 

1841 # error: Incompatible types in assignment (expression has type "Union[ 

1842 # ExtensionArray, ndarray[Any, Any]]", variable has type "ndarray[Any, 

1843 # Any]") 

1844 values = ( 

1845 self.axes[axis] 

1846 .get_level_values(key) # type: ignore[assignment] 

1847 ._values 

1848 ) 

1849 else: 

1850 raise KeyError(key) 

1851 

1852 # Check for duplicates 

1853 if values.ndim > 1: 

1854 

1855 if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): 

1856 multi_message = ( 

1857 "\n" 

1858 "For a multi-index, the label must be a " 

1859 "tuple with elements corresponding to each level." 

1860 ) 

1861 else: 

1862 multi_message = "" 

1863 

1864 label_axis_name = "column" if axis == 0 else "index" 

1865 raise ValueError( 

1866 f"The {label_axis_name} label '{key}' is not unique.{multi_message}" 

1867 ) 

1868 

1869 return values 

1870 

1871 @final 

1872 def _drop_labels_or_levels(self, keys, axis: int = 0): 

1873 """ 

1874 Drop labels and/or levels for the given `axis`. 

1875 

1876 For each key in `keys`: 

1877 - (axis=0): If key matches a column label then drop the column. 

1878 Otherwise if key matches an index level then drop the level. 

1879 - (axis=1): If key matches an index label then drop the row. 

1880 Otherwise if key matches a column level then drop the level. 

1881 

1882 Parameters 

1883 ---------- 

1884 keys : str or list of str 

1885 labels or levels to drop 

1886 axis : int, default 0 

1887 Axis that levels are associated with (0 for index, 1 for columns) 

1888 

1889 Returns 

1890 ------- 

1891 dropped: DataFrame 

1892 

1893 Raises 

1894 ------ 

1895 ValueError 

1896 if any `keys` match neither a label nor a level 

1897 """ 

1898 axis = self._get_axis_number(axis) 

1899 

1900 # Validate keys 

1901 keys = com.maybe_make_list(keys) 

1902 invalid_keys = [ 

1903 k for k in keys if not self._is_label_or_level_reference(k, axis=axis) 

1904 ] 

1905 

1906 if invalid_keys: 

1907 raise ValueError( 

1908 "The following keys are not valid labels or " 

1909 f"levels for axis {axis}: {invalid_keys}" 

1910 ) 

1911 

1912 # Compute levels and labels to drop 

1913 levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] 

1914 

1915 labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] 

1916 

1917 # Perform copy upfront and then use inplace operations below. 

1918 # This ensures that we always perform exactly one copy. 

1919 # ``copy`` and/or ``inplace`` options could be added in the future. 

1920 dropped = self.copy() 

1921 

1922 if axis == 0: 

1923 # Handle dropping index levels 

1924 if levels_to_drop: 

1925 dropped.reset_index(levels_to_drop, drop=True, inplace=True) 

1926 

1927 # Handle dropping columns labels 

1928 if labels_to_drop: 

1929 dropped.drop(labels_to_drop, axis=1, inplace=True) 

1930 else: 

1931 # Handle dropping column levels 

1932 if levels_to_drop: 

1933 if isinstance(dropped.columns, MultiIndex): 

1934 # Drop the specified levels from the MultiIndex 

1935 dropped.columns = dropped.columns.droplevel(levels_to_drop) 

1936 else: 

1937 # Drop the last level of Index by replacing with 

1938 # a RangeIndex 

1939 dropped.columns = RangeIndex(dropped.columns.size) 

1940 

1941 # Handle dropping index labels 

1942 if labels_to_drop: 

1943 dropped.drop(labels_to_drop, axis=0, inplace=True) 

1944 

1945 return dropped 

1946 

1947 # ---------------------------------------------------------------------- 

1948 # Iteration 

1949 

1950 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

1951 # Incompatible types in assignment (expression has type "None", base class 

1952 # "object" defined the type as "Callable[[object], int]") 

1953 __hash__: ClassVar[None] # type: ignore[assignment] 

1954 

1955 def __iter__(self): 

1956 """ 

1957 Iterate over info axis. 

1958 

1959 Returns 

1960 ------- 

1961 iterator 

1962 Info axis as iterator. 

1963 """ 

1964 return iter(self._info_axis) 

1965 

1966 # can we get a better explanation of this? 

1967 def keys(self) -> Index: 

1968 """ 

1969 Get the 'info axis' (see Indexing for more). 

1970 

1971 This is index for Series, columns for DataFrame. 

1972 

1973 Returns 

1974 ------- 

1975 Index 

1976 Info axis. 

1977 """ 

1978 return self._info_axis 

1979 

1980 def items(self): 

1981 """ 

1982 Iterate over (label, values) on info axis 

1983 

1984 This is index for Series and columns for DataFrame. 

1985 

1986 Returns 

1987 ------- 

1988 Generator 

1989 """ 

1990 for h in self._info_axis: 

1991 yield h, self[h] 

1992 

1993 def __len__(self) -> int: 

1994 """Returns length of info axis""" 

1995 return len(self._info_axis) 

1996 

1997 @final 

1998 def __contains__(self, key) -> bool_t: 

1999 """True if the key is in the info axis""" 

2000 return key in self._info_axis 

2001 

2002 @property 

2003 def empty(self) -> bool_t: 

2004 """ 

2005 Indicator whether Series/DataFrame is empty. 

2006 

2007 True if Series/DataFrame is entirely empty (no items), meaning any of the 

2008 axes are of length 0. 

2009 

2010 Returns 

2011 ------- 

2012 bool 

2013 If Series/DataFrame is empty, return True, if not return False. 

2014 

2015 See Also 

2016 -------- 

2017 Series.dropna : Return series without null values. 

2018 DataFrame.dropna : Return DataFrame with labels on given axis omitted 

2019 where (all or any) data are missing. 

2020 

2021 Notes 

2022 ----- 

2023 If Series/DataFrame contains only NaNs, it is still not considered empty. See 

2024 the example below. 

2025 

2026 Examples 

2027 -------- 

2028 An example of an actual empty DataFrame. Notice the index is empty: 

2029 

2030 >>> df_empty = pd.DataFrame({'A' : []}) 

2031 >>> df_empty 

2032 Empty DataFrame 

2033 Columns: [A] 

2034 Index: [] 

2035 >>> df_empty.empty 

2036 True 

2037 

2038 If we only have NaNs in our DataFrame, it is not considered empty! We 

2039 will need to drop the NaNs to make the DataFrame empty: 

2040 

2041 >>> df = pd.DataFrame({'A' : [np.nan]}) 

2042 >>> df 

2043 A 

2044 0 NaN 

2045 >>> df.empty 

2046 False 

2047 >>> df.dropna().empty 

2048 True 

2049 

2050 >>> ser_empty = pd.Series({'A' : []}) 

2051 >>> ser_empty 

2052 A [] 

2053 dtype: object 

2054 >>> ser_empty.empty 

2055 False 

2056 >>> ser_empty = pd.Series() 

2057 >>> ser_empty.empty 

2058 True 

2059 """ 

2060 return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) 

2061 

2062 # ---------------------------------------------------------------------- 

2063 # Array Interface 

2064 

2065 # This is also set in IndexOpsMixin 

2066 # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented 

2067 __array_priority__: int = 1000 

2068 

2069 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: 

2070 return np.asarray(self._values, dtype=dtype) 

2071 

2072 def __array_wrap__( 

2073 self, 

2074 result: np.ndarray, 

2075 context: tuple[Callable, tuple[Any, ...], int] | None = None, 

2076 ): 

2077 """ 

2078 Gets called after a ufunc and other functions. 

2079 

2080 Parameters 

2081 ---------- 

2082 result: np.ndarray 

2083 The result of the ufunc or other function called on the NumPy array 

2084 returned by __array__ 

2085 context: tuple of (func, tuple, int) 

2086 This parameter is returned by ufuncs as a 3-element tuple: (name of the 

2087 ufunc, arguments of the ufunc, domain of the ufunc), but is not set by 

2088 other numpy functions.q 

2089 

2090 Notes 

2091 ----- 

2092 Series implements __array_ufunc_ so this not called for ufunc on Series. 

2093 """ 

2094 # Note: at time of dask 2022.01.0, this is still used by dask 

2095 warnings.warn( 

2096 "The __array_wrap__ method of DataFrame and Series will be removed in " 

2097 "a future version", 

2098 DeprecationWarning, 

2099 stacklevel=find_stack_level(), 

2100 ) 

2101 res = lib.item_from_zerodim(result) 

2102 if is_scalar(res): 

2103 # e.g. we get here with np.ptp(series) 

2104 # ptp also requires the item_from_zerodim 

2105 return res 

2106 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) 

2107 return self._constructor(res, **d).__finalize__(self, method="__array_wrap__") 

2108 

2109 @final 

2110 def __array_ufunc__( 

2111 self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any 

2112 ): 

2113 return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs) 

2114 

2115 # ---------------------------------------------------------------------- 

2116 # Picklability 

2117 

2118 @final 

2119 def __getstate__(self) -> dict[str, Any]: 

2120 meta = {k: getattr(self, k, None) for k in self._metadata} 

2121 return { 

2122 "_mgr": self._mgr, 

2123 "_typ": self._typ, 

2124 "_metadata": self._metadata, 

2125 "attrs": self.attrs, 

2126 "_flags": {k: self.flags[k] for k in self.flags._keys}, 

2127 **meta, 

2128 } 

2129 

2130 @final 

2131 def __setstate__(self, state) -> None: 

2132 if isinstance(state, BlockManager): 

2133 self._mgr = state 

2134 elif isinstance(state, dict): 

2135 if "_data" in state and "_mgr" not in state: 

2136 # compat for older pickles 

2137 state["_mgr"] = state.pop("_data") 

2138 typ = state.get("_typ") 

2139 if typ is not None: 

2140 attrs = state.get("_attrs", {}) 

2141 object.__setattr__(self, "_attrs", attrs) 

2142 flags = state.get("_flags", {"allows_duplicate_labels": True}) 

2143 object.__setattr__(self, "_flags", Flags(self, **flags)) 

2144 

2145 # set in the order of internal names 

2146 # to avoid definitional recursion 

2147 # e.g. say fill_value needing _mgr to be 

2148 # defined 

2149 meta = set(self._internal_names + self._metadata) 

2150 for k in list(meta): 

2151 if k in state and k != "_flags": 

2152 v = state[k] 

2153 object.__setattr__(self, k, v) 

2154 

2155 for k, v in state.items(): 

2156 if k not in meta: 

2157 object.__setattr__(self, k, v) 

2158 

2159 else: 

2160 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2161 elif len(state) == 2: 

2162 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2163 

2164 self._item_cache: dict[Hashable, Series] = {} 

2165 

2166 # ---------------------------------------------------------------------- 

2167 # Rendering Methods 

2168 

2169 def __repr__(self) -> str: 

2170 # string representation based upon iterating over self 

2171 # (since, by definition, `PandasContainers` are iterable) 

2172 prepr = f"[{','.join(map(pprint_thing, self))}]" 

2173 return f"{type(self).__name__}({prepr})" 

2174 

2175 @final 

2176 def _repr_latex_(self): 

2177 """ 

2178 Returns a LaTeX representation for a particular object. 

2179 Mainly for use with nbconvert (jupyter notebook conversion to pdf). 

2180 """ 

2181 if config.get_option("display.latex.repr"): 

2182 return self.to_latex() 

2183 else: 

2184 return None 

2185 

2186 @final 

2187 def _repr_data_resource_(self): 

2188 """ 

2189 Not a real Jupyter special repr method, but we use the same 

2190 naming convention. 

2191 """ 

2192 if config.get_option("display.html.table_schema"): 

2193 data = self.head(config.get_option("display.max_rows")) 

2194 

2195 as_json = data.to_json(orient="table") 

2196 as_json = cast(str, as_json) 

2197 return json.loads(as_json, object_pairs_hook=collections.OrderedDict) 

2198 

2199 # ---------------------------------------------------------------------- 

2200 # I/O Methods 

2201 

2202 @final 

2203 @deprecate_kwarg(old_arg_name="verbose", new_arg_name=None) 

2204 @deprecate_kwarg(old_arg_name="encoding", new_arg_name=None) 

2205 @doc( 

2206 klass="object", 

2207 storage_options=_shared_docs["storage_options"], 

2208 storage_options_versionadded="1.2.0", 

2209 ) 

2210 def to_excel( 

2211 self, 

2212 excel_writer, 

2213 sheet_name: str = "Sheet1", 

2214 na_rep: str = "", 

2215 float_format: str | None = None, 

2216 columns: Sequence[Hashable] | None = None, 

2217 header: Sequence[Hashable] | bool_t = True, 

2218 index: bool_t = True, 

2219 index_label: IndexLabel = None, 

2220 startrow: int = 0, 

2221 startcol: int = 0, 

2222 engine: str | None = None, 

2223 merge_cells: bool_t = True, 

2224 encoding: lib.NoDefault = lib.no_default, 

2225 inf_rep: str = "inf", 

2226 verbose: lib.NoDefault = lib.no_default, 

2227 freeze_panes: tuple[int, int] | None = None, 

2228 storage_options: StorageOptions = None, 

2229 ) -> None: 

2230 """ 

2231 Write {klass} to an Excel sheet. 

2232 

2233 To write a single {klass} to an Excel .xlsx file it is only necessary to 

2234 specify a target file name. To write to multiple sheets it is necessary to 

2235 create an `ExcelWriter` object with a target file name, and specify a sheet 

2236 in the file to write to. 

2237 

2238 Multiple sheets may be written to by specifying unique `sheet_name`. 

2239 With all data written to the file it is necessary to save the changes. 

2240 Note that creating an `ExcelWriter` object with a file name that already 

2241 exists will result in the contents of the existing file being erased. 

2242 

2243 Parameters 

2244 ---------- 

2245 excel_writer : path-like, file-like, or ExcelWriter object 

2246 File path or existing ExcelWriter. 

2247 sheet_name : str, default 'Sheet1' 

2248 Name of sheet which will contain DataFrame. 

2249 na_rep : str, default '' 

2250 Missing data representation. 

2251 float_format : str, optional 

2252 Format string for floating point numbers. For example 

2253 ``float_format="%.2f"`` will format 0.1234 to 0.12. 

2254 columns : sequence or list of str, optional 

2255 Columns to write. 

2256 header : bool or list of str, default True 

2257 Write out the column names. If a list of string is given it is 

2258 assumed to be aliases for the column names. 

2259 index : bool, default True 

2260 Write row names (index). 

2261 index_label : str or sequence, optional 

2262 Column label for index column(s) if desired. If not specified, and 

2263 `header` and `index` are True, then the index names are used. A 

2264 sequence should be given if the DataFrame uses MultiIndex. 

2265 startrow : int, default 0 

2266 Upper left cell row to dump data frame. 

2267 startcol : int, default 0 

2268 Upper left cell column to dump data frame. 

2269 engine : str, optional 

2270 Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this 

2271 via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and 

2272 ``io.excel.xlsm.writer``. 

2273 

2274 .. deprecated:: 1.2.0 

2275 

2276 As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer 

2277 maintained, the ``xlwt`` engine will be removed in a future version 

2278 of pandas. 

2279 

2280 merge_cells : bool, default True 

2281 Write MultiIndex and Hierarchical Rows as merged cells. 

2282 encoding : str, optional 

2283 Encoding of the resulting excel file. Only necessary for xlwt, 

2284 other writers support unicode natively. 

2285 

2286 .. deprecated:: 1.5.0 

2287 

2288 This keyword was not used. 

2289 

2290 inf_rep : str, default 'inf' 

2291 Representation for infinity (there is no native representation for 

2292 infinity in Excel). 

2293 verbose : bool, default True 

2294 Display more information in the error logs. 

2295 

2296 .. deprecated:: 1.5.0 

2297 

2298 This keyword was not used. 

2299 

2300 freeze_panes : tuple of int (length 2), optional 

2301 Specifies the one-based bottommost row and rightmost column that 

2302 is to be frozen. 

2303 {storage_options} 

2304 

2305 .. versionadded:: {storage_options_versionadded} 

2306 

2307 See Also 

2308 -------- 

2309 to_csv : Write DataFrame to a comma-separated values (csv) file. 

2310 ExcelWriter : Class for writing DataFrame objects into excel sheets. 

2311 read_excel : Read an Excel file into a pandas DataFrame. 

2312 read_csv : Read a comma-separated values (csv) file into DataFrame. 

2313 io.formats.style.Styler.to_excel : Add styles to Excel sheet. 

2314 

2315 Notes 

2316 ----- 

2317 For compatibility with :meth:`~DataFrame.to_csv`, 

2318 to_excel serializes lists and dicts to strings before writing. 

2319 

2320 Once a workbook has been saved it is not possible to write further 

2321 data without rewriting the whole workbook. 

2322 

2323 Examples 

2324 -------- 

2325 

2326 Create, write to and save a workbook: 

2327 

2328 >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], 

2329 ... index=['row 1', 'row 2'], 

2330 ... columns=['col 1', 'col 2']) 

2331 >>> df1.to_excel("output.xlsx") # doctest: +SKIP 

2332 

2333 To specify the sheet name: 

2334 

2335 >>> df1.to_excel("output.xlsx", 

2336 ... sheet_name='Sheet_name_1') # doctest: +SKIP 

2337 

2338 If you wish to write to more than one sheet in the workbook, it is 

2339 necessary to specify an ExcelWriter object: 

2340 

2341 >>> df2 = df1.copy() 

2342 >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP 

2343 ... df1.to_excel(writer, sheet_name='Sheet_name_1') 

2344 ... df2.to_excel(writer, sheet_name='Sheet_name_2') 

2345 

2346 ExcelWriter can also be used to append to an existing Excel file: 

2347 

2348 >>> with pd.ExcelWriter('output.xlsx', 

2349 ... mode='a') as writer: # doctest: +SKIP 

2350 ... df.to_excel(writer, sheet_name='Sheet_name_3') 

2351 

2352 To set the library that is used to write the Excel file, 

2353 you can pass the `engine` keyword (the default engine is 

2354 automatically chosen depending on the file extension): 

2355 

2356 >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP 

2357 """ 

2358 

2359 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

2360 

2361 from pandas.io.formats.excel import ExcelFormatter 

2362 

2363 formatter = ExcelFormatter( 

2364 df, 

2365 na_rep=na_rep, 

2366 cols=columns, 

2367 header=header, 

2368 float_format=float_format, 

2369 index=index, 

2370 index_label=index_label, 

2371 merge_cells=merge_cells, 

2372 inf_rep=inf_rep, 

2373 ) 

2374 formatter.write( 

2375 excel_writer, 

2376 sheet_name=sheet_name, 

2377 startrow=startrow, 

2378 startcol=startcol, 

2379 freeze_panes=freeze_panes, 

2380 engine=engine, 

2381 storage_options=storage_options, 

2382 ) 

2383 

2384 @final 

2385 @doc( 

2386 storage_options=_shared_docs["storage_options"], 

2387 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

2388 ) 

2389 def to_json( 

2390 self, 

2391 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

2392 orient: str | None = None, 

2393 date_format: str | None = None, 

2394 double_precision: int = 10, 

2395 force_ascii: bool_t = True, 

2396 date_unit: str = "ms", 

2397 default_handler: Callable[[Any], JSONSerializable] | None = None, 

2398 lines: bool_t = False, 

2399 compression: CompressionOptions = "infer", 

2400 index: bool_t = True, 

2401 indent: int | None = None, 

2402 storage_options: StorageOptions = None, 

2403 ) -> str | None: 

2404 """ 

2405 Convert the object to a JSON string. 

2406 

2407 Note NaN's and None will be converted to null and datetime objects 

2408 will be converted to UNIX timestamps. 

2409 

2410 Parameters 

2411 ---------- 

2412 path_or_buf : str, path object, file-like object, or None, default None 

2413 String, path object (implementing os.PathLike[str]), or file-like 

2414 object implementing a write() function. If None, the result is 

2415 returned as a string. 

2416 orient : str 

2417 Indication of expected JSON string format. 

2418 

2419 * Series: 

2420 

2421 - default is 'index' 

2422 - allowed values are: {{'split', 'records', 'index', 'table'}}. 

2423 

2424 * DataFrame: 

2425 

2426 - default is 'columns' 

2427 - allowed values are: {{'split', 'records', 'index', 'columns', 

2428 'values', 'table'}}. 

2429 

2430 * The format of the JSON string: 

2431 

2432 - 'split' : dict like {{'index' -> [index], 'columns' -> [columns], 

2433 'data' -> [values]}} 

2434 - 'records' : list like [{{column -> value}}, ... , {{column -> value}}] 

2435 - 'index' : dict like {{index -> {{column -> value}}}} 

2436 - 'columns' : dict like {{column -> {{index -> value}}}} 

2437 - 'values' : just the values array 

2438 - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}} 

2439 

2440 Describing the data, where data component is like ``orient='records'``. 

2441 

2442 date_format : {{None, 'epoch', 'iso'}} 

2443 Type of date conversion. 'epoch' = epoch milliseconds, 

2444 'iso' = ISO8601. The default depends on the `orient`. For 

2445 ``orient='table'``, the default is 'iso'. For all other orients, 

2446 the default is 'epoch'. 

2447 double_precision : int, default 10 

2448 The number of decimal places to use when encoding 

2449 floating point values. 

2450 force_ascii : bool, default True 

2451 Force encoded string to be ASCII. 

2452 date_unit : str, default 'ms' (milliseconds) 

2453 The time unit to encode to, governs timestamp and ISO8601 

2454 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, 

2455 microsecond, and nanosecond respectively. 

2456 default_handler : callable, default None 

2457 Handler to call if object cannot otherwise be converted to a 

2458 suitable format for JSON. Should receive a single argument which is 

2459 the object to convert and return a serialisable object. 

2460 lines : bool, default False 

2461 If 'orient' is 'records' write out line-delimited json format. Will 

2462 throw ValueError if incorrect 'orient' since others are not 

2463 list-like. 

2464 {compression_options} 

2465 

2466 .. versionchanged:: 1.4.0 Zstandard support. 

2467 

2468 index : bool, default True 

2469 Whether to include the index values in the JSON string. Not 

2470 including the index (``index=False``) is only supported when 

2471 orient is 'split' or 'table'. 

2472 indent : int, optional 

2473 Length of whitespace used to indent each record. 

2474 

2475 .. versionadded:: 1.0.0 

2476 

2477 {storage_options} 

2478 

2479 .. versionadded:: 1.2.0 

2480 

2481 Returns 

2482 ------- 

2483 None or str 

2484 If path_or_buf is None, returns the resulting json format as a 

2485 string. Otherwise returns None. 

2486 

2487 See Also 

2488 -------- 

2489 read_json : Convert a JSON string to pandas object. 

2490 

2491 Notes 

2492 ----- 

2493 The behavior of ``indent=0`` varies from the stdlib, which does not 

2494 indent the output but does insert newlines. Currently, ``indent=0`` 

2495 and the default ``indent=None`` are equivalent in pandas, though this 

2496 may change in a future release. 

2497 

2498 ``orient='table'`` contains a 'pandas_version' field under 'schema'. 

2499 This stores the version of `pandas` used in the latest revision of the 

2500 schema. 

2501 

2502 Examples 

2503 -------- 

2504 >>> import json 

2505 >>> df = pd.DataFrame( 

2506 ... [["a", "b"], ["c", "d"]], 

2507 ... index=["row 1", "row 2"], 

2508 ... columns=["col 1", "col 2"], 

2509 ... ) 

2510 

2511 >>> result = df.to_json(orient="split") 

2512 >>> parsed = json.loads(result) 

2513 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2514 {{ 

2515 "columns": [ 

2516 "col 1", 

2517 "col 2" 

2518 ], 

2519 "index": [ 

2520 "row 1", 

2521 "row 2" 

2522 ], 

2523 "data": [ 

2524 [ 

2525 "a", 

2526 "b" 

2527 ], 

2528 [ 

2529 "c", 

2530 "d" 

2531 ] 

2532 ] 

2533 }} 

2534 

2535 Encoding/decoding a Dataframe using ``'records'`` formatted JSON. 

2536 Note that index labels are not preserved with this encoding. 

2537 

2538 >>> result = df.to_json(orient="records") 

2539 >>> parsed = json.loads(result) 

2540 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2541 [ 

2542 {{ 

2543 "col 1": "a", 

2544 "col 2": "b" 

2545 }}, 

2546 {{ 

2547 "col 1": "c", 

2548 "col 2": "d" 

2549 }} 

2550 ] 

2551 

2552 Encoding/decoding a Dataframe using ``'index'`` formatted JSON: 

2553 

2554 >>> result = df.to_json(orient="index") 

2555 >>> parsed = json.loads(result) 

2556 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2557 {{ 

2558 "row 1": {{ 

2559 "col 1": "a", 

2560 "col 2": "b" 

2561 }}, 

2562 "row 2": {{ 

2563 "col 1": "c", 

2564 "col 2": "d" 

2565 }} 

2566 }} 

2567 

2568 Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: 

2569 

2570 >>> result = df.to_json(orient="columns") 

2571 >>> parsed = json.loads(result) 

2572 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2573 {{ 

2574 "col 1": {{ 

2575 "row 1": "a", 

2576 "row 2": "c" 

2577 }}, 

2578 "col 2": {{ 

2579 "row 1": "b", 

2580 "row 2": "d" 

2581 }} 

2582 }} 

2583 

2584 Encoding/decoding a Dataframe using ``'values'`` formatted JSON: 

2585 

2586 >>> result = df.to_json(orient="values") 

2587 >>> parsed = json.loads(result) 

2588 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2589 [ 

2590 [ 

2591 "a", 

2592 "b" 

2593 ], 

2594 [ 

2595 "c", 

2596 "d" 

2597 ] 

2598 ] 

2599 

2600 Encoding with Table Schema: 

2601 

2602 >>> result = df.to_json(orient="table") 

2603 >>> parsed = json.loads(result) 

2604 >>> json.dumps(parsed, indent=4) # doctest: +SKIP 

2605 {{ 

2606 "schema": {{ 

2607 "fields": [ 

2608 {{ 

2609 "name": "index", 

2610 "type": "string" 

2611 }}, 

2612 {{ 

2613 "name": "col 1", 

2614 "type": "string" 

2615 }}, 

2616 {{ 

2617 "name": "col 2", 

2618 "type": "string" 

2619 }} 

2620 ], 

2621 "primaryKey": [ 

2622 "index" 

2623 ], 

2624 "pandas_version": "1.4.0" 

2625 }}, 

2626 "data": [ 

2627 {{ 

2628 "index": "row 1", 

2629 "col 1": "a", 

2630 "col 2": "b" 

2631 }}, 

2632 {{ 

2633 "index": "row 2", 

2634 "col 1": "c", 

2635 "col 2": "d" 

2636 }} 

2637 ] 

2638 }} 

2639 """ 

2640 from pandas.io import json 

2641 

2642 if date_format is None and orient == "table": 

2643 date_format = "iso" 

2644 elif date_format is None: 

2645 date_format = "epoch" 

2646 

2647 config.is_nonnegative_int(indent) 

2648 indent = indent or 0 

2649 

2650 return json.to_json( 

2651 path_or_buf=path_or_buf, 

2652 obj=self, 

2653 orient=orient, 

2654 date_format=date_format, 

2655 double_precision=double_precision, 

2656 force_ascii=force_ascii, 

2657 date_unit=date_unit, 

2658 default_handler=default_handler, 

2659 lines=lines, 

2660 compression=compression, 

2661 index=index, 

2662 indent=indent, 

2663 storage_options=storage_options, 

2664 ) 

2665 

2666 @final 

2667 def to_hdf( 

2668 self, 

2669 path_or_buf: FilePath | HDFStore, 

2670 key: str, 

2671 mode: str = "a", 

2672 complevel: int | None = None, 

2673 complib: str | None = None, 

2674 append: bool_t = False, 

2675 format: str | None = None, 

2676 index: bool_t = True, 

2677 min_itemsize: int | dict[str, int] | None = None, 

2678 nan_rep=None, 

2679 dropna: bool_t | None = None, 

2680 data_columns: Literal[True] | list[str] | None = None, 

2681 errors: str = "strict", 

2682 encoding: str = "UTF-8", 

2683 ) -> None: 

2684 """ 

2685 Write the contained data to an HDF5 file using HDFStore. 

2686 

2687 Hierarchical Data Format (HDF) is self-describing, allowing an 

2688 application to interpret the structure and contents of a file with 

2689 no outside information. One HDF file can hold a mix of related objects 

2690 which can be accessed as a group or as individual objects. 

2691 

2692 In order to add another DataFrame or Series to an existing HDF file 

2693 please use append mode and a different a key. 

2694 

2695 .. warning:: 

2696 

2697 One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, 

2698 but the type of the subclass is lost upon storing. 

2699 

2700 For more information see the :ref:`user guide <io.hdf5>`. 

2701 

2702 Parameters 

2703 ---------- 

2704 path_or_buf : str or pandas.HDFStore 

2705 File path or HDFStore object. 

2706 key : str 

2707 Identifier for the group in the store. 

2708 mode : {'a', 'w', 'r+'}, default 'a' 

2709 Mode to open file: 

2710 

2711 - 'w': write, a new file is created (an existing file with 

2712 the same name would be deleted). 

2713 - 'a': append, an existing file is opened for reading and 

2714 writing, and if the file does not exist it is created. 

2715 - 'r+': similar to 'a', but the file must already exist. 

2716 complevel : {0-9}, default None 

2717 Specifies a compression level for data. 

2718 A value of 0 or None disables compression. 

2719 complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' 

2720 Specifies the compression library to be used. 

2721 As of v0.20.2 these additional compressors for Blosc are supported 

2722 (default if no compressor specified: 'blosc:blosclz'): 

2723 {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 

2724 'blosc:zlib', 'blosc:zstd'}. 

2725 Specifying a compression library which is not available issues 

2726 a ValueError. 

2727 append : bool, default False 

2728 For Table formats, append the input data to the existing. 

2729 format : {'fixed', 'table', None}, default 'fixed' 

2730 Possible values: 

2731 

2732 - 'fixed': Fixed format. Fast writing/reading. Not-appendable, 

2733 nor searchable. 

2734 - 'table': Table format. Write as a PyTables Table structure 

2735 which may perform worse but allow more flexible operations 

2736 like searching / selecting subsets of the data. 

2737 - If None, pd.get_option('io.hdf.default_format') is checked, 

2738 followed by fallback to "fixed". 

2739 index : bool, default True 

2740 Write DataFrame index as a column. 

2741 min_itemsize : dict or int, optional 

2742 Map column names to minimum string sizes for columns. 

2743 nan_rep : Any, optional 

2744 How to represent null values as str. 

2745 Not allowed with append=True. 

2746 dropna : bool, default False, optional 

2747 Remove missing values. 

2748 data_columns : list of columns or True, optional 

2749 List of columns to create as indexed data columns for on-disk 

2750 queries, or True to use all columns. By default only the axes 

2751 of the object are indexed. See 

2752 :ref:`Query via data columns<io.hdf5-query-data-columns>`. for 

2753 more information. 

2754 Applicable only to format='table'. 

2755 errors : str, default 'strict' 

2756 Specifies how encoding and decoding errors are to be handled. 

2757 See the errors argument for :func:`open` for a full list 

2758 of options. 

2759 encoding : str, default "UTF-8" 

2760 

2761 See Also 

2762 -------- 

2763 read_hdf : Read from HDF file. 

2764 DataFrame.to_orc : Write a DataFrame to the binary orc format. 

2765 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2766 DataFrame.to_sql : Write to a SQL table. 

2767 DataFrame.to_feather : Write out feather-format for DataFrames. 

2768 DataFrame.to_csv : Write out to a csv file. 

2769 

2770 Examples 

2771 -------- 

2772 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, 

2773 ... index=['a', 'b', 'c']) # doctest: +SKIP 

2774 >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP 

2775 

2776 We can add another object to the same file: 

2777 

2778 >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP 

2779 >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP 

2780 

2781 Reading from HDF file: 

2782 

2783 >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP 

2784 A B 

2785 a 1 4 

2786 b 2 5 

2787 c 3 6 

2788 >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP 

2789 0 1 

2790 1 2 

2791 2 3 

2792 3 4 

2793 dtype: int64 

2794 """ 

2795 from pandas.io import pytables 

2796 

2797 # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected 

2798 # "Union[DataFrame, Series]" [arg-type] 

2799 pytables.to_hdf( 

2800 path_or_buf, 

2801 key, 

2802 self, # type: ignore[arg-type] 

2803 mode=mode, 

2804 complevel=complevel, 

2805 complib=complib, 

2806 append=append, 

2807 format=format, 

2808 index=index, 

2809 min_itemsize=min_itemsize, 

2810 nan_rep=nan_rep, 

2811 dropna=dropna, 

2812 data_columns=data_columns, 

2813 errors=errors, 

2814 encoding=encoding, 

2815 ) 

2816 

2817 @final 

2818 def to_sql( 

2819 self, 

2820 name: str, 

2821 con, 

2822 schema: str | None = None, 

2823 if_exists: str = "fail", 

2824 index: bool_t = True, 

2825 index_label: IndexLabel = None, 

2826 chunksize: int | None = None, 

2827 dtype: DtypeArg | None = None, 

2828 method: str | None = None, 

2829 ) -> int | None: 

2830 """ 

2831 Write records stored in a DataFrame to a SQL database. 

2832 

2833 Databases supported by SQLAlchemy [1]_ are supported. Tables can be 

2834 newly created, appended to, or overwritten. 

2835 

2836 Parameters 

2837 ---------- 

2838 name : str 

2839 Name of SQL table. 

2840 con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection 

2841 Using SQLAlchemy makes it possible to use any DB supported by that 

2842 library. Legacy support is provided for sqlite3.Connection objects. The user 

2843 is responsible for engine disposal and connection closure for the SQLAlchemy 

2844 connectable See `here \ 

2845 <https://docs.sqlalchemy.org/en/13/core/connections.html>`_. 

2846 

2847 schema : str, optional 

2848 Specify the schema (if database flavor supports this). If None, use 

2849 default schema. 

2850 if_exists : {'fail', 'replace', 'append'}, default 'fail' 

2851 How to behave if the table already exists. 

2852 

2853 * fail: Raise a ValueError. 

2854 * replace: Drop the table before inserting new values. 

2855 * append: Insert new values to the existing table. 

2856 

2857 index : bool, default True 

2858 Write DataFrame index as a column. Uses `index_label` as the column 

2859 name in the table. 

2860 index_label : str or sequence, default None 

2861 Column label for index column(s). If None is given (default) and 

2862 `index` is True, then the index names are used. 

2863 A sequence should be given if the DataFrame uses MultiIndex. 

2864 chunksize : int, optional 

2865 Specify the number of rows in each batch to be written at a time. 

2866 By default, all rows will be written at once. 

2867 dtype : dict or scalar, optional 

2868 Specifying the datatype for columns. If a dictionary is used, the 

2869 keys should be the column names and the values should be the 

2870 SQLAlchemy types or strings for the sqlite3 legacy mode. If a 

2871 scalar is provided, it will be applied to all columns. 

2872 method : {None, 'multi', callable}, optional 

2873 Controls the SQL insertion clause used: 

2874 

2875 * None : Uses standard SQL ``INSERT`` clause (one per row). 

2876 * 'multi': Pass multiple values in a single ``INSERT`` clause. 

2877 * callable with signature ``(pd_table, conn, keys, data_iter)``. 

2878 

2879 Details and a sample callable implementation can be found in the 

2880 section :ref:`insert method <io.sql.method>`. 

2881 

2882 Returns 

2883 ------- 

2884 None or int 

2885 Number of rows affected by to_sql. None is returned if the callable 

2886 passed into ``method`` does not return an integer number of rows. 

2887 

2888 The number of returned rows affected is the sum of the ``rowcount`` 

2889 attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not 

2890 reflect the exact number of written rows as stipulated in the 

2891 `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or 

2892 `SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__. 

2893 

2894 .. versionadded:: 1.4.0 

2895 

2896 Raises 

2897 ------ 

2898 ValueError 

2899 When the table already exists and `if_exists` is 'fail' (the 

2900 default). 

2901 

2902 See Also 

2903 -------- 

2904 read_sql : Read a DataFrame from a table. 

2905 

2906 Notes 

2907 ----- 

2908 Timezone aware datetime columns will be written as 

2909 ``Timestamp with timezone`` type with SQLAlchemy if supported by the 

2910 database. Otherwise, the datetimes will be stored as timezone unaware 

2911 timestamps local to the original timezone. 

2912 

2913 References 

2914 ---------- 

2915 .. [1] https://docs.sqlalchemy.org 

2916 .. [2] https://www.python.org/dev/peps/pep-0249/ 

2917 

2918 Examples 

2919 -------- 

2920 Create an in-memory SQLite database. 

2921 

2922 >>> from sqlalchemy import create_engine 

2923 >>> engine = create_engine('sqlite://', echo=False) 

2924 

2925 Create a table from scratch with 3 rows. 

2926 

2927 >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) 

2928 >>> df 

2929 name 

2930 0 User 1 

2931 1 User 2 

2932 2 User 3 

2933 

2934 >>> df.to_sql('users', con=engine) 

2935 3 

2936 >>> engine.execute("SELECT * FROM users").fetchall() 

2937 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] 

2938 

2939 An `sqlalchemy.engine.Connection` can also be passed to `con`: 

2940 

2941 >>> with engine.begin() as connection: 

2942 ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) 

2943 ... df1.to_sql('users', con=connection, if_exists='append') 

2944 2 

2945 

2946 This is allowed to support operations that require that the same 

2947 DBAPI connection is used for the entire operation. 

2948 

2949 >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) 

2950 >>> df2.to_sql('users', con=engine, if_exists='append') 

2951 2 

2952 >>> engine.execute("SELECT * FROM users").fetchall() 

2953 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), 

2954 (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), 

2955 (1, 'User 7')] 

2956 

2957 Overwrite the table with just ``df2``. 

2958 

2959 >>> df2.to_sql('users', con=engine, if_exists='replace', 

2960 ... index_label='id') 

2961 2 

2962 >>> engine.execute("SELECT * FROM users").fetchall() 

2963 [(0, 'User 6'), (1, 'User 7')] 

2964 

2965 Specify the dtype (especially useful for integers with missing values). 

2966 Notice that while pandas is forced to store the data as floating point, 

2967 the database supports nullable integers. When fetching the data with 

2968 Python, we get back integer scalars. 

2969 

2970 >>> df = pd.DataFrame({"A": [1, None, 2]}) 

2971 >>> df 

2972 A 

2973 0 1.0 

2974 1 NaN 

2975 2 2.0 

2976 

2977 >>> from sqlalchemy.types import Integer 

2978 >>> df.to_sql('integers', con=engine, index=False, 

2979 ... dtype={"A": Integer()}) 

2980 3 

2981 

2982 >>> engine.execute("SELECT * FROM integers").fetchall() 

2983 [(1,), (None,), (2,)] 

2984 """ # noqa:E501 

2985 from pandas.io import sql 

2986 

2987 return sql.to_sql( 

2988 self, 

2989 name, 

2990 con, 

2991 schema=schema, 

2992 if_exists=if_exists, 

2993 index=index, 

2994 index_label=index_label, 

2995 chunksize=chunksize, 

2996 dtype=dtype, 

2997 method=method, 

2998 ) 

2999 

3000 @final 

3001 @doc( 

3002 storage_options=_shared_docs["storage_options"], 

3003 compression_options=_shared_docs["compression_options"] % "path", 

3004 ) 

3005 def to_pickle( 

3006 self, 

3007 path: FilePath | WriteBuffer[bytes], 

3008 compression: CompressionOptions = "infer", 

3009 protocol: int = pickle.HIGHEST_PROTOCOL, 

3010 storage_options: StorageOptions = None, 

3011 ) -> None: 

3012 """ 

3013 Pickle (serialize) object to file. 

3014 

3015 Parameters 

3016 ---------- 

3017 path : str, path object, or file-like object 

3018 String, path object (implementing ``os.PathLike[str]``), or file-like 

3019 object implementing a binary ``write()`` function. File path where 

3020 the pickled object will be stored. 

3021 {compression_options} 

3022 protocol : int 

3023 Int which indicates which protocol should be used by the pickler, 

3024 default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible 

3025 values are 0, 1, 2, 3, 4, 5. A negative value for the protocol 

3026 parameter is equivalent to setting its value to HIGHEST_PROTOCOL. 

3027 

3028 .. [1] https://docs.python.org/3/library/pickle.html. 

3029 

3030 {storage_options} 

3031 

3032 .. versionadded:: 1.2.0 

3033 

3034 See Also 

3035 -------- 

3036 read_pickle : Load pickled pandas object (or any object) from file. 

3037 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

3038 DataFrame.to_sql : Write DataFrame to a SQL database. 

3039 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

3040 

3041 Examples 

3042 -------- 

3043 >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP 

3044 >>> original_df # doctest: +SKIP 

3045 foo bar 

3046 0 0 5 

3047 1 1 6 

3048 2 2 7 

3049 3 3 8 

3050 4 4 9 

3051 >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP 

3052 

3053 >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP 

3054 >>> unpickled_df # doctest: +SKIP 

3055 foo bar 

3056 0 0 5 

3057 1 1 6 

3058 2 2 7 

3059 3 3 8 

3060 4 4 9 

3061 """ # noqa: E501 

3062 from pandas.io.pickle import to_pickle 

3063 

3064 to_pickle( 

3065 self, 

3066 path, 

3067 compression=compression, 

3068 protocol=protocol, 

3069 storage_options=storage_options, 

3070 ) 

3071 

3072 @final 

3073 def to_clipboard( 

3074 self, excel: bool_t = True, sep: str | None = None, **kwargs 

3075 ) -> None: 

3076 r""" 

3077 Copy object to the system clipboard. 

3078 

3079 Write a text representation of object to the system clipboard. 

3080 This can be pasted into Excel, for example. 

3081 

3082 Parameters 

3083 ---------- 

3084 excel : bool, default True 

3085 Produce output in a csv format for easy pasting into excel. 

3086 

3087 - True, use the provided separator for csv pasting. 

3088 - False, write a string representation of the object to the clipboard. 

3089 

3090 sep : str, default ``'\t'`` 

3091 Field delimiter. 

3092 **kwargs 

3093 These parameters will be passed to DataFrame.to_csv. 

3094 

3095 See Also 

3096 -------- 

3097 DataFrame.to_csv : Write a DataFrame to a comma-separated values 

3098 (csv) file. 

3099 read_clipboard : Read text from clipboard and pass to read_csv. 

3100 

3101 Notes 

3102 ----- 

3103 Requirements for your platform. 

3104 

3105 - Linux : `xclip`, or `xsel` (with `PyQt4` modules) 

3106 - Windows : none 

3107 - macOS : none 

3108 

3109 This method uses the processes developed for the package `pyperclip`. A 

3110 solution to render any output string format is given in the examples. 

3111 

3112 Examples 

3113 -------- 

3114 Copy the contents of a DataFrame to the clipboard. 

3115 

3116 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) 

3117 

3118 >>> df.to_clipboard(sep=',') # doctest: +SKIP 

3119 ... # Wrote the following to the system clipboard: 

3120 ... # ,A,B,C 

3121 ... # 0,1,2,3 

3122 ... # 1,4,5,6 

3123 

3124 We can omit the index by passing the keyword `index` and setting 

3125 it to false. 

3126 

3127 >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP 

3128 ... # Wrote the following to the system clipboard: 

3129 ... # A,B,C 

3130 ... # 1,2,3 

3131 ... # 4,5,6 

3132 

3133 Using the original `pyperclip` package for any string output format. 

3134 

3135 .. code-block:: python 

3136 

3137 import pyperclip 

3138 html = df.style.to_html() 

3139 pyperclip.copy(html) 

3140 """ 

3141 from pandas.io import clipboards 

3142 

3143 clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) 

3144 

3145 @final 

3146 def to_xarray(self): 

3147 """ 

3148 Return an xarray object from the pandas object. 

3149 

3150 Returns 

3151 ------- 

3152 xarray.DataArray or xarray.Dataset 

3153 Data in the pandas structure converted to Dataset if the object is 

3154 a DataFrame, or a DataArray if the object is a Series. 

3155 

3156 See Also 

3157 -------- 

3158 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

3159 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

3160 

3161 Notes 

3162 ----- 

3163 See the `xarray docs <https://xarray.pydata.org/en/stable/>`__ 

3164 

3165 Examples 

3166 -------- 

3167 >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), 

3168 ... ('parrot', 'bird', 24.0, 2), 

3169 ... ('lion', 'mammal', 80.5, 4), 

3170 ... ('monkey', 'mammal', np.nan, 4)], 

3171 ... columns=['name', 'class', 'max_speed', 

3172 ... 'num_legs']) 

3173 >>> df 

3174 name class max_speed num_legs 

3175 0 falcon bird 389.0 2 

3176 1 parrot bird 24.0 2 

3177 2 lion mammal 80.5 4 

3178 3 monkey mammal NaN 4 

3179 

3180 >>> df.to_xarray() 

3181 <xarray.Dataset> 

3182 Dimensions: (index: 4) 

3183 Coordinates: 

3184 * index (index) int64 0 1 2 3 

3185 Data variables: 

3186 name (index) object 'falcon' 'parrot' 'lion' 'monkey' 

3187 class (index) object 'bird' 'bird' 'mammal' 'mammal' 

3188 max_speed (index) float64 389.0 24.0 80.5 nan 

3189 num_legs (index) int64 2 2 4 4 

3190 

3191 >>> df['max_speed'].to_xarray() 

3192 <xarray.DataArray 'max_speed' (index: 4)> 

3193 array([389. , 24. , 80.5, nan]) 

3194 Coordinates: 

3195 * index (index) int64 0 1 2 3 

3196 

3197 >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', 

3198 ... '2018-01-02', '2018-01-02']) 

3199 >>> df_multiindex = pd.DataFrame({'date': dates, 

3200 ... 'animal': ['falcon', 'parrot', 

3201 ... 'falcon', 'parrot'], 

3202 ... 'speed': [350, 18, 361, 15]}) 

3203 >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) 

3204 

3205 >>> df_multiindex 

3206 speed 

3207 date animal 

3208 2018-01-01 falcon 350 

3209 parrot 18 

3210 2018-01-02 falcon 361 

3211 parrot 15 

3212 

3213 >>> df_multiindex.to_xarray() 

3214 <xarray.Dataset> 

3215 Dimensions: (date: 2, animal: 2) 

3216 Coordinates: 

3217 * date (date) datetime64[ns] 2018-01-01 2018-01-02 

3218 * animal (animal) object 'falcon' 'parrot' 

3219 Data variables: 

3220 speed (date, animal) int64 350 18 361 15 

3221 """ 

3222 xarray = import_optional_dependency("xarray") 

3223 

3224 if self.ndim == 1: 

3225 return xarray.DataArray.from_series(self) 

3226 else: 

3227 return xarray.Dataset.from_dataframe(self) 

3228 

3229 @overload 

3230 def to_latex( 

3231 self, 

3232 buf: None = ..., 

3233 columns: Sequence[Hashable] | None = ..., 

3234 col_space: ColspaceArgType | None = ..., 

3235 header: bool_t | Sequence[str] = ..., 

3236 index: bool_t = ..., 

3237 na_rep: str = ..., 

3238 formatters: FormattersType | None = ..., 

3239 float_format: FloatFormatType | None = ..., 

3240 sparsify: bool_t | None = ..., 

3241 index_names: bool_t = ..., 

3242 bold_rows: bool_t = ..., 

3243 column_format: str | None = ..., 

3244 longtable: bool_t | None = ..., 

3245 escape: bool_t | None = ..., 

3246 encoding: str | None = ..., 

3247 decimal: str = ..., 

3248 multicolumn: bool_t | None = ..., 

3249 multicolumn_format: str | None = ..., 

3250 multirow: bool_t | None = ..., 

3251 caption: str | tuple[str, str] | None = ..., 

3252 label: str | None = ..., 

3253 position: str | None = ..., 

3254 ) -> str: 

3255 ... 

3256 

3257 @overload 

3258 def to_latex( 

3259 self, 

3260 buf: FilePath | WriteBuffer[str], 

3261 columns: Sequence[Hashable] | None = ..., 

3262 col_space: ColspaceArgType | None = ..., 

3263 header: bool_t | Sequence[str] = ..., 

3264 index: bool_t = ..., 

3265 na_rep: str = ..., 

3266 formatters: FormattersType | None = ..., 

3267 float_format: FloatFormatType | None = ..., 

3268 sparsify: bool_t | None = ..., 

3269 index_names: bool_t = ..., 

3270 bold_rows: bool_t = ..., 

3271 column_format: str | None = ..., 

3272 longtable: bool_t | None = ..., 

3273 escape: bool_t | None = ..., 

3274 encoding: str | None = ..., 

3275 decimal: str = ..., 

3276 multicolumn: bool_t | None = ..., 

3277 multicolumn_format: str | None = ..., 

3278 multirow: bool_t | None = ..., 

3279 caption: str | tuple[str, str] | None = ..., 

3280 label: str | None = ..., 

3281 position: str | None = ..., 

3282 ) -> None: 

3283 ... 

3284 

3285 @final 

3286 @doc(returns=fmt.return_docstring) 

3287 def to_latex( 

3288 self, 

3289 buf: FilePath | WriteBuffer[str] | None = None, 

3290 columns: Sequence[Hashable] | None = None, 

3291 col_space: ColspaceArgType | None = None, 

3292 header: bool_t | Sequence[str] = True, 

3293 index: bool_t = True, 

3294 na_rep: str = "NaN", 

3295 formatters: FormattersType | None = None, 

3296 float_format: FloatFormatType | None = None, 

3297 sparsify: bool_t | None = None, 

3298 index_names: bool_t = True, 

3299 bold_rows: bool_t = False, 

3300 column_format: str | None = None, 

3301 longtable: bool_t | None = None, 

3302 escape: bool_t | None = None, 

3303 encoding: str | None = None, 

3304 decimal: str = ".", 

3305 multicolumn: bool_t | None = None, 

3306 multicolumn_format: str | None = None, 

3307 multirow: bool_t | None = None, 

3308 caption: str | tuple[str, str] | None = None, 

3309 label: str | None = None, 

3310 position: str | None = None, 

3311 ) -> str | None: 

3312 r""" 

3313 Render object to a LaTeX tabular, longtable, or nested table. 

3314 

3315 Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted 

3316 into a main LaTeX document or read from an external file 

3317 with ``\input{{table.tex}}``. 

3318 

3319 .. versionchanged:: 1.0.0 

3320 Added caption and label arguments. 

3321 

3322 .. versionchanged:: 1.2.0 

3323 Added position argument, changed meaning of caption argument. 

3324 

3325 Parameters 

3326 ---------- 

3327 buf : str, Path or StringIO-like, optional, default None 

3328 Buffer to write to. If None, the output is returned as a string. 

3329 columns : list of label, optional 

3330 The subset of columns to write. Writes all columns by default. 

3331 col_space : int, optional 

3332 The minimum width of each column. 

3333 header : bool or list of str, default True 

3334 Write out the column names. If a list of strings is given, 

3335 it is assumed to be aliases for the column names. 

3336 index : bool, default True 

3337 Write row names (index). 

3338 na_rep : str, default 'NaN' 

3339 Missing data representation. 

3340 formatters : list of functions or dict of {{str: function}}, optional 

3341 Formatter functions to apply to columns' elements by position or 

3342 name. The result of each function must be a unicode string. 

3343 List must be of length equal to the number of columns. 

3344 float_format : one-parameter function or str, optional, default None 

3345 Formatter for floating point numbers. For example 

3346 ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will 

3347 both result in 0.1234 being formatted as 0.12. 

3348 sparsify : bool, optional 

3349 Set to False for a DataFrame with a hierarchical index to print 

3350 every multiindex key at each row. By default, the value will be 

3351 read from the config module. 

3352 index_names : bool, default True 

3353 Prints the names of the indexes. 

3354 bold_rows : bool, default False 

3355 Make the row labels bold in the output. 

3356 column_format : str, optional 

3357 The columns format as specified in `LaTeX table format 

3358 <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 

3359 columns. By default, 'l' will be used for all columns except 

3360 columns of numbers, which default to 'r'. 

3361 longtable : bool, optional 

3362 By default, the value will be read from the pandas config 

3363 module. Use a longtable environment instead of tabular. Requires 

3364 adding a \usepackage{{longtable}} to your LaTeX preamble. 

3365 escape : bool, optional 

3366 By default, the value will be read from the pandas config 

3367 module. When set to False prevents from escaping latex special 

3368 characters in column names. 

3369 encoding : str, optional 

3370 A string representing the encoding to use in the output file, 

3371 defaults to 'utf-8'. 

3372 decimal : str, default '.' 

3373 Character recognized as decimal separator, e.g. ',' in Europe. 

3374 multicolumn : bool, default True 

3375 Use \multicolumn to enhance MultiIndex columns. 

3376 The default will be read from the config module. 

3377 multicolumn_format : str, default 'l' 

3378 The alignment for multicolumns, similar to `column_format` 

3379 The default will be read from the config module. 

3380 multirow : bool, default False 

3381 Use \multirow to enhance MultiIndex rows. Requires adding a 

3382 \usepackage{{multirow}} to your LaTeX preamble. Will print 

3383 centered labels (instead of top-aligned) across the contained 

3384 rows, separating groups via clines. The default will be read 

3385 from the pandas config module. 

3386 caption : str or tuple, optional 

3387 Tuple (full_caption, short_caption), 

3388 which results in ``\caption[short_caption]{{full_caption}}``; 

3389 if a single string is passed, no short caption will be set. 

3390 

3391 .. versionadded:: 1.0.0 

3392 

3393 .. versionchanged:: 1.2.0 

3394 Optionally allow caption to be a tuple ``(full_caption, short_caption)``. 

3395 

3396 label : str, optional 

3397 The LaTeX label to be placed inside ``\label{{}}`` in the output. 

3398 This is used with ``\ref{{}}`` in the main ``.tex`` file. 

3399 

3400 .. versionadded:: 1.0.0 

3401 position : str, optional 

3402 The LaTeX positional argument for tables, to be placed after 

3403 ``\begin{{}}`` in the output. 

3404 

3405 .. versionadded:: 1.2.0 

3406 {returns} 

3407 See Also 

3408 -------- 

3409 io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX 

3410 with conditional formatting. 

3411 DataFrame.to_string : Render a DataFrame to a console-friendly 

3412 tabular output. 

3413 DataFrame.to_html : Render a DataFrame as an HTML table. 

3414 

3415 Examples 

3416 -------- 

3417 >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'], 

3418 ... mask=['red', 'purple'], 

3419 ... weapon=['sai', 'bo staff'])) 

3420 >>> print(df.to_latex(index=False)) # doctest: +SKIP 

3421 \begin{{tabular}}{{lll}} 

3422 \toprule 

3423 name & mask & weapon \\ 

3424 \midrule 

3425 Raphael & red & sai \\ 

3426 Donatello & purple & bo staff \\ 

3427 \bottomrule 

3428 \end{{tabular}} 

3429 """ 

3430 msg = ( 

3431 "In future versions `DataFrame.to_latex` is expected to utilise the base " 

3432 "implementation of `Styler.to_latex` for formatting and rendering. " 

3433 "The arguments signature may therefore change. It is recommended instead " 

3434 "to use `DataFrame.style.to_latex` which also contains additional " 

3435 "functionality." 

3436 ) 

3437 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) 

3438 

3439 # Get defaults from the pandas config 

3440 if self.ndim == 1: 

3441 self = self.to_frame() 

3442 if longtable is None: 

3443 longtable = config.get_option("display.latex.longtable") 

3444 if escape is None: 

3445 escape = config.get_option("display.latex.escape") 

3446 if multicolumn is None: 

3447 multicolumn = config.get_option("display.latex.multicolumn") 

3448 if multicolumn_format is None: 

3449 multicolumn_format = config.get_option("display.latex.multicolumn_format") 

3450 if multirow is None: 

3451 multirow = config.get_option("display.latex.multirow") 

3452 

3453 self = cast("DataFrame", self) 

3454 formatter = DataFrameFormatter( 

3455 self, 

3456 columns=columns, 

3457 col_space=col_space, 

3458 na_rep=na_rep, 

3459 header=header, 

3460 index=index, 

3461 formatters=formatters, 

3462 float_format=float_format, 

3463 bold_rows=bold_rows, 

3464 sparsify=sparsify, 

3465 index_names=index_names, 

3466 escape=escape, 

3467 decimal=decimal, 

3468 ) 

3469 return DataFrameRenderer(formatter).to_latex( 

3470 buf=buf, 

3471 column_format=column_format, 

3472 longtable=longtable, 

3473 encoding=encoding, 

3474 multicolumn=multicolumn, 

3475 multicolumn_format=multicolumn_format, 

3476 multirow=multirow, 

3477 caption=caption, 

3478 label=label, 

3479 position=position, 

3480 ) 

3481 

3482 @overload 

3483 def to_csv( 

3484 self, 

3485 path_or_buf: None = ..., 

3486 sep: str = ..., 

3487 na_rep: str = ..., 

3488 float_format: str | Callable | None = ..., 

3489 columns: Sequence[Hashable] | None = ..., 

3490 header: bool_t | list[str] = ..., 

3491 index: bool_t = ..., 

3492 index_label: IndexLabel | None = ..., 

3493 mode: str = ..., 

3494 encoding: str | None = ..., 

3495 compression: CompressionOptions = ..., 

3496 quoting: int | None = ..., 

3497 quotechar: str = ..., 

3498 lineterminator: str | None = ..., 

3499 chunksize: int | None = ..., 

3500 date_format: str | None = ..., 

3501 doublequote: bool_t = ..., 

3502 escapechar: str | None = ..., 

3503 decimal: str = ..., 

3504 errors: str = ..., 

3505 storage_options: StorageOptions = ..., 

3506 ) -> str: 

3507 ... 

3508 

3509 @overload 

3510 def to_csv( 

3511 self, 

3512 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], 

3513 sep: str = ..., 

3514 na_rep: str = ..., 

3515 float_format: str | Callable | None = ..., 

3516 columns: Sequence[Hashable] | None = ..., 

3517 header: bool_t | list[str] = ..., 

3518 index: bool_t = ..., 

3519 index_label: IndexLabel | None = ..., 

3520 mode: str = ..., 

3521 encoding: str | None = ..., 

3522 compression: CompressionOptions = ..., 

3523 quoting: int | None = ..., 

3524 quotechar: str = ..., 

3525 lineterminator: str | None = ..., 

3526 chunksize: int | None = ..., 

3527 date_format: str | None = ..., 

3528 doublequote: bool_t = ..., 

3529 escapechar: str | None = ..., 

3530 decimal: str = ..., 

3531 errors: str = ..., 

3532 storage_options: StorageOptions = ..., 

3533 ) -> None: 

3534 ... 

3535 

3536 @final 

3537 @doc( 

3538 storage_options=_shared_docs["storage_options"], 

3539 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

3540 ) 

3541 @deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator") 

3542 def to_csv( 

3543 self, 

3544 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

3545 sep: str = ",", 

3546 na_rep: str = "", 

3547 float_format: str | Callable | None = None, 

3548 columns: Sequence[Hashable] | None = None, 

3549 header: bool_t | list[str] = True, 

3550 index: bool_t = True, 

3551 index_label: IndexLabel | None = None, 

3552 mode: str = "w", 

3553 encoding: str | None = None, 

3554 compression: CompressionOptions = "infer", 

3555 quoting: int | None = None, 

3556 quotechar: str = '"', 

3557 lineterminator: str | None = None, 

3558 chunksize: int | None = None, 

3559 date_format: str | None = None, 

3560 doublequote: bool_t = True, 

3561 escapechar: str | None = None, 

3562 decimal: str = ".", 

3563 errors: str = "strict", 

3564 storage_options: StorageOptions = None, 

3565 ) -> str | None: 

3566 r""" 

3567 Write object to a comma-separated values (csv) file. 

3568 

3569 Parameters 

3570 ---------- 

3571 path_or_buf : str, path object, file-like object, or None, default None 

3572 String, path object (implementing os.PathLike[str]), or file-like 

3573 object implementing a write() function. If None, the result is 

3574 returned as a string. If a non-binary file object is passed, it should 

3575 be opened with `newline=''`, disabling universal newlines. If a binary 

3576 file object is passed, `mode` might need to contain a `'b'`. 

3577 

3578 .. versionchanged:: 1.2.0 

3579 

3580 Support for binary file objects was introduced. 

3581 

3582 sep : str, default ',' 

3583 String of length 1. Field delimiter for the output file. 

3584 na_rep : str, default '' 

3585 Missing data representation. 

3586 float_format : str, Callable, default None 

3587 Format string for floating point numbers. If a Callable is given, it takes 

3588 precedence over other numeric formatting parameters, like decimal. 

3589 columns : sequence, optional 

3590 Columns to write. 

3591 header : bool or list of str, default True 

3592 Write out the column names. If a list of strings is given it is 

3593 assumed to be aliases for the column names. 

3594 index : bool, default True 

3595 Write row names (index). 

3596 index_label : str or sequence, or False, default None 

3597 Column label for index column(s) if desired. If None is given, and 

3598 `header` and `index` are True, then the index names are used. A 

3599 sequence should be given if the object uses MultiIndex. If 

3600 False do not print fields for index names. Use index_label=False 

3601 for easier importing in R. 

3602 mode : str, default 'w' 

3603 Python write mode. The available write modes are the same as 

3604 :py:func:`open`. 

3605 encoding : str, optional 

3606 A string representing the encoding to use in the output file, 

3607 defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` 

3608 is a non-binary file object. 

3609 {compression_options} 

3610 

3611 .. versionchanged:: 1.0.0 

3612 

3613 May now be a dict with key 'method' as compression mode 

3614 and other entries as additional compression options if 

3615 compression mode is 'zip'. 

3616 

3617 .. versionchanged:: 1.1.0 

3618 

3619 Passing compression options as keys in dict is 

3620 supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'. 

3621 

3622 .. versionchanged:: 1.2.0 

3623 

3624 Compression is supported for binary file objects. 

3625 

3626 .. versionchanged:: 1.2.0 

3627 

3628 Previous versions forwarded dict entries for 'gzip' to 

3629 `gzip.open` instead of `gzip.GzipFile` which prevented 

3630 setting `mtime`. 

3631 

3632 quoting : optional constant from csv module 

3633 Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` 

3634 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC 

3635 will treat them as non-numeric. 

3636 quotechar : str, default '\"' 

3637 String of length 1. Character used to quote fields. 

3638 lineterminator : str, optional 

3639 The newline character or character sequence to use in the output 

3640 file. Defaults to `os.linesep`, which depends on the OS in which 

3641 this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.). 

3642 

3643 .. versionchanged:: 1.5.0 

3644 

3645 Previously was line_terminator, changed for consistency with 

3646 read_csv and the standard library 'csv' module. 

3647 

3648 chunksize : int or None 

3649 Rows to write at a time. 

3650 date_format : str, default None 

3651 Format string for datetime objects. 

3652 doublequote : bool, default True 

3653 Control quoting of `quotechar` inside a field. 

3654 escapechar : str, default None 

3655 String of length 1. Character used to escape `sep` and `quotechar` 

3656 when appropriate. 

3657 decimal : str, default '.' 

3658 Character recognized as decimal separator. E.g. use ',' for 

3659 European data. 

3660 errors : str, default 'strict' 

3661 Specifies how encoding and decoding errors are to be handled. 

3662 See the errors argument for :func:`open` for a full list 

3663 of options. 

3664 

3665 .. versionadded:: 1.1.0 

3666 

3667 {storage_options} 

3668 

3669 .. versionadded:: 1.2.0 

3670 

3671 Returns 

3672 ------- 

3673 None or str 

3674 If path_or_buf is None, returns the resulting csv format as a 

3675 string. Otherwise returns None. 

3676 

3677 See Also 

3678 -------- 

3679 read_csv : Load a CSV file into a DataFrame. 

3680 to_excel : Write DataFrame to an Excel file. 

3681 

3682 Examples 

3683 -------- 

3684 >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], 

3685 ... 'mask': ['red', 'purple'], 

3686 ... 'weapon': ['sai', 'bo staff']}}) 

3687 >>> df.to_csv(index=False) 

3688 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' 

3689 

3690 Create 'out.zip' containing 'out.csv' 

3691 

3692 >>> compression_opts = dict(method='zip', 

3693 ... archive_name='out.csv') # doctest: +SKIP 

3694 >>> df.to_csv('out.zip', index=False, 

3695 ... compression=compression_opts) # doctest: +SKIP 

3696 

3697 To write a csv file to a new folder or nested folder you will first 

3698 need to create it using either Pathlib or os: 

3699 

3700 >>> from pathlib import Path # doctest: +SKIP 

3701 >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP 

3702 >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP 

3703 >>> df.to_csv(filepath) # doctest: +SKIP 

3704 

3705 >>> import os # doctest: +SKIP 

3706 >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP 

3707 >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP 

3708 """ 

3709 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

3710 

3711 formatter = DataFrameFormatter( 

3712 frame=df, 

3713 header=header, 

3714 index=index, 

3715 na_rep=na_rep, 

3716 float_format=float_format, 

3717 decimal=decimal, 

3718 ) 

3719 

3720 return DataFrameRenderer(formatter).to_csv( 

3721 path_or_buf, 

3722 lineterminator=lineterminator, 

3723 sep=sep, 

3724 encoding=encoding, 

3725 errors=errors, 

3726 compression=compression, 

3727 quoting=quoting, 

3728 columns=columns, 

3729 index_label=index_label, 

3730 mode=mode, 

3731 chunksize=chunksize, 

3732 quotechar=quotechar, 

3733 date_format=date_format, 

3734 doublequote=doublequote, 

3735 escapechar=escapechar, 

3736 storage_options=storage_options, 

3737 ) 

3738 

3739 # ---------------------------------------------------------------------- 

3740 # Lookup Caching 

3741 

3742 def _reset_cacher(self) -> None: 

3743 """ 

3744 Reset the cacher. 

3745 """ 

3746 raise AbstractMethodError(self) 

3747 

3748 def _maybe_update_cacher( 

3749 self, 

3750 clear: bool_t = False, 

3751 verify_is_copy: bool_t = True, 

3752 inplace: bool_t = False, 

3753 ) -> None: 

3754 """ 

3755 See if we need to update our parent cacher if clear, then clear our 

3756 cache. 

3757 

3758 Parameters 

3759 ---------- 

3760 clear : bool, default False 

3761 Clear the item cache. 

3762 verify_is_copy : bool, default True 

3763 Provide is_copy checks. 

3764 """ 

3765 

3766 if verify_is_copy: 

3767 self._check_setitem_copy(t="referent") 

3768 

3769 if clear: 

3770 self._clear_item_cache() 

3771 

3772 def _clear_item_cache(self) -> None: 

3773 raise AbstractMethodError(self) 

3774 

3775 # ---------------------------------------------------------------------- 

3776 # Indexing Methods 

3777 

3778 def take( 

3779 self: NDFrameT, indices, axis=0, is_copy: bool_t | None = None, **kwargs 

3780 ) -> NDFrameT: 

3781 """ 

3782 Return the elements in the given *positional* indices along an axis. 

3783 

3784 This means that we are not indexing according to actual values in 

3785 the index attribute of the object. We are indexing according to the 

3786 actual position of the element in the object. 

3787 

3788 Parameters 

3789 ---------- 

3790 indices : array-like 

3791 An array of ints indicating which positions to take. 

3792 axis : {0 or 'index', 1 or 'columns', None}, default 0 

3793 The axis on which to select elements. ``0`` means that we are 

3794 selecting rows, ``1`` means that we are selecting columns. 

3795 For `Series` this parameter is unused and defaults to 0. 

3796 is_copy : bool 

3797 Before pandas 1.0, ``is_copy=False`` can be specified to ensure 

3798 that the return value is an actual copy. Starting with pandas 1.0, 

3799 ``take`` always returns a copy, and the keyword is therefore 

3800 deprecated. 

3801 

3802 .. deprecated:: 1.0.0 

3803 **kwargs 

3804 For compatibility with :meth:`numpy.take`. Has no effect on the 

3805 output. 

3806 

3807 Returns 

3808 ------- 

3809 taken : same type as caller 

3810 An array-like containing the elements taken from the object. 

3811 

3812 See Also 

3813 -------- 

3814 DataFrame.loc : Select a subset of a DataFrame by labels. 

3815 DataFrame.iloc : Select a subset of a DataFrame by positions. 

3816 numpy.take : Take elements from an array along an axis. 

3817 

3818 Examples 

3819 -------- 

3820 >>> df = pd.DataFrame([('falcon', 'bird', 389.0), 

3821 ... ('parrot', 'bird', 24.0), 

3822 ... ('lion', 'mammal', 80.5), 

3823 ... ('monkey', 'mammal', np.nan)], 

3824 ... columns=['name', 'class', 'max_speed'], 

3825 ... index=[0, 2, 3, 1]) 

3826 >>> df 

3827 name class max_speed 

3828 0 falcon bird 389.0 

3829 2 parrot bird 24.0 

3830 3 lion mammal 80.5 

3831 1 monkey mammal NaN 

3832 

3833 Take elements at positions 0 and 3 along the axis 0 (default). 

3834 

3835 Note how the actual indices selected (0 and 1) do not correspond to 

3836 our selected indices 0 and 3. That's because we are selecting the 0th 

3837 and 3rd rows, not rows whose indices equal 0 and 3. 

3838 

3839 >>> df.take([0, 3]) 

3840 name class max_speed 

3841 0 falcon bird 389.0 

3842 1 monkey mammal NaN 

3843 

3844 Take elements at indices 1 and 2 along the axis 1 (column selection). 

3845 

3846 >>> df.take([1, 2], axis=1) 

3847 class max_speed 

3848 0 bird 389.0 

3849 2 bird 24.0 

3850 3 mammal 80.5 

3851 1 mammal NaN 

3852 

3853 We may take elements using negative integers for positive indices, 

3854 starting from the end of the object, just like with Python lists. 

3855 

3856 >>> df.take([-1, -2]) 

3857 name class max_speed 

3858 1 monkey mammal NaN 

3859 3 lion mammal 80.5 

3860 """ 

3861 if is_copy is not None: 

3862 warnings.warn( 

3863 "is_copy is deprecated and will be removed in a future version. " 

3864 "'take' always returns a copy, so there is no need to specify this.", 

3865 FutureWarning, 

3866 stacklevel=find_stack_level(), 

3867 ) 

3868 

3869 nv.validate_take((), kwargs) 

3870 

3871 return self._take(indices, axis) 

3872 

3873 def _take( 

3874 self: NDFrameT, 

3875 indices, 

3876 axis=0, 

3877 convert_indices: bool_t = True, 

3878 ) -> NDFrameT: 

3879 """ 

3880 Internal version of the `take` allowing specification of additional args. 

3881 

3882 See the docstring of `take` for full explanation of the parameters. 

3883 """ 

3884 self._consolidate_inplace() 

3885 

3886 new_data = self._mgr.take( 

3887 indices, 

3888 axis=self._get_block_manager_axis(axis), 

3889 verify=True, 

3890 convert_indices=convert_indices, 

3891 ) 

3892 return self._constructor(new_data).__finalize__(self, method="take") 

3893 

3894 def _take_with_is_copy(self: NDFrameT, indices, axis=0) -> NDFrameT: 

3895 """ 

3896 Internal version of the `take` method that sets the `_is_copy` 

3897 attribute to keep track of the parent dataframe (using in indexing 

3898 for the SettingWithCopyWarning). 

3899 

3900 See the docstring of `take` for full explanation of the parameters. 

3901 """ 

3902 result = self._take(indices=indices, axis=axis) 

3903 # Maybe set copy if we didn't actually change the index. 

3904 if not result._get_axis(axis).equals(self._get_axis(axis)): 

3905 result._set_is_copy(self) 

3906 return result 

3907 

3908 @final 

3909 def xs( 

3910 self: NDFrameT, 

3911 key: IndexLabel, 

3912 axis: Axis = 0, 

3913 level: IndexLabel = None, 

3914 drop_level: bool_t = True, 

3915 ) -> NDFrameT: 

3916 """ 

3917 Return cross-section from the Series/DataFrame. 

3918 

3919 This method takes a `key` argument to select data at a particular 

3920 level of a MultiIndex. 

3921 

3922 Parameters 

3923 ---------- 

3924 key : label or tuple of label 

3925 Label contained in the index, or partially in a MultiIndex. 

3926 axis : {0 or 'index', 1 or 'columns'}, default 0 

3927 Axis to retrieve cross-section on. 

3928 level : object, defaults to first n levels (n=1 or len(key)) 

3929 In case of a key partially contained in a MultiIndex, indicate 

3930 which levels are used. Levels can be referred by label or position. 

3931 drop_level : bool, default True 

3932 If False, returns object with same levels as self. 

3933 

3934 Returns 

3935 ------- 

3936 Series or DataFrame 

3937 Cross-section from the original Series or DataFrame 

3938 corresponding to the selected index levels. 

3939 

3940 See Also 

3941 -------- 

3942 DataFrame.loc : Access a group of rows and columns 

3943 by label(s) or a boolean array. 

3944 DataFrame.iloc : Purely integer-location based indexing 

3945 for selection by position. 

3946 

3947 Notes 

3948 ----- 

3949 `xs` can not be used to set values. 

3950 

3951 MultiIndex Slicers is a generic way to get/set values on 

3952 any level or levels. 

3953 It is a superset of `xs` functionality, see 

3954 :ref:`MultiIndex Slicers <advanced.mi_slicers>`. 

3955 

3956 Examples 

3957 -------- 

3958 >>> d = {'num_legs': [4, 4, 2, 2], 

3959 ... 'num_wings': [0, 0, 2, 2], 

3960 ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], 

3961 ... 'animal': ['cat', 'dog', 'bat', 'penguin'], 

3962 ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} 

3963 >>> df = pd.DataFrame(data=d) 

3964 >>> df = df.set_index(['class', 'animal', 'locomotion']) 

3965 >>> df 

3966 num_legs num_wings 

3967 class animal locomotion 

3968 mammal cat walks 4 0 

3969 dog walks 4 0 

3970 bat flies 2 2 

3971 bird penguin walks 2 2 

3972 

3973 Get values at specified index 

3974 

3975 >>> df.xs('mammal') 

3976 num_legs num_wings 

3977 animal locomotion 

3978 cat walks 4 0 

3979 dog walks 4 0 

3980 bat flies 2 2 

3981 

3982 Get values at several indexes 

3983 

3984 >>> df.xs(('mammal', 'dog')) 

3985 num_legs num_wings 

3986 locomotion 

3987 walks 4 0 

3988 

3989 Get values at specified index and level 

3990 

3991 >>> df.xs('cat', level=1) 

3992 num_legs num_wings 

3993 class locomotion 

3994 mammal walks 4 0 

3995 

3996 Get values at several indexes and levels 

3997 

3998 >>> df.xs(('bird', 'walks'), 

3999 ... level=[0, 'locomotion']) 

4000 num_legs num_wings 

4001 animal 

4002 penguin 2 2 

4003 

4004 Get values at specified column and axis 

4005 

4006 >>> df.xs('num_wings', axis=1) 

4007 class animal locomotion 

4008 mammal cat walks 0 

4009 dog walks 0 

4010 bat flies 2 

4011 bird penguin walks 2 

4012 Name: num_wings, dtype: int64 

4013 """ 

4014 axis = self._get_axis_number(axis) 

4015 labels = self._get_axis(axis) 

4016 

4017 if isinstance(key, list): 

4018 warnings.warn( 

4019 "Passing lists as key for xs is deprecated and will be removed in a " 

4020 "future version. Pass key as a tuple instead.", 

4021 FutureWarning, 

4022 stacklevel=find_stack_level(), 

4023 ) 

4024 

4025 if level is not None: 

4026 if not isinstance(labels, MultiIndex): 

4027 raise TypeError("Index must be a MultiIndex") 

4028 loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) 

4029 

4030 # create the tuple of the indexer 

4031 _indexer = [slice(None)] * self.ndim 

4032 _indexer[axis] = loc 

4033 indexer = tuple(_indexer) 

4034 

4035 result = self.iloc[indexer] 

4036 setattr(result, result._get_axis_name(axis), new_ax) 

4037 return result 

4038 

4039 if axis == 1: 

4040 if drop_level: 

4041 return self[key] 

4042 index = self.columns 

4043 else: 

4044 index = self.index 

4045 

4046 self._consolidate_inplace() 

4047 

4048 if isinstance(index, MultiIndex): 

4049 loc, new_index = index._get_loc_level(key, level=0) 

4050 if not drop_level: 

4051 if lib.is_integer(loc): 

4052 new_index = index[loc : loc + 1] 

4053 else: 

4054 new_index = index[loc] 

4055 else: 

4056 loc = index.get_loc(key) 

4057 

4058 if isinstance(loc, np.ndarray): 

4059 if loc.dtype == np.bool_: 

4060 (inds,) = loc.nonzero() 

4061 return self._take_with_is_copy(inds, axis=axis) 

4062 else: 

4063 return self._take_with_is_copy(loc, axis=axis) 

4064 

4065 if not is_scalar(loc): 

4066 new_index = index[loc] 

4067 

4068 if is_scalar(loc) and axis == 0: 

4069 # In this case loc should be an integer 

4070 if self.ndim == 1: 

4071 # if we encounter an array-like and we only have 1 dim 

4072 # that means that their are list/ndarrays inside the Series! 

4073 # so just return them (GH 6394) 

4074 return self._values[loc] 

4075 

4076 new_mgr = self._mgr.fast_xs(loc) 

4077 

4078 result = self._constructor_sliced( 

4079 new_mgr, name=self.index[loc] 

4080 ).__finalize__(self) 

4081 elif is_scalar(loc): 

4082 result = self.iloc[:, slice(loc, loc + 1)] 

4083 elif axis == 1: 

4084 result = self.iloc[:, loc] 

4085 else: 

4086 result = self.iloc[loc] 

4087 result.index = new_index 

4088 

4089 # this could be a view 

4090 # but only in a single-dtyped view sliceable case 

4091 result._set_is_copy(self, copy=not result._is_view) 

4092 return result 

4093 

4094 def __getitem__(self, item): 

4095 raise AbstractMethodError(self) 

4096 

4097 def _slice(self: NDFrameT, slobj: slice, axis=0) -> NDFrameT: 

4098 """ 

4099 Construct a slice of this container. 

4100 

4101 Slicing with this method is *always* positional. 

4102 """ 

4103 assert isinstance(slobj, slice), type(slobj) 

4104 axis = self._get_block_manager_axis(axis) 

4105 result = self._constructor(self._mgr.get_slice(slobj, axis=axis)) 

4106 result = result.__finalize__(self) 

4107 

4108 # this could be a view 

4109 # but only in a single-dtyped view sliceable case 

4110 is_copy = axis != 0 or result._is_view 

4111 result._set_is_copy(self, copy=is_copy) 

4112 return result 

4113 

4114 @final 

4115 def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: 

4116 if not copy: 

4117 self._is_copy = None 

4118 else: 

4119 assert ref is not None 

4120 self._is_copy = weakref.ref(ref) 

4121 

4122 def _check_is_chained_assignment_possible(self) -> bool_t: 

4123 """ 

4124 Check if we are a view, have a cacher, and are of mixed type. 

4125 If so, then force a setitem_copy check. 

4126 

4127 Should be called just near setting a value 

4128 

4129 Will return a boolean if it we are a view and are cached, but a 

4130 single-dtype meaning that the cacher should be updated following 

4131 setting. 

4132 """ 

4133 if self._is_copy: 

4134 self._check_setitem_copy(t="referent") 

4135 return False 

4136 

4137 @final 

4138 def _check_setitem_copy(self, t="setting", force=False): 

4139 """ 

4140 

4141 Parameters 

4142 ---------- 

4143 t : str, the type of setting error 

4144 force : bool, default False 

4145 If True, then force showing an error. 

4146 

4147 validate if we are doing a setitem on a chained copy. 

4148 

4149 It is technically possible to figure out that we are setting on 

4150 a copy even WITH a multi-dtyped pandas object. In other words, some 

4151 blocks may be views while other are not. Currently _is_view will ALWAYS 

4152 return False for multi-blocks to avoid having to handle this case. 

4153 

4154 df = DataFrame(np.arange(0,9), columns=['count']) 

4155 df['group'] = 'b' 

4156 

4157 # This technically need not raise SettingWithCopy if both are view 

4158 # (which is not generally guaranteed but is usually True. However, 

4159 # this is in general not a good practice and we recommend using .loc. 

4160 df.iloc[0:5]['group'] = 'a' 

4161 

4162 """ 

4163 if ( 

4164 config.get_option("mode.copy_on_write") 

4165 and config.get_option("mode.data_manager") == "block" 

4166 ): 

4167 return 

4168 

4169 # return early if the check is not needed 

4170 if not (force or self._is_copy): 

4171 return 

4172 

4173 value = config.get_option("mode.chained_assignment") 

4174 if value is None: 

4175 return 

4176 

4177 # see if the copy is not actually referred; if so, then dissolve 

4178 # the copy weakref 

4179 if self._is_copy is not None and not isinstance(self._is_copy, str): 

4180 r = self._is_copy() 

4181 if not gc.get_referents(r) or (r is not None and r.shape == self.shape): 

4182 self._is_copy = None 

4183 return 

4184 

4185 # a custom message 

4186 if isinstance(self._is_copy, str): 

4187 t = self._is_copy 

4188 

4189 elif t == "referent": 

4190 t = ( 

4191 "\n" 

4192 "A value is trying to be set on a copy of a slice from a " 

4193 "DataFrame\n\n" 

4194 "See the caveats in the documentation: " 

4195 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4196 "indexing.html#returning-a-view-versus-a-copy" 

4197 ) 

4198 

4199 else: 

4200 t = ( 

4201 "\n" 

4202 "A value is trying to be set on a copy of a slice from a " 

4203 "DataFrame.\n" 

4204 "Try using .loc[row_indexer,col_indexer] = value " 

4205 "instead\n\nSee the caveats in the documentation: " 

4206 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4207 "indexing.html#returning-a-view-versus-a-copy" 

4208 ) 

4209 

4210 if value == "raise": 

4211 raise SettingWithCopyError(t) 

4212 elif value == "warn": 

4213 warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) 

4214 

4215 def __delitem__(self, key) -> None: 

4216 """ 

4217 Delete item 

4218 """ 

4219 deleted = False 

4220 

4221 maybe_shortcut = False 

4222 if self.ndim == 2 and isinstance(self.columns, MultiIndex): 

4223 try: 

4224 # By using engine's __contains__ we effectively 

4225 # restrict to same-length tuples 

4226 maybe_shortcut = key not in self.columns._engine 

4227 except TypeError: 

4228 pass 

4229 

4230 if maybe_shortcut: 

4231 # Allow shorthand to delete all columns whose first len(key) 

4232 # elements match key: 

4233 if not isinstance(key, tuple): 

4234 key = (key,) 

4235 for col in self.columns: 

4236 if isinstance(col, tuple) and col[: len(key)] == key: 

4237 del self[col] 

4238 deleted = True 

4239 if not deleted: 

4240 # If the above loop ran and didn't delete anything because 

4241 # there was no match, this call should raise the appropriate 

4242 # exception: 

4243 loc = self.axes[-1].get_loc(key) 

4244 self._mgr = self._mgr.idelete(loc) 

4245 

4246 # delete from the caches 

4247 try: 

4248 del self._item_cache[key] 

4249 except KeyError: 

4250 pass 

4251 

4252 # ---------------------------------------------------------------------- 

4253 # Unsorted 

4254 

4255 @final 

4256 def _check_inplace_and_allows_duplicate_labels(self, inplace): 

4257 if inplace and not self.flags.allows_duplicate_labels: 

4258 raise ValueError( 

4259 "Cannot specify 'inplace=True' when " 

4260 "'self.flags.allows_duplicate_labels' is False." 

4261 ) 

4262 

4263 @final 

4264 def get(self, key, default=None): 

4265 """ 

4266 Get item from object for given key (ex: DataFrame column). 

4267 

4268 Returns default value if not found. 

4269 

4270 Parameters 

4271 ---------- 

4272 key : object 

4273 

4274 Returns 

4275 ------- 

4276 value : same type as items contained in object 

4277 

4278 Examples 

4279 -------- 

4280 >>> df = pd.DataFrame( 

4281 ... [ 

4282 ... [24.3, 75.7, "high"], 

4283 ... [31, 87.8, "high"], 

4284 ... [22, 71.6, "medium"], 

4285 ... [35, 95, "medium"], 

4286 ... ], 

4287 ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], 

4288 ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), 

4289 ... ) 

4290 

4291 >>> df 

4292 temp_celsius temp_fahrenheit windspeed 

4293 2014-02-12 24.3 75.7 high 

4294 2014-02-13 31.0 87.8 high 

4295 2014-02-14 22.0 71.6 medium 

4296 2014-02-15 35.0 95.0 medium 

4297 

4298 >>> df.get(["temp_celsius", "windspeed"]) 

4299 temp_celsius windspeed 

4300 2014-02-12 24.3 high 

4301 2014-02-13 31.0 high 

4302 2014-02-14 22.0 medium 

4303 2014-02-15 35.0 medium 

4304 

4305 If the key isn't found, the default value will be used. 

4306 

4307 >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") 

4308 'default_value' 

4309 """ 

4310 try: 

4311 return self[key] 

4312 except (KeyError, ValueError, IndexError): 

4313 return default 

4314 

4315 @final 

4316 @property 

4317 def _is_view(self) -> bool_t: 

4318 """Return boolean indicating if self is view of another array""" 

4319 return self._mgr.is_view 

4320 

4321 @final 

4322 def reindex_like( 

4323 self: NDFrameT, 

4324 other, 

4325 method: str | None = None, 

4326 copy: bool_t = True, 

4327 limit=None, 

4328 tolerance=None, 

4329 ) -> NDFrameT: 

4330 """ 

4331 Return an object with matching indices as other object. 

4332 

4333 Conform the object to the same index on all axes. Optional 

4334 filling logic, placing NaN in locations having no value 

4335 in the previous index. A new object is produced unless the 

4336 new index is equivalent to the current one and copy=False. 

4337 

4338 Parameters 

4339 ---------- 

4340 other : Object of the same data type 

4341 Its row and column indices are used to define the new indices 

4342 of this object. 

4343 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} 

4344 Method to use for filling holes in reindexed DataFrame. 

4345 Please note: this is only applicable to DataFrames/Series with a 

4346 monotonically increasing/decreasing index. 

4347 

4348 * None (default): don't fill gaps 

4349 * pad / ffill: propagate last valid observation forward to next 

4350 valid 

4351 * backfill / bfill: use next valid observation to fill gap 

4352 * nearest: use nearest valid observations to fill gap. 

4353 

4354 copy : bool, default True 

4355 Return a new object, even if the passed indexes are the same. 

4356 limit : int, default None 

4357 Maximum number of consecutive labels to fill for inexact matches. 

4358 tolerance : optional 

4359 Maximum distance between original and new labels for inexact 

4360 matches. The values of the index at the matching locations must 

4361 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

4362 

4363 Tolerance may be a scalar value, which applies the same tolerance 

4364 to all values, or list-like, which applies variable tolerance per 

4365 element. List-like includes list, tuple, array, Series, and must be 

4366 the same size as the index and its dtype must exactly match the 

4367 index's type. 

4368 

4369 Returns 

4370 ------- 

4371 Series or DataFrame 

4372 Same type as caller, but with changed indices on each axis. 

4373 

4374 See Also 

4375 -------- 

4376 DataFrame.set_index : Set row labels. 

4377 DataFrame.reset_index : Remove row labels or move them to new columns. 

4378 DataFrame.reindex : Change to new indices or expand indices. 

4379 

4380 Notes 

4381 ----- 

4382 Same as calling 

4383 ``.reindex(index=other.index, columns=other.columns,...)``. 

4384 

4385 Examples 

4386 -------- 

4387 >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], 

4388 ... [31, 87.8, 'high'], 

4389 ... [22, 71.6, 'medium'], 

4390 ... [35, 95, 'medium']], 

4391 ... columns=['temp_celsius', 'temp_fahrenheit', 

4392 ... 'windspeed'], 

4393 ... index=pd.date_range(start='2014-02-12', 

4394 ... end='2014-02-15', freq='D')) 

4395 

4396 >>> df1 

4397 temp_celsius temp_fahrenheit windspeed 

4398 2014-02-12 24.3 75.7 high 

4399 2014-02-13 31.0 87.8 high 

4400 2014-02-14 22.0 71.6 medium 

4401 2014-02-15 35.0 95.0 medium 

4402 

4403 >>> df2 = pd.DataFrame([[28, 'low'], 

4404 ... [30, 'low'], 

4405 ... [35.1, 'medium']], 

4406 ... columns=['temp_celsius', 'windspeed'], 

4407 ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', 

4408 ... '2014-02-15'])) 

4409 

4410 >>> df2 

4411 temp_celsius windspeed 

4412 2014-02-12 28.0 low 

4413 2014-02-13 30.0 low 

4414 2014-02-15 35.1 medium 

4415 

4416 >>> df2.reindex_like(df1) 

4417 temp_celsius temp_fahrenheit windspeed 

4418 2014-02-12 28.0 NaN low 

4419 2014-02-13 30.0 NaN low 

4420 2014-02-14 NaN NaN NaN 

4421 2014-02-15 35.1 NaN medium 

4422 """ 

4423 d = other._construct_axes_dict( 

4424 axes=self._AXIS_ORDERS, 

4425 method=method, 

4426 copy=copy, 

4427 limit=limit, 

4428 tolerance=tolerance, 

4429 ) 

4430 

4431 return self.reindex(**d) 

4432 

4433 @overload 

4434 def drop( 

4435 self, 

4436 labels: IndexLabel = ..., 

4437 *, 

4438 axis: Axis = ..., 

4439 index: IndexLabel = ..., 

4440 columns: IndexLabel = ..., 

4441 level: Level | None = ..., 

4442 inplace: Literal[True], 

4443 errors: IgnoreRaise = ..., 

4444 ) -> None: 

4445 ... 

4446 

4447 @overload 

4448 def drop( 

4449 self: NDFrameT, 

4450 labels: IndexLabel = ..., 

4451 *, 

4452 axis: Axis = ..., 

4453 index: IndexLabel = ..., 

4454 columns: IndexLabel = ..., 

4455 level: Level | None = ..., 

4456 inplace: Literal[False] = ..., 

4457 errors: IgnoreRaise = ..., 

4458 ) -> NDFrameT: 

4459 ... 

4460 

4461 @overload 

4462 def drop( 

4463 self: NDFrameT, 

4464 labels: IndexLabel = ..., 

4465 *, 

4466 axis: Axis = ..., 

4467 index: IndexLabel = ..., 

4468 columns: IndexLabel = ..., 

4469 level: Level | None = ..., 

4470 inplace: bool_t = ..., 

4471 errors: IgnoreRaise = ..., 

4472 ) -> NDFrameT | None: 

4473 ... 

4474 

4475 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) 

4476 def drop( 

4477 self: NDFrameT, 

4478 labels: IndexLabel = None, 

4479 axis: Axis = 0, 

4480 index: IndexLabel = None, 

4481 columns: IndexLabel = None, 

4482 level: Level | None = None, 

4483 inplace: bool_t = False, 

4484 errors: IgnoreRaise = "raise", 

4485 ) -> NDFrameT | None: 

4486 

4487 inplace = validate_bool_kwarg(inplace, "inplace") 

4488 

4489 if labels is not None: 

4490 if index is not None or columns is not None: 

4491 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") 

4492 axis_name = self._get_axis_name(axis) 

4493 axes = {axis_name: labels} 

4494 elif index is not None or columns is not None: 

4495 axes, _ = self._construct_axes_from_arguments((index, columns), {}) 

4496 else: 

4497 raise ValueError( 

4498 "Need to specify at least one of 'labels', 'index' or 'columns'" 

4499 ) 

4500 

4501 obj = self 

4502 

4503 for axis, labels in axes.items(): 

4504 if labels is not None: 

4505 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 

4506 

4507 if inplace: 

4508 self._update_inplace(obj) 

4509 else: 

4510 return obj 

4511 

4512 @final 

4513 def _drop_axis( 

4514 self: NDFrameT, 

4515 labels, 

4516 axis, 

4517 level=None, 

4518 errors: IgnoreRaise = "raise", 

4519 only_slice: bool_t = False, 

4520 ) -> NDFrameT: 

4521 """ 

4522 Drop labels from specified axis. Used in the ``drop`` method 

4523 internally. 

4524 

4525 Parameters 

4526 ---------- 

4527 labels : single label or list-like 

4528 axis : int or axis name 

4529 level : int or level name, default None 

4530 For MultiIndex 

4531 errors : {'ignore', 'raise'}, default 'raise' 

4532 If 'ignore', suppress error and existing labels are dropped. 

4533 only_slice : bool, default False 

4534 Whether indexing along columns should be view-only. 

4535 

4536 """ 

4537 axis_num = self._get_axis_number(axis) 

4538 axis = self._get_axis(axis) 

4539 

4540 if axis.is_unique: 

4541 if level is not None: 

4542 if not isinstance(axis, MultiIndex): 

4543 raise AssertionError("axis must be a MultiIndex") 

4544 new_axis = axis.drop(labels, level=level, errors=errors) 

4545 else: 

4546 new_axis = axis.drop(labels, errors=errors) 

4547 indexer = axis.get_indexer(new_axis) 

4548 

4549 # Case for non-unique axis 

4550 else: 

4551 is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) 

4552 labels = ensure_object(com.index_labels_to_array(labels)) 

4553 if level is not None: 

4554 if not isinstance(axis, MultiIndex): 

4555 raise AssertionError("axis must be a MultiIndex") 

4556 mask = ~axis.get_level_values(level).isin(labels) 

4557 

4558 # GH 18561 MultiIndex.drop should raise if label is absent 

4559 if errors == "raise" and mask.all(): 

4560 raise KeyError(f"{labels} not found in axis") 

4561 elif ( 

4562 isinstance(axis, MultiIndex) 

4563 and labels.dtype == "object" 

4564 and not is_tuple_labels 

4565 ): 

4566 # Set level to zero in case of MultiIndex and label is string, 

4567 # because isin can't handle strings for MultiIndexes GH#36293 

4568 # In case of tuples we get dtype object but have to use isin GH#42771 

4569 mask = ~axis.get_level_values(0).isin(labels) 

4570 else: 

4571 mask = ~axis.isin(labels) 

4572 # Check if label doesn't exist along axis 

4573 labels_missing = (axis.get_indexer_for(labels) == -1).any() 

4574 if errors == "raise" and labels_missing: 

4575 raise KeyError(f"{labels} not found in axis") 

4576 

4577 if is_extension_array_dtype(mask.dtype): 

4578 # GH#45860 

4579 mask = mask.to_numpy(dtype=bool) 

4580 

4581 indexer = mask.nonzero()[0] 

4582 new_axis = axis.take(indexer) 

4583 

4584 bm_axis = self.ndim - axis_num - 1 

4585 new_mgr = self._mgr.reindex_indexer( 

4586 new_axis, 

4587 indexer, 

4588 axis=bm_axis, 

4589 allow_dups=True, 

4590 only_slice=only_slice, 

4591 ) 

4592 result = self._constructor(new_mgr) 

4593 if self.ndim == 1: 

4594 result.name = self.name 

4595 

4596 return result.__finalize__(self) 

4597 

4598 @final 

4599 def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: 

4600 """ 

4601 Replace self internals with result. 

4602 

4603 Parameters 

4604 ---------- 

4605 result : same type as self 

4606 verify_is_copy : bool, default True 

4607 Provide is_copy checks. 

4608 """ 

4609 # NOTE: This does *not* call __finalize__ and that's an explicit 

4610 # decision that we may revisit in the future. 

4611 self._reset_cache() 

4612 self._clear_item_cache() 

4613 self._mgr = result._mgr 

4614 self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) 

4615 

4616 @final 

4617 def add_prefix(self: NDFrameT, prefix: str) -> NDFrameT: 

4618 """ 

4619 Prefix labels with string `prefix`. 

4620 

4621 For Series, the row labels are prefixed. 

4622 For DataFrame, the column labels are prefixed. 

4623 

4624 Parameters 

4625 ---------- 

4626 prefix : str 

4627 The string to add before each label. 

4628 

4629 Returns 

4630 ------- 

4631 Series or DataFrame 

4632 New Series or DataFrame with updated labels. 

4633 

4634 See Also 

4635 -------- 

4636 Series.add_suffix: Suffix row labels with string `suffix`. 

4637 DataFrame.add_suffix: Suffix column labels with string `suffix`. 

4638 

4639 Examples 

4640 -------- 

4641 >>> s = pd.Series([1, 2, 3, 4]) 

4642 >>> s 

4643 0 1 

4644 1 2 

4645 2 3 

4646 3 4 

4647 dtype: int64 

4648 

4649 >>> s.add_prefix('item_') 

4650 item_0 1 

4651 item_1 2 

4652 item_2 3 

4653 item_3 4 

4654 dtype: int64 

4655 

4656 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4657 >>> df 

4658 A B 

4659 0 1 3 

4660 1 2 4 

4661 2 3 5 

4662 3 4 6 

4663 

4664 >>> df.add_prefix('col_') 

4665 col_A col_B 

4666 0 1 3 

4667 1 2 4 

4668 2 3 5 

4669 3 4 6 

4670 """ 

4671 f = functools.partial("{prefix}{}".format, prefix=prefix) 

4672 

4673 mapper = {self._info_axis_name: f} 

4674 # error: Incompatible return value type (got "Optional[NDFrameT]", 

4675 # expected "NDFrameT") 

4676 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

4677 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

4678 return self._rename(**mapper) # type: ignore[return-value, arg-type] 

4679 

4680 @final 

4681 def add_suffix(self: NDFrameT, suffix: str) -> NDFrameT: 

4682 """ 

4683 Suffix labels with string `suffix`. 

4684 

4685 For Series, the row labels are suffixed. 

4686 For DataFrame, the column labels are suffixed. 

4687 

4688 Parameters 

4689 ---------- 

4690 suffix : str 

4691 The string to add after each label. 

4692 

4693 Returns 

4694 ------- 

4695 Series or DataFrame 

4696 New Series or DataFrame with updated labels. 

4697 

4698 See Also 

4699 -------- 

4700 Series.add_prefix: Prefix row labels with string `prefix`. 

4701 DataFrame.add_prefix: Prefix column labels with string `prefix`. 

4702 

4703 Examples 

4704 -------- 

4705 >>> s = pd.Series([1, 2, 3, 4]) 

4706 >>> s 

4707 0 1 

4708 1 2 

4709 2 3 

4710 3 4 

4711 dtype: int64 

4712 

4713 >>> s.add_suffix('_item') 

4714 0_item 1 

4715 1_item 2 

4716 2_item 3 

4717 3_item 4 

4718 dtype: int64 

4719 

4720 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4721 >>> df 

4722 A B 

4723 0 1 3 

4724 1 2 4 

4725 2 3 5 

4726 3 4 6 

4727 

4728 >>> df.add_suffix('_col') 

4729 A_col B_col 

4730 0 1 3 

4731 1 2 4 

4732 2 3 5 

4733 3 4 6 

4734 """ 

4735 f = functools.partial("{}{suffix}".format, suffix=suffix) 

4736 

4737 mapper = {self._info_axis_name: f} 

4738 # error: Incompatible return value type (got "Optional[NDFrameT]", 

4739 # expected "NDFrameT") 

4740 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

4741 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

4742 return self._rename(**mapper) # type: ignore[return-value, arg-type] 

4743 

4744 @overload 

4745 def sort_values( 

4746 self: NDFrameT, 

4747 *, 

4748 axis: Axis = ..., 

4749 ascending=..., 

4750 inplace: Literal[False] = ..., 

4751 kind: str = ..., 

4752 na_position: str = ..., 

4753 ignore_index: bool_t = ..., 

4754 key: ValueKeyFunc = ..., 

4755 ) -> NDFrameT: 

4756 ... 

4757 

4758 @overload 

4759 def sort_values( 

4760 self, 

4761 *, 

4762 axis: Axis = ..., 

4763 ascending=..., 

4764 inplace: Literal[True], 

4765 kind: str = ..., 

4766 na_position: str = ..., 

4767 ignore_index: bool_t = ..., 

4768 key: ValueKeyFunc = ..., 

4769 ) -> None: 

4770 ... 

4771 

4772 @overload 

4773 def sort_values( 

4774 self: NDFrameT, 

4775 *, 

4776 axis: Axis = ..., 

4777 ascending=..., 

4778 inplace: bool_t = ..., 

4779 kind: str = ..., 

4780 na_position: str = ..., 

4781 ignore_index: bool_t = ..., 

4782 key: ValueKeyFunc = ..., 

4783 ) -> NDFrameT | None: 

4784 ... 

4785 

4786 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

4787 def sort_values( 

4788 self: NDFrameT, 

4789 axis: Axis = 0, 

4790 ascending=True, 

4791 inplace: bool_t = False, 

4792 kind: str = "quicksort", 

4793 na_position: str = "last", 

4794 ignore_index: bool_t = False, 

4795 key: ValueKeyFunc = None, 

4796 ) -> NDFrameT | None: 

4797 """ 

4798 Sort by the values along either axis. 

4799 

4800 Parameters 

4801 ----------%(optional_by)s 

4802 axis : %(axes_single_arg)s, default 0 

4803 Axis to be sorted. 

4804 ascending : bool or list of bool, default True 

4805 Sort ascending vs. descending. Specify list for multiple sort 

4806 orders. If this is a list of bools, must match the length of 

4807 the by. 

4808 inplace : bool, default False 

4809 If True, perform operation in-place. 

4810 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' 

4811 Choice of sorting algorithm. See also :func:`numpy.sort` for more 

4812 information. `mergesort` and `stable` are the only stable algorithms. For 

4813 DataFrames, this option is only applied when sorting on a single 

4814 column or label. 

4815 na_position : {'first', 'last'}, default 'last' 

4816 Puts NaNs at the beginning if `first`; `last` puts NaNs at the 

4817 end. 

4818 ignore_index : bool, default False 

4819 If True, the resulting axis will be labeled 0, 1, …, n - 1. 

4820 

4821 .. versionadded:: 1.0.0 

4822 

4823 key : callable, optional 

4824 Apply the key function to the values 

4825 before sorting. This is similar to the `key` argument in the 

4826 builtin :meth:`sorted` function, with the notable difference that 

4827 this `key` function should be *vectorized*. It should expect a 

4828 ``Series`` and return a Series with the same shape as the input. 

4829 It will be applied to each column in `by` independently. 

4830 

4831 .. versionadded:: 1.1.0 

4832 

4833 Returns 

4834 ------- 

4835 DataFrame or None 

4836 DataFrame with sorted values or None if ``inplace=True``. 

4837 

4838 See Also 

4839 -------- 

4840 DataFrame.sort_index : Sort a DataFrame by the index. 

4841 Series.sort_values : Similar method for a Series. 

4842 

4843 Examples 

4844 -------- 

4845 >>> df = pd.DataFrame({ 

4846 ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], 

4847 ... 'col2': [2, 1, 9, 8, 7, 4], 

4848 ... 'col3': [0, 1, 9, 4, 2, 3], 

4849 ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] 

4850 ... }) 

4851 >>> df 

4852 col1 col2 col3 col4 

4853 0 A 2 0 a 

4854 1 A 1 1 B 

4855 2 B 9 9 c 

4856 3 NaN 8 4 D 

4857 4 D 7 2 e 

4858 5 C 4 3 F 

4859 

4860 Sort by col1 

4861 

4862 >>> df.sort_values(by=['col1']) 

4863 col1 col2 col3 col4 

4864 0 A 2 0 a 

4865 1 A 1 1 B 

4866 2 B 9 9 c 

4867 5 C 4 3 F 

4868 4 D 7 2 e 

4869 3 NaN 8 4 D 

4870 

4871 Sort by multiple columns 

4872 

4873 >>> df.sort_values(by=['col1', 'col2']) 

4874 col1 col2 col3 col4 

4875 1 A 1 1 B 

4876 0 A 2 0 a 

4877 2 B 9 9 c 

4878 5 C 4 3 F 

4879 4 D 7 2 e 

4880 3 NaN 8 4 D 

4881 

4882 Sort Descending 

4883 

4884 >>> df.sort_values(by='col1', ascending=False) 

4885 col1 col2 col3 col4 

4886 4 D 7 2 e 

4887 5 C 4 3 F 

4888 2 B 9 9 c 

4889 0 A 2 0 a 

4890 1 A 1 1 B 

4891 3 NaN 8 4 D 

4892 

4893 Putting NAs first 

4894 

4895 >>> df.sort_values(by='col1', ascending=False, na_position='first') 

4896 col1 col2 col3 col4 

4897 3 NaN 8 4 D 

4898 4 D 7 2 e 

4899 5 C 4 3 F 

4900 2 B 9 9 c 

4901 0 A 2 0 a 

4902 1 A 1 1 B 

4903 

4904 Sorting with a key function 

4905 

4906 >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) 

4907 col1 col2 col3 col4 

4908 0 A 2 0 a 

4909 1 A 1 1 B 

4910 2 B 9 9 c 

4911 3 NaN 8 4 D 

4912 4 D 7 2 e 

4913 5 C 4 3 F 

4914 

4915 Natural sort with the key argument, 

4916 using the `natsort <https://github.com/SethMMorton/natsort>` package. 

4917 

4918 >>> df = pd.DataFrame({ 

4919 ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], 

4920 ... "value": [10, 20, 30, 40, 50] 

4921 ... }) 

4922 >>> df 

4923 time value 

4924 0 0hr 10 

4925 1 128hr 20 

4926 2 72hr 30 

4927 3 48hr 40 

4928 4 96hr 50 

4929 >>> from natsort import index_natsorted 

4930 >>> df.sort_values( 

4931 ... by="time", 

4932 ... key=lambda x: np.argsort(index_natsorted(df["time"])) 

4933 ... ) 

4934 time value 

4935 0 0hr 10 

4936 3 48hr 40 

4937 2 72hr 30 

4938 4 96hr 50 

4939 1 128hr 20 

4940 """ 

4941 raise AbstractMethodError(self) 

4942 

4943 @overload 

4944 def sort_index( 

4945 self, 

4946 *, 

4947 axis: Axis = ..., 

4948 level: IndexLabel = ..., 

4949 ascending: bool_t | Sequence[bool_t] = ..., 

4950 inplace: Literal[True], 

4951 kind: SortKind = ..., 

4952 na_position: NaPosition = ..., 

4953 sort_remaining: bool_t = ..., 

4954 ignore_index: bool_t = ..., 

4955 key: IndexKeyFunc = ..., 

4956 ) -> None: 

4957 ... 

4958 

4959 @overload 

4960 def sort_index( 

4961 self: NDFrameT, 

4962 *, 

4963 axis: Axis = ..., 

4964 level: IndexLabel = ..., 

4965 ascending: bool_t | Sequence[bool_t] = ..., 

4966 inplace: Literal[False] = ..., 

4967 kind: SortKind = ..., 

4968 na_position: NaPosition = ..., 

4969 sort_remaining: bool_t = ..., 

4970 ignore_index: bool_t = ..., 

4971 key: IndexKeyFunc = ..., 

4972 ) -> NDFrameT: 

4973 ... 

4974 

4975 @overload 

4976 def sort_index( 

4977 self: NDFrameT, 

4978 *, 

4979 axis: Axis = ..., 

4980 level: IndexLabel = ..., 

4981 ascending: bool_t | Sequence[bool_t] = ..., 

4982 inplace: bool_t = ..., 

4983 kind: SortKind = ..., 

4984 na_position: NaPosition = ..., 

4985 sort_remaining: bool_t = ..., 

4986 ignore_index: bool_t = ..., 

4987 key: IndexKeyFunc = ..., 

4988 ) -> NDFrameT | None: 

4989 ... 

4990 

4991 def sort_index( 

4992 self: NDFrameT, 

4993 axis: Axis = 0, 

4994 level: IndexLabel = None, 

4995 ascending: bool_t | Sequence[bool_t] = True, 

4996 inplace: bool_t = False, 

4997 kind: SortKind = "quicksort", 

4998 na_position: NaPosition = "last", 

4999 sort_remaining: bool_t = True, 

5000 ignore_index: bool_t = False, 

5001 key: IndexKeyFunc = None, 

5002 ) -> NDFrameT | None: 

5003 

5004 inplace = validate_bool_kwarg(inplace, "inplace") 

5005 axis = self._get_axis_number(axis) 

5006 ascending = validate_ascending(ascending) 

5007 

5008 target = self._get_axis(axis) 

5009 

5010 indexer = get_indexer_indexer( 

5011 target, level, ascending, kind, na_position, sort_remaining, key 

5012 ) 

5013 

5014 if indexer is None: 

5015 if inplace: 

5016 result = self 

5017 else: 

5018 result = self.copy() 

5019 

5020 if ignore_index: 

5021 result.index = default_index(len(self)) 

5022 if inplace: 

5023 return None 

5024 else: 

5025 return result 

5026 

5027 baxis = self._get_block_manager_axis(axis) 

5028 new_data = self._mgr.take(indexer, axis=baxis, verify=False) 

5029 

5030 # reconstruct axis if needed 

5031 new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic()) 

5032 

5033 if ignore_index: 

5034 axis = 1 if isinstance(self, ABCDataFrame) else 0 

5035 new_data.set_axis(axis, default_index(len(indexer))) 

5036 

5037 result = self._constructor(new_data) 

5038 

5039 if inplace: 

5040 return self._update_inplace(result) 

5041 else: 

5042 return result.__finalize__(self, method="sort_index") 

5043 

5044 @doc( 

5045 klass=_shared_doc_kwargs["klass"], 

5046 axes=_shared_doc_kwargs["axes"], 

5047 optional_labels="", 

5048 optional_axis="", 

5049 ) 

5050 def reindex(self: NDFrameT, *args, **kwargs) -> NDFrameT: 

5051 """ 

5052 Conform {klass} to new index with optional filling logic. 

5053 

5054 Places NA/NaN in locations having no value in the previous index. A new object 

5055 is produced unless the new index is equivalent to the current one and 

5056 ``copy=False``. 

5057 

5058 Parameters 

5059 ---------- 

5060 {optional_labels} 

5061 {axes} : array-like, optional 

5062 New labels / index to conform to, should be specified using 

5063 keywords. Preferably an Index object to avoid duplicating data. 

5064 {optional_axis} 

5065 method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} 

5066 Method to use for filling holes in reindexed DataFrame. 

5067 Please note: this is only applicable to DataFrames/Series with a 

5068 monotonically increasing/decreasing index. 

5069 

5070 * None (default): don't fill gaps 

5071 * pad / ffill: Propagate last valid observation forward to next 

5072 valid. 

5073 * backfill / bfill: Use next valid observation to fill gap. 

5074 * nearest: Use nearest valid observations to fill gap. 

5075 

5076 copy : bool, default True 

5077 Return a new object, even if the passed indexes are the same. 

5078 level : int or name 

5079 Broadcast across a level, matching Index values on the 

5080 passed MultiIndex level. 

5081 fill_value : scalar, default np.NaN 

5082 Value to use for missing values. Defaults to NaN, but can be any 

5083 "compatible" value. 

5084 limit : int, default None 

5085 Maximum number of consecutive elements to forward or backward fill. 

5086 tolerance : optional 

5087 Maximum distance between original and new labels for inexact 

5088 matches. The values of the index at the matching locations most 

5089 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

5090 

5091 Tolerance may be a scalar value, which applies the same tolerance 

5092 to all values, or list-like, which applies variable tolerance per 

5093 element. List-like includes list, tuple, array, Series, and must be 

5094 the same size as the index and its dtype must exactly match the 

5095 index's type. 

5096 

5097 Returns 

5098 ------- 

5099 {klass} with changed index. 

5100 

5101 See Also 

5102 -------- 

5103 DataFrame.set_index : Set row labels. 

5104 DataFrame.reset_index : Remove row labels or move them to new columns. 

5105 DataFrame.reindex_like : Change to same indices as other DataFrame. 

5106 

5107 Examples 

5108 -------- 

5109 ``DataFrame.reindex`` supports two calling conventions 

5110 

5111 * ``(index=index_labels, columns=column_labels, ...)`` 

5112 * ``(labels, axis={{'index', 'columns'}}, ...)`` 

5113 

5114 We *highly* recommend using keyword arguments to clarify your 

5115 intent. 

5116 

5117 Create a dataframe with some fictional data. 

5118 

5119 >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] 

5120 >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], 

5121 ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, 

5122 ... index=index) 

5123 >>> df 

5124 http_status response_time 

5125 Firefox 200 0.04 

5126 Chrome 200 0.02 

5127 Safari 404 0.07 

5128 IE10 404 0.08 

5129 Konqueror 301 1.00 

5130 

5131 Create a new index and reindex the dataframe. By default 

5132 values in the new index that do not have corresponding 

5133 records in the dataframe are assigned ``NaN``. 

5134 

5135 >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 

5136 ... 'Chrome'] 

5137 >>> df.reindex(new_index) 

5138 http_status response_time 

5139 Safari 404.0 0.07 

5140 Iceweasel NaN NaN 

5141 Comodo Dragon NaN NaN 

5142 IE10 404.0 0.08 

5143 Chrome 200.0 0.02 

5144 

5145 We can fill in the missing values by passing a value to 

5146 the keyword ``fill_value``. Because the index is not monotonically 

5147 increasing or decreasing, we cannot use arguments to the keyword 

5148 ``method`` to fill the ``NaN`` values. 

5149 

5150 >>> df.reindex(new_index, fill_value=0) 

5151 http_status response_time 

5152 Safari 404 0.07 

5153 Iceweasel 0 0.00 

5154 Comodo Dragon 0 0.00 

5155 IE10 404 0.08 

5156 Chrome 200 0.02 

5157 

5158 >>> df.reindex(new_index, fill_value='missing') 

5159 http_status response_time 

5160 Safari 404 0.07 

5161 Iceweasel missing missing 

5162 Comodo Dragon missing missing 

5163 IE10 404 0.08 

5164 Chrome 200 0.02 

5165 

5166 We can also reindex the columns. 

5167 

5168 >>> df.reindex(columns=['http_status', 'user_agent']) 

5169 http_status user_agent 

5170 Firefox 200 NaN 

5171 Chrome 200 NaN 

5172 Safari 404 NaN 

5173 IE10 404 NaN 

5174 Konqueror 301 NaN 

5175 

5176 Or we can use "axis-style" keyword arguments 

5177 

5178 >>> df.reindex(['http_status', 'user_agent'], axis="columns") 

5179 http_status user_agent 

5180 Firefox 200 NaN 

5181 Chrome 200 NaN 

5182 Safari 404 NaN 

5183 IE10 404 NaN 

5184 Konqueror 301 NaN 

5185 

5186 To further illustrate the filling functionality in 

5187 ``reindex``, we will create a dataframe with a 

5188 monotonically increasing index (for example, a sequence 

5189 of dates). 

5190 

5191 >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') 

5192 >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, 

5193 ... index=date_index) 

5194 >>> df2 

5195 prices 

5196 2010-01-01 100.0 

5197 2010-01-02 101.0 

5198 2010-01-03 NaN 

5199 2010-01-04 100.0 

5200 2010-01-05 89.0 

5201 2010-01-06 88.0 

5202 

5203 Suppose we decide to expand the dataframe to cover a wider 

5204 date range. 

5205 

5206 >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') 

5207 >>> df2.reindex(date_index2) 

5208 prices 

5209 2009-12-29 NaN 

5210 2009-12-30 NaN 

5211 2009-12-31 NaN 

5212 2010-01-01 100.0 

5213 2010-01-02 101.0 

5214 2010-01-03 NaN 

5215 2010-01-04 100.0 

5216 2010-01-05 89.0 

5217 2010-01-06 88.0 

5218 2010-01-07 NaN 

5219 

5220 The index entries that did not have a value in the original data frame 

5221 (for example, '2009-12-29') are by default filled with ``NaN``. 

5222 If desired, we can fill in the missing values using one of several 

5223 options. 

5224 

5225 For example, to back-propagate the last valid value to fill the ``NaN`` 

5226 values, pass ``bfill`` as an argument to the ``method`` keyword. 

5227 

5228 >>> df2.reindex(date_index2, method='bfill') 

5229 prices 

5230 2009-12-29 100.0 

5231 2009-12-30 100.0 

5232 2009-12-31 100.0 

5233 2010-01-01 100.0 

5234 2010-01-02 101.0 

5235 2010-01-03 NaN 

5236 2010-01-04 100.0 

5237 2010-01-05 89.0 

5238 2010-01-06 88.0 

5239 2010-01-07 NaN 

5240 

5241 Please note that the ``NaN`` value present in the original dataframe 

5242 (at index value 2010-01-03) will not be filled by any of the 

5243 value propagation schemes. This is because filling while reindexing 

5244 does not look at dataframe values, but only compares the original and 

5245 desired indexes. If you do want to fill in the ``NaN`` values present 

5246 in the original dataframe, use the ``fillna()`` method. 

5247 

5248 See the :ref:`user guide <basics.reindexing>` for more. 

5249 """ 

5250 # TODO: Decide if we care about having different examples for different 

5251 # kinds 

5252 

5253 # construct the args 

5254 axes, kwargs = self._construct_axes_from_arguments(args, kwargs) 

5255 method = missing.clean_reindex_fill_method(kwargs.pop("method", None)) 

5256 level = kwargs.pop("level", None) 

5257 copy = kwargs.pop("copy", None) 

5258 limit = kwargs.pop("limit", None) 

5259 tolerance = kwargs.pop("tolerance", None) 

5260 fill_value = kwargs.pop("fill_value", None) 

5261 

5262 # Series.reindex doesn't use / need the axis kwarg 

5263 # We pop and ignore it here, to make writing Series/Frame generic code 

5264 # easier 

5265 kwargs.pop("axis", None) 

5266 

5267 if kwargs: 

5268 raise TypeError( 

5269 "reindex() got an unexpected keyword " 

5270 f'argument "{list(kwargs.keys())[0]}"' 

5271 ) 

5272 

5273 self._consolidate_inplace() 

5274 

5275 # if all axes that are requested to reindex are equal, then only copy 

5276 # if indicated must have index names equal here as well as values 

5277 if all( 

5278 self._get_axis(axis).identical(ax) 

5279 for axis, ax in axes.items() 

5280 if ax is not None 

5281 ): 

5282 return self.copy(deep=copy) 

5283 

5284 # check if we are a multi reindex 

5285 if self._needs_reindex_multi(axes, method, level): 

5286 return self._reindex_multi(axes, copy, fill_value) 

5287 

5288 # perform the reindex on the axes 

5289 return self._reindex_axes( 

5290 axes, level, limit, tolerance, method, fill_value, copy 

5291 ).__finalize__(self, method="reindex") 

5292 

5293 def _reindex_axes( 

5294 self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy 

5295 ) -> NDFrameT: 

5296 """Perform the reindex for all the axes.""" 

5297 obj = self 

5298 for a in self._AXIS_ORDERS: 

5299 labels = axes[a] 

5300 if labels is None: 

5301 continue 

5302 

5303 ax = self._get_axis(a) 

5304 new_index, indexer = ax.reindex( 

5305 labels, level=level, limit=limit, tolerance=tolerance, method=method 

5306 ) 

5307 

5308 axis = self._get_axis_number(a) 

5309 obj = obj._reindex_with_indexers( 

5310 {axis: [new_index, indexer]}, 

5311 fill_value=fill_value, 

5312 copy=copy, 

5313 allow_dups=False, 

5314 ) 

5315 # If we've made a copy once, no need to make another one 

5316 copy = False 

5317 

5318 return obj 

5319 

5320 def _needs_reindex_multi(self, axes, method, level) -> bool_t: 

5321 """Check if we do need a multi reindex.""" 

5322 return ( 

5323 (com.count_not_none(*axes.values()) == self._AXIS_LEN) 

5324 and method is None 

5325 and level is None 

5326 and not self._is_mixed_type 

5327 ) 

5328 

5329 def _reindex_multi(self, axes, copy, fill_value): 

5330 raise AbstractMethodError(self) 

5331 

5332 @final 

5333 def _reindex_with_indexers( 

5334 self: NDFrameT, 

5335 reindexers, 

5336 fill_value=None, 

5337 copy: bool_t = False, 

5338 allow_dups: bool_t = False, 

5339 ) -> NDFrameT: 

5340 """allow_dups indicates an internal call here""" 

5341 # reindex doing multiple operations on different axes if indicated 

5342 new_data = self._mgr 

5343 for axis in sorted(reindexers.keys()): 

5344 index, indexer = reindexers[axis] 

5345 baxis = self._get_block_manager_axis(axis) 

5346 

5347 if index is None: 

5348 continue 

5349 

5350 index = ensure_index(index) 

5351 if indexer is not None: 

5352 indexer = ensure_platform_int(indexer) 

5353 

5354 # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi) 

5355 new_data = new_data.reindex_indexer( 

5356 index, 

5357 indexer, 

5358 axis=baxis, 

5359 fill_value=fill_value, 

5360 allow_dups=allow_dups, 

5361 copy=copy, 

5362 ) 

5363 # If we've made a copy once, no need to make another one 

5364 copy = False 

5365 

5366 if copy and new_data is self._mgr: 

5367 new_data = new_data.copy() 

5368 

5369 return self._constructor(new_data).__finalize__(self) 

5370 

5371 def filter( 

5372 self: NDFrameT, 

5373 items=None, 

5374 like: str | None = None, 

5375 regex: str | None = None, 

5376 axis=None, 

5377 ) -> NDFrameT: 

5378 """ 

5379 Subset the dataframe rows or columns according to the specified index labels. 

5380 

5381 Note that this routine does not filter a dataframe on its 

5382 contents. The filter is applied to the labels of the index. 

5383 

5384 Parameters 

5385 ---------- 

5386 items : list-like 

5387 Keep labels from axis which are in items. 

5388 like : str 

5389 Keep labels from axis for which "like in label == True". 

5390 regex : str (regular expression) 

5391 Keep labels from axis for which re.search(regex, label) == True. 

5392 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None 

5393 The axis to filter on, expressed either as an index (int) 

5394 or axis name (str). By default this is the info axis, 'columns' for 

5395 DataFrame. For `Series` this parameter is unused and defaults to `None`. 

5396 

5397 Returns 

5398 ------- 

5399 same type as input object 

5400 

5401 See Also 

5402 -------- 

5403 DataFrame.loc : Access a group of rows and columns 

5404 by label(s) or a boolean array. 

5405 

5406 Notes 

5407 ----- 

5408 The ``items``, ``like``, and ``regex`` parameters are 

5409 enforced to be mutually exclusive. 

5410 

5411 ``axis`` defaults to the info axis that is used when indexing 

5412 with ``[]``. 

5413 

5414 Examples 

5415 -------- 

5416 >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), 

5417 ... index=['mouse', 'rabbit'], 

5418 ... columns=['one', 'two', 'three']) 

5419 >>> df 

5420 one two three 

5421 mouse 1 2 3 

5422 rabbit 4 5 6 

5423 

5424 >>> # select columns by name 

5425 >>> df.filter(items=['one', 'three']) 

5426 one three 

5427 mouse 1 3 

5428 rabbit 4 6 

5429 

5430 >>> # select columns by regular expression 

5431 >>> df.filter(regex='e$', axis=1) 

5432 one three 

5433 mouse 1 3 

5434 rabbit 4 6 

5435 

5436 >>> # select rows containing 'bbi' 

5437 >>> df.filter(like='bbi', axis=0) 

5438 one two three 

5439 rabbit 4 5 6 

5440 """ 

5441 nkw = com.count_not_none(items, like, regex) 

5442 if nkw > 1: 

5443 raise TypeError( 

5444 "Keyword arguments `items`, `like`, or `regex` " 

5445 "are mutually exclusive" 

5446 ) 

5447 

5448 if axis is None: 

5449 axis = self._info_axis_name 

5450 labels = self._get_axis(axis) 

5451 

5452 if items is not None: 

5453 name = self._get_axis_name(axis) 

5454 return self.reindex(**{name: [r for r in items if r in labels]}) 

5455 elif like: 

5456 

5457 def f(x) -> bool_t: 

5458 assert like is not None # needed for mypy 

5459 return like in ensure_str(x) 

5460 

5461 values = labels.map(f) 

5462 return self.loc(axis=axis)[values] 

5463 elif regex: 

5464 

5465 def f(x) -> bool_t: 

5466 return matcher.search(ensure_str(x)) is not None 

5467 

5468 matcher = re.compile(regex) 

5469 values = labels.map(f) 

5470 return self.loc(axis=axis)[values] 

5471 else: 

5472 raise TypeError("Must pass either `items`, `like`, or `regex`") 

5473 

5474 @final 

5475 def head(self: NDFrameT, n: int = 5) -> NDFrameT: 

5476 """ 

5477 Return the first `n` rows. 

5478 

5479 This function returns the first `n` rows for the object based 

5480 on position. It is useful for quickly testing if your object 

5481 has the right type of data in it. 

5482 

5483 For negative values of `n`, this function returns all rows except 

5484 the last `|n|` rows, equivalent to ``df[:n]``. 

5485 

5486 If n is larger than the number of rows, this function returns all rows. 

5487 

5488 Parameters 

5489 ---------- 

5490 n : int, default 5 

5491 Number of rows to select. 

5492 

5493 Returns 

5494 ------- 

5495 same type as caller 

5496 The first `n` rows of the caller object. 

5497 

5498 See Also 

5499 -------- 

5500 DataFrame.tail: Returns the last `n` rows. 

5501 

5502 Examples 

5503 -------- 

5504 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5505 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5506 >>> df 

5507 animal 

5508 0 alligator 

5509 1 bee 

5510 2 falcon 

5511 3 lion 

5512 4 monkey 

5513 5 parrot 

5514 6 shark 

5515 7 whale 

5516 8 zebra 

5517 

5518 Viewing the first 5 lines 

5519 

5520 >>> df.head() 

5521 animal 

5522 0 alligator 

5523 1 bee 

5524 2 falcon 

5525 3 lion 

5526 4 monkey 

5527 

5528 Viewing the first `n` lines (three in this case) 

5529 

5530 >>> df.head(3) 

5531 animal 

5532 0 alligator 

5533 1 bee 

5534 2 falcon 

5535 

5536 For negative values of `n` 

5537 

5538 >>> df.head(-3) 

5539 animal 

5540 0 alligator 

5541 1 bee 

5542 2 falcon 

5543 3 lion 

5544 4 monkey 

5545 5 parrot 

5546 """ 

5547 return self.iloc[:n] 

5548 

5549 @final 

5550 def tail(self: NDFrameT, n: int = 5) -> NDFrameT: 

5551 """ 

5552 Return the last `n` rows. 

5553 

5554 This function returns last `n` rows from the object based on 

5555 position. It is useful for quickly verifying data, for example, 

5556 after sorting or appending rows. 

5557 

5558 For negative values of `n`, this function returns all rows except 

5559 the first `|n|` rows, equivalent to ``df[|n|:]``. 

5560 

5561 If n is larger than the number of rows, this function returns all rows. 

5562 

5563 Parameters 

5564 ---------- 

5565 n : int, default 5 

5566 Number of rows to select. 

5567 

5568 Returns 

5569 ------- 

5570 type of caller 

5571 The last `n` rows of the caller object. 

5572 

5573 See Also 

5574 -------- 

5575 DataFrame.head : The first `n` rows of the caller object. 

5576 

5577 Examples 

5578 -------- 

5579 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5580 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5581 >>> df 

5582 animal 

5583 0 alligator 

5584 1 bee 

5585 2 falcon 

5586 3 lion 

5587 4 monkey 

5588 5 parrot 

5589 6 shark 

5590 7 whale 

5591 8 zebra 

5592 

5593 Viewing the last 5 lines 

5594 

5595 >>> df.tail() 

5596 animal 

5597 4 monkey 

5598 5 parrot 

5599 6 shark 

5600 7 whale 

5601 8 zebra 

5602 

5603 Viewing the last `n` lines (three in this case) 

5604 

5605 >>> df.tail(3) 

5606 animal 

5607 6 shark 

5608 7 whale 

5609 8 zebra 

5610 

5611 For negative values of `n` 

5612 

5613 >>> df.tail(-3) 

5614 animal 

5615 3 lion 

5616 4 monkey 

5617 5 parrot 

5618 6 shark 

5619 7 whale 

5620 8 zebra 

5621 """ 

5622 if n == 0: 

5623 return self.iloc[0:0] 

5624 return self.iloc[-n:] 

5625 

5626 @final 

5627 def sample( 

5628 self: NDFrameT, 

5629 n: int | None = None, 

5630 frac: float | None = None, 

5631 replace: bool_t = False, 

5632 weights=None, 

5633 random_state: RandomState | None = None, 

5634 axis: Axis | None = None, 

5635 ignore_index: bool_t = False, 

5636 ) -> NDFrameT: 

5637 """ 

5638 Return a random sample of items from an axis of object. 

5639 

5640 You can use `random_state` for reproducibility. 

5641 

5642 Parameters 

5643 ---------- 

5644 n : int, optional 

5645 Number of items from axis to return. Cannot be used with `frac`. 

5646 Default = 1 if `frac` = None. 

5647 frac : float, optional 

5648 Fraction of axis items to return. Cannot be used with `n`. 

5649 replace : bool, default False 

5650 Allow or disallow sampling of the same row more than once. 

5651 weights : str or ndarray-like, optional 

5652 Default 'None' results in equal probability weighting. 

5653 If passed a Series, will align with target object on index. Index 

5654 values in weights not found in sampled object will be ignored and 

5655 index values in sampled object not in weights will be assigned 

5656 weights of zero. 

5657 If called on a DataFrame, will accept the name of a column 

5658 when axis = 0. 

5659 Unless weights are a Series, weights must be same length as axis 

5660 being sampled. 

5661 If weights do not sum to 1, they will be normalized to sum to 1. 

5662 Missing values in the weights column will be treated as zero. 

5663 Infinite values not allowed. 

5664 random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional 

5665 If int, array-like, or BitGenerator, seed for random number generator. 

5666 If np.random.RandomState or np.random.Generator, use as given. 

5667 

5668 .. versionchanged:: 1.1.0 

5669 

5670 array-like and BitGenerator object now passed to np.random.RandomState() 

5671 as seed 

5672 

5673 .. versionchanged:: 1.4.0 

5674 

5675 np.random.Generator objects now accepted 

5676 

5677 axis : {0 or ‘index’, 1 or ‘columns’, None}, default None 

5678 Axis to sample. Accepts axis number or name. Default is stat axis 

5679 for given data type. For `Series` this parameter is unused and defaults to `None`. 

5680 ignore_index : bool, default False 

5681 If True, the resulting index will be labeled 0, 1, …, n - 1. 

5682 

5683 .. versionadded:: 1.3.0 

5684 

5685 Returns 

5686 ------- 

5687 Series or DataFrame 

5688 A new object of same type as caller containing `n` items randomly 

5689 sampled from the caller object. 

5690 

5691 See Also 

5692 -------- 

5693 DataFrameGroupBy.sample: Generates random samples from each group of a 

5694 DataFrame object. 

5695 SeriesGroupBy.sample: Generates random samples from each group of a 

5696 Series object. 

5697 numpy.random.choice: Generates a random sample from a given 1-D numpy 

5698 array. 

5699 

5700 Notes 

5701 ----- 

5702 If `frac` > 1, `replacement` should be set to `True`. 

5703 

5704 Examples 

5705 -------- 

5706 >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], 

5707 ... 'num_wings': [2, 0, 0, 0], 

5708 ... 'num_specimen_seen': [10, 2, 1, 8]}, 

5709 ... index=['falcon', 'dog', 'spider', 'fish']) 

5710 >>> df 

5711 num_legs num_wings num_specimen_seen 

5712 falcon 2 2 10 

5713 dog 4 0 2 

5714 spider 8 0 1 

5715 fish 0 0 8 

5716 

5717 Extract 3 random elements from the ``Series`` ``df['num_legs']``: 

5718 Note that we use `random_state` to ensure the reproducibility of 

5719 the examples. 

5720 

5721 >>> df['num_legs'].sample(n=3, random_state=1) 

5722 fish 0 

5723 spider 8 

5724 falcon 2 

5725 Name: num_legs, dtype: int64 

5726 

5727 A random 50% sample of the ``DataFrame`` with replacement: 

5728 

5729 >>> df.sample(frac=0.5, replace=True, random_state=1) 

5730 num_legs num_wings num_specimen_seen 

5731 dog 4 0 2 

5732 fish 0 0 8 

5733 

5734 An upsample sample of the ``DataFrame`` with replacement: 

5735 Note that `replace` parameter has to be `True` for `frac` parameter > 1. 

5736 

5737 >>> df.sample(frac=2, replace=True, random_state=1) 

5738 num_legs num_wings num_specimen_seen 

5739 dog 4 0 2 

5740 fish 0 0 8 

5741 falcon 2 2 10 

5742 falcon 2 2 10 

5743 fish 0 0 8 

5744 dog 4 0 2 

5745 fish 0 0 8 

5746 dog 4 0 2 

5747 

5748 Using a DataFrame column as weights. Rows with larger value in the 

5749 `num_specimen_seen` column are more likely to be sampled. 

5750 

5751 >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) 

5752 num_legs num_wings num_specimen_seen 

5753 falcon 2 2 10 

5754 fish 0 0 8 

5755 """ # noqa:E501 

5756 if axis is None: 

5757 axis = self._stat_axis_number 

5758 

5759 axis = self._get_axis_number(axis) 

5760 obj_len = self.shape[axis] 

5761 

5762 # Process random_state argument 

5763 rs = com.random_state(random_state) 

5764 

5765 size = sample.process_sampling_size(n, frac, replace) 

5766 if size is None: 

5767 assert frac is not None 

5768 size = round(frac * obj_len) 

5769 

5770 if weights is not None: 

5771 weights = sample.preprocess_weights(self, weights, axis) 

5772 

5773 sampled_indices = sample.sample(obj_len, size, replace, weights, rs) 

5774 result = self.take(sampled_indices, axis=axis) 

5775 

5776 if ignore_index: 

5777 result.index = default_index(len(result)) 

5778 

5779 return result 

5780 

5781 @final 

5782 @doc(klass=_shared_doc_kwargs["klass"]) 

5783 def pipe( 

5784 self, 

5785 func: Callable[..., T] | tuple[Callable[..., T], str], 

5786 *args, 

5787 **kwargs, 

5788 ) -> T: 

5789 r""" 

5790 Apply chainable functions that expect Series or DataFrames. 

5791 

5792 Parameters 

5793 ---------- 

5794 func : function 

5795 Function to apply to the {klass}. 

5796 ``args``, and ``kwargs`` are passed into ``func``. 

5797 Alternatively a ``(callable, data_keyword)`` tuple where 

5798 ``data_keyword`` is a string indicating the keyword of 

5799 ``callable`` that expects the {klass}. 

5800 args : iterable, optional 

5801 Positional arguments passed into ``func``. 

5802 kwargs : mapping, optional 

5803 A dictionary of keyword arguments passed into ``func``. 

5804 

5805 Returns 

5806 ------- 

5807 object : the return type of ``func``. 

5808 

5809 See Also 

5810 -------- 

5811 DataFrame.apply : Apply a function along input axis of DataFrame. 

5812 DataFrame.applymap : Apply a function elementwise on a whole DataFrame. 

5813 Series.map : Apply a mapping correspondence on a 

5814 :class:`~pandas.Series`. 

5815 

5816 Notes 

5817 ----- 

5818 Use ``.pipe`` when chaining together functions that expect 

5819 Series, DataFrames or GroupBy objects. Instead of writing 

5820 

5821 >>> func(g(h(df), arg1=a), arg2=b, arg3=c) # doctest: +SKIP 

5822 

5823 You can write 

5824 

5825 >>> (df.pipe(h) 

5826 ... .pipe(g, arg1=a) 

5827 ... .pipe(func, arg2=b, arg3=c) 

5828 ... ) # doctest: +SKIP 

5829 

5830 If you have a function that takes the data as (say) the second 

5831 argument, pass a tuple indicating which keyword expects the 

5832 data. For example, suppose ``f`` takes its data as ``arg2``: 

5833 

5834 >>> (df.pipe(h) 

5835 ... .pipe(g, arg1=a) 

5836 ... .pipe((func, 'arg2'), arg1=a, arg3=c) 

5837 ... ) # doctest: +SKIP 

5838 """ 

5839 return com.pipe(self, func, *args, **kwargs) 

5840 

5841 # ---------------------------------------------------------------------- 

5842 # Attribute access 

5843 

5844 @final 

5845 def __finalize__( 

5846 self: NDFrameT, other, method: str | None = None, **kwargs 

5847 ) -> NDFrameT: 

5848 """ 

5849 Propagate metadata from other to self. 

5850 

5851 Parameters 

5852 ---------- 

5853 other : the object from which to get the attributes that we are going 

5854 to propagate 

5855 method : str, optional 

5856 A passed method name providing context on where ``__finalize__`` 

5857 was called. 

5858 

5859 .. warning:: 

5860 

5861 The value passed as `method` are not currently considered 

5862 stable across pandas releases. 

5863 """ 

5864 if isinstance(other, NDFrame): 

5865 for name in other.attrs: 

5866 self.attrs[name] = other.attrs[name] 

5867 

5868 self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels 

5869 # For subclasses using _metadata. 

5870 for name in set(self._metadata) & set(other._metadata): 

5871 assert isinstance(name, str) 

5872 object.__setattr__(self, name, getattr(other, name, None)) 

5873 

5874 if method == "concat": 

5875 attrs = other.objs[0].attrs 

5876 check_attrs = all(objs.attrs == attrs for objs in other.objs[1:]) 

5877 if check_attrs: 

5878 for name in attrs: 

5879 self.attrs[name] = attrs[name] 

5880 

5881 allows_duplicate_labels = all( 

5882 x.flags.allows_duplicate_labels for x in other.objs 

5883 ) 

5884 self.flags.allows_duplicate_labels = allows_duplicate_labels 

5885 

5886 return self 

5887 

5888 def __getattr__(self, name: str): 

5889 """ 

5890 After regular attribute access, try looking up the name 

5891 This allows simpler access to columns for interactive use. 

5892 """ 

5893 # Note: obj.x will always call obj.__getattribute__('x') prior to 

5894 # calling obj.__getattr__('x'). 

5895 if ( 

5896 name not in self._internal_names_set 

5897 and name not in self._metadata 

5898 and name not in self._accessors 

5899 and self._info_axis._can_hold_identifiers_and_holds_name(name) 

5900 ): 

5901 return self[name] 

5902 return object.__getattribute__(self, name) 

5903 

5904 def __setattr__(self, name: str, value) -> None: 

5905 """ 

5906 After regular attribute access, try setting the name 

5907 This allows simpler access to columns for interactive use. 

5908 """ 

5909 # first try regular attribute access via __getattribute__, so that 

5910 # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify 

5911 # the same attribute. 

5912 

5913 try: 

5914 object.__getattribute__(self, name) 

5915 return object.__setattr__(self, name, value) 

5916 except AttributeError: 

5917 pass 

5918 

5919 # if this fails, go on to more involved attribute setting 

5920 # (note that this matches __getattr__, above). 

5921 if name in self._internal_names_set: 

5922 object.__setattr__(self, name, value) 

5923 elif name in self._metadata: 

5924 object.__setattr__(self, name, value) 

5925 else: 

5926 try: 

5927 existing = getattr(self, name) 

5928 if isinstance(existing, Index): 

5929 object.__setattr__(self, name, value) 

5930 elif name in self._info_axis: 

5931 self[name] = value 

5932 else: 

5933 object.__setattr__(self, name, value) 

5934 except (AttributeError, TypeError): 

5935 if isinstance(self, ABCDataFrame) and (is_list_like(value)): 

5936 warnings.warn( 

5937 "Pandas doesn't allow columns to be " 

5938 "created via a new attribute name - see " 

5939 "https://pandas.pydata.org/pandas-docs/" 

5940 "stable/indexing.html#attribute-access", 

5941 stacklevel=find_stack_level(), 

5942 ) 

5943 object.__setattr__(self, name, value) 

5944 

5945 @final 

5946 def _dir_additions(self) -> set[str]: 

5947 """ 

5948 add the string-like attributes from the info_axis. 

5949 If info_axis is a MultiIndex, its first level values are used. 

5950 """ 

5951 additions = super()._dir_additions() 

5952 if self._info_axis._can_hold_strings: 

5953 additions.update(self._info_axis._dir_additions_for_owner) 

5954 return additions 

5955 

5956 # ---------------------------------------------------------------------- 

5957 # Consolidation of internals 

5958 

5959 @final 

5960 def _protect_consolidate(self, f): 

5961 """ 

5962 Consolidate _mgr -- if the blocks have changed, then clear the 

5963 cache 

5964 """ 

5965 if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): 

5966 return f() 

5967 blocks_before = len(self._mgr.blocks) 

5968 result = f() 

5969 if len(self._mgr.blocks) != blocks_before: 

5970 self._clear_item_cache() 

5971 return result 

5972 

5973 @final 

5974 def _consolidate_inplace(self) -> None: 

5975 """Consolidate data in place and return None""" 

5976 

5977 def f(): 

5978 self._mgr = self._mgr.consolidate() 

5979 

5980 self._protect_consolidate(f) 

5981 

5982 @final 

5983 def _consolidate(self): 

5984 """ 

5985 Compute NDFrame with "consolidated" internals (data of each dtype 

5986 grouped together in a single ndarray). 

5987 

5988 Returns 

5989 ------- 

5990 consolidated : same type as caller 

5991 """ 

5992 f = lambda: self._mgr.consolidate() 

5993 cons_data = self._protect_consolidate(f) 

5994 return self._constructor(cons_data).__finalize__(self) 

5995 

5996 @final 

5997 @property 

5998 def _is_mixed_type(self) -> bool_t: 

5999 if self._mgr.is_single_block: 

6000 return False 

6001 

6002 if self._mgr.any_extension_types: 

6003 # Even if they have the same dtype, we can't consolidate them, 

6004 # so we pretend this is "mixed'" 

6005 return True 

6006 

6007 return self.dtypes.nunique() > 1 

6008 

6009 @final 

6010 def _check_inplace_setting(self, value) -> bool_t: 

6011 """check whether we allow in-place setting with this type of value""" 

6012 if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: 

6013 

6014 # allow an actual np.nan thru 

6015 if is_float(value) and np.isnan(value): 

6016 return True 

6017 

6018 raise TypeError( 

6019 "Cannot do inplace boolean setting on " 

6020 "mixed-types with a non np.nan value" 

6021 ) 

6022 

6023 return True 

6024 

6025 @final 

6026 def _get_numeric_data(self: NDFrameT) -> NDFrameT: 

6027 return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) 

6028 

6029 @final 

6030 def _get_bool_data(self): 

6031 return self._constructor(self._mgr.get_bool_data()).__finalize__(self) 

6032 

6033 # ---------------------------------------------------------------------- 

6034 # Internal Interface Methods 

6035 

6036 @property 

6037 def values(self): 

6038 raise AbstractMethodError(self) 

6039 

6040 @property 

6041 def _values(self) -> np.ndarray: 

6042 """internal implementation""" 

6043 raise AbstractMethodError(self) 

6044 

6045 @property 

6046 def dtypes(self): 

6047 """ 

6048 Return the dtypes in the DataFrame. 

6049 

6050 This returns a Series with the data type of each column. 

6051 The result's index is the original DataFrame's columns. Columns 

6052 with mixed types are stored with the ``object`` dtype. See 

6053 :ref:`the User Guide <basics.dtypes>` for more. 

6054 

6055 Returns 

6056 ------- 

6057 pandas.Series 

6058 The data type of each column. 

6059 

6060 Examples 

6061 -------- 

6062 >>> df = pd.DataFrame({'float': [1.0], 

6063 ... 'int': [1], 

6064 ... 'datetime': [pd.Timestamp('20180310')], 

6065 ... 'string': ['foo']}) 

6066 >>> df.dtypes 

6067 float float64 

6068 int int64 

6069 datetime datetime64[ns] 

6070 string object 

6071 dtype: object 

6072 """ 

6073 data = self._mgr.get_dtypes() 

6074 return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_) 

6075 

6076 def astype( 

6077 self: NDFrameT, dtype, copy: bool_t = True, errors: IgnoreRaise = "raise" 

6078 ) -> NDFrameT: 

6079 """ 

6080 Cast a pandas object to a specified dtype ``dtype``. 

6081 

6082 Parameters 

6083 ---------- 

6084 dtype : data type, or dict of column name -> data type 

6085 Use a numpy.dtype or Python type to cast entire pandas object to 

6086 the same type. Alternatively, use {col: dtype, ...}, where col is a 

6087 column label and dtype is a numpy.dtype or Python type to cast one 

6088 or more of the DataFrame's columns to column-specific types. 

6089 copy : bool, default True 

6090 Return a copy when ``copy=True`` (be very careful setting 

6091 ``copy=False`` as changes to values then may propagate to other 

6092 pandas objects). 

6093 errors : {'raise', 'ignore'}, default 'raise' 

6094 Control raising of exceptions on invalid data for provided dtype. 

6095 

6096 - ``raise`` : allow exceptions to be raised 

6097 - ``ignore`` : suppress exceptions. On error return original object. 

6098 

6099 Returns 

6100 ------- 

6101 casted : same type as caller 

6102 

6103 See Also 

6104 -------- 

6105 to_datetime : Convert argument to datetime. 

6106 to_timedelta : Convert argument to timedelta. 

6107 to_numeric : Convert argument to a numeric type. 

6108 numpy.ndarray.astype : Cast a numpy array to a specified type. 

6109 

6110 Notes 

6111 ----- 

6112 .. deprecated:: 1.3.0 

6113 

6114 Using ``astype`` to convert from timezone-naive dtype to 

6115 timezone-aware dtype is deprecated and will raise in a 

6116 future version. Use :meth:`Series.dt.tz_localize` instead. 

6117 

6118 Examples 

6119 -------- 

6120 Create a DataFrame: 

6121 

6122 >>> d = {'col1': [1, 2], 'col2': [3, 4]} 

6123 >>> df = pd.DataFrame(data=d) 

6124 >>> df.dtypes 

6125 col1 int64 

6126 col2 int64 

6127 dtype: object 

6128 

6129 Cast all columns to int32: 

6130 

6131 >>> df.astype('int32').dtypes 

6132 col1 int32 

6133 col2 int32 

6134 dtype: object 

6135 

6136 Cast col1 to int32 using a dictionary: 

6137 

6138 >>> df.astype({'col1': 'int32'}).dtypes 

6139 col1 int32 

6140 col2 int64 

6141 dtype: object 

6142 

6143 Create a series: 

6144 

6145 >>> ser = pd.Series([1, 2], dtype='int32') 

6146 >>> ser 

6147 0 1 

6148 1 2 

6149 dtype: int32 

6150 >>> ser.astype('int64') 

6151 0 1 

6152 1 2 

6153 dtype: int64 

6154 

6155 Convert to categorical type: 

6156 

6157 >>> ser.astype('category') 

6158 0 1 

6159 1 2 

6160 dtype: category 

6161 Categories (2, int64): [1, 2] 

6162 

6163 Convert to ordered categorical type with custom ordering: 

6164 

6165 >>> from pandas.api.types import CategoricalDtype 

6166 >>> cat_dtype = CategoricalDtype( 

6167 ... categories=[2, 1], ordered=True) 

6168 >>> ser.astype(cat_dtype) 

6169 0 1 

6170 1 2 

6171 dtype: category 

6172 Categories (2, int64): [2 < 1] 

6173 

6174 Note that using ``copy=False`` and changing data on a new 

6175 pandas object may propagate changes: 

6176 

6177 >>> s1 = pd.Series([1, 2]) 

6178 >>> s2 = s1.astype('int64', copy=False) 

6179 >>> s2[0] = 10 

6180 >>> s1 # note that s1[0] has changed too 

6181 0 10 

6182 1 2 

6183 dtype: int64 

6184 

6185 Create a series of dates: 

6186 

6187 >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) 

6188 >>> ser_date 

6189 0 2020-01-01 

6190 1 2020-01-02 

6191 2 2020-01-03 

6192 dtype: datetime64[ns] 

6193 """ 

6194 if is_dict_like(dtype): 

6195 if self.ndim == 1: # i.e. Series 

6196 if len(dtype) > 1 or self.name not in dtype: 

6197 raise KeyError( 

6198 "Only the Series name can be used for " 

6199 "the key in Series dtype mappings." 

6200 ) 

6201 new_type = dtype[self.name] 

6202 return self.astype(new_type, copy, errors) 

6203 

6204 # GH#44417 cast to Series so we can use .iat below, which will be 

6205 # robust in case we 

6206 from pandas import Series 

6207 

6208 dtype_ser = Series(dtype, dtype=object) 

6209 

6210 for col_name in dtype_ser.index: 

6211 if col_name not in self: 

6212 raise KeyError( 

6213 "Only a column name can be used for the " 

6214 "key in a dtype mappings argument. " 

6215 f"'{col_name}' not found in columns." 

6216 ) 

6217 

6218 dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False) 

6219 

6220 results = [] 

6221 for i, (col_name, col) in enumerate(self.items()): 

6222 cdt = dtype_ser.iat[i] 

6223 if isna(cdt): 

6224 res_col = col.copy() if copy else col 

6225 else: 

6226 res_col = col.astype(dtype=cdt, copy=copy, errors=errors) 

6227 results.append(res_col) 

6228 

6229 elif is_extension_array_dtype(dtype) and self.ndim > 1: 

6230 # GH 18099/22869: columnwise conversion to extension dtype 

6231 # GH 24704: use iloc to handle duplicate column names 

6232 # TODO(EA2D): special case not needed with 2D EAs 

6233 results = [ 

6234 self.iloc[:, i].astype(dtype, copy=copy) 

6235 for i in range(len(self.columns)) 

6236 ] 

6237 

6238 else: 

6239 # else, only a single dtype is given 

6240 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 

6241 return self._constructor(new_data).__finalize__(self, method="astype") 

6242 

6243 # GH 33113: handle empty frame or series 

6244 if not results: 

6245 return self.copy() 

6246 

6247 # GH 19920: retain column metadata after concat 

6248 result = concat(results, axis=1, copy=False) 

6249 # GH#40810 retain subclass 

6250 # error: Incompatible types in assignment 

6251 # (expression has type "NDFrameT", variable has type "DataFrame") 

6252 result = self._constructor(result) # type: ignore[assignment] 

6253 result.columns = self.columns 

6254 result = result.__finalize__(self, method="astype") 

6255 # https://github.com/python/mypy/issues/8354 

6256 return cast(NDFrameT, result) 

6257 

6258 @final 

6259 def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: 

6260 """ 

6261 Make a copy of this object's indices and data. 

6262 

6263 When ``deep=True`` (default), a new object will be created with a 

6264 copy of the calling object's data and indices. Modifications to 

6265 the data or indices of the copy will not be reflected in the 

6266 original object (see notes below). 

6267 

6268 When ``deep=False``, a new object will be created without copying 

6269 the calling object's data or index (only references to the data 

6270 and index are copied). Any changes to the data of the original 

6271 will be reflected in the shallow copy (and vice versa). 

6272 

6273 Parameters 

6274 ---------- 

6275 deep : bool, default True 

6276 Make a deep copy, including a copy of the data and the indices. 

6277 With ``deep=False`` neither the indices nor the data are copied. 

6278 

6279 Returns 

6280 ------- 

6281 copy : Series or DataFrame 

6282 Object type matches caller. 

6283 

6284 Notes 

6285 ----- 

6286 When ``deep=True``, data is copied but actual Python objects 

6287 will not be copied recursively, only the reference to the object. 

6288 This is in contrast to `copy.deepcopy` in the Standard Library, 

6289 which recursively copies object data (see examples below). 

6290 

6291 While ``Index`` objects are copied when ``deep=True``, the underlying 

6292 numpy array is not copied for performance reasons. Since ``Index`` is 

6293 immutable, the underlying data can be safely shared and a copy 

6294 is not needed. 

6295 

6296 Since pandas is not thread safe, see the 

6297 :ref:`gotchas <gotchas.thread-safety>` when copying in a threading 

6298 environment. 

6299 

6300 Examples 

6301 -------- 

6302 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6303 >>> s 

6304 a 1 

6305 b 2 

6306 dtype: int64 

6307 

6308 >>> s_copy = s.copy() 

6309 >>> s_copy 

6310 a 1 

6311 b 2 

6312 dtype: int64 

6313 

6314 **Shallow copy versus default (deep) copy:** 

6315 

6316 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6317 >>> deep = s.copy() 

6318 >>> shallow = s.copy(deep=False) 

6319 

6320 Shallow copy shares data and index with original. 

6321 

6322 >>> s is shallow 

6323 False 

6324 >>> s.values is shallow.values and s.index is shallow.index 

6325 True 

6326 

6327 Deep copy has own copy of data and index. 

6328 

6329 >>> s is deep 

6330 False 

6331 >>> s.values is deep.values or s.index is deep.index 

6332 False 

6333 

6334 Updates to the data shared by shallow copy and original is reflected 

6335 in both; deep copy remains unchanged. 

6336 

6337 >>> s[0] = 3 

6338 >>> shallow[1] = 4 

6339 >>> s 

6340 a 3 

6341 b 4 

6342 dtype: int64 

6343 >>> shallow 

6344 a 3 

6345 b 4 

6346 dtype: int64 

6347 >>> deep 

6348 a 1 

6349 b 2 

6350 dtype: int64 

6351 

6352 Note that when copying an object containing Python objects, a deep copy 

6353 will copy the data, but will not do so recursively. Updating a nested 

6354 data object will be reflected in the deep copy. 

6355 

6356 >>> s = pd.Series([[1, 2], [3, 4]]) 

6357 >>> deep = s.copy() 

6358 >>> s[0][0] = 10 

6359 >>> s 

6360 0 [10, 2] 

6361 1 [3, 4] 

6362 dtype: object 

6363 >>> deep 

6364 0 [10, 2] 

6365 1 [3, 4] 

6366 dtype: object 

6367 """ 

6368 data = self._mgr.copy(deep=deep) 

6369 self._clear_item_cache() 

6370 return self._constructor(data).__finalize__(self, method="copy") 

6371 

6372 @final 

6373 def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT: 

6374 return self.copy(deep=deep) 

6375 

6376 @final 

6377 def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: 

6378 """ 

6379 Parameters 

6380 ---------- 

6381 memo, default None 

6382 Standard signature. Unused 

6383 """ 

6384 return self.copy(deep=True) 

6385 

6386 @final 

6387 def _convert( 

6388 self: NDFrameT, 

6389 datetime: bool_t = False, 

6390 numeric: bool_t = False, 

6391 timedelta: bool_t = False, 

6392 ) -> NDFrameT: 

6393 """ 

6394 Attempt to infer better dtype for object columns. 

6395 

6396 Parameters 

6397 ---------- 

6398 datetime : bool, default False 

6399 If True, convert to date where possible. 

6400 numeric : bool, default False 

6401 If True, attempt to convert to numbers (including strings), with 

6402 unconvertible values becoming NaN. 

6403 timedelta : bool, default False 

6404 If True, convert to timedelta where possible. 

6405 

6406 Returns 

6407 ------- 

6408 converted : same as input object 

6409 """ 

6410 validate_bool_kwarg(datetime, "datetime") 

6411 validate_bool_kwarg(numeric, "numeric") 

6412 validate_bool_kwarg(timedelta, "timedelta") 

6413 return self._constructor( 

6414 self._mgr.convert( 

6415 datetime=datetime, 

6416 numeric=numeric, 

6417 timedelta=timedelta, 

6418 copy=True, 

6419 ) 

6420 ).__finalize__(self) 

6421 

6422 @final 

6423 def infer_objects(self: NDFrameT) -> NDFrameT: 

6424 """ 

6425 Attempt to infer better dtypes for object columns. 

6426 

6427 Attempts soft conversion of object-dtyped 

6428 columns, leaving non-object and unconvertible 

6429 columns unchanged. The inference rules are the 

6430 same as during normal Series/DataFrame construction. 

6431 

6432 Returns 

6433 ------- 

6434 converted : same type as input object 

6435 

6436 See Also 

6437 -------- 

6438 to_datetime : Convert argument to datetime. 

6439 to_timedelta : Convert argument to timedelta. 

6440 to_numeric : Convert argument to numeric type. 

6441 convert_dtypes : Convert argument to best possible dtype. 

6442 

6443 Examples 

6444 -------- 

6445 >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) 

6446 >>> df = df.iloc[1:] 

6447 >>> df 

6448 A 

6449 1 1 

6450 2 2 

6451 3 3 

6452 

6453 >>> df.dtypes 

6454 A object 

6455 dtype: object 

6456 

6457 >>> df.infer_objects().dtypes 

6458 A int64 

6459 dtype: object 

6460 """ 

6461 # numeric=False necessary to only soft convert; 

6462 # python objects will still be converted to 

6463 # native numpy numeric types 

6464 return self._constructor( 

6465 self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) 

6466 ).__finalize__(self, method="infer_objects") 

6467 

6468 @final 

6469 def convert_dtypes( 

6470 self: NDFrameT, 

6471 infer_objects: bool_t = True, 

6472 convert_string: bool_t = True, 

6473 convert_integer: bool_t = True, 

6474 convert_boolean: bool_t = True, 

6475 convert_floating: bool_t = True, 

6476 ) -> NDFrameT: 

6477 """ 

6478 Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. 

6479 

6480 .. versionadded:: 1.0.0 

6481 

6482 Parameters 

6483 ---------- 

6484 infer_objects : bool, default True 

6485 Whether object dtypes should be converted to the best possible types. 

6486 convert_string : bool, default True 

6487 Whether object dtypes should be converted to ``StringDtype()``. 

6488 convert_integer : bool, default True 

6489 Whether, if possible, conversion can be done to integer extension types. 

6490 convert_boolean : bool, defaults True 

6491 Whether object dtypes should be converted to ``BooleanDtypes()``. 

6492 convert_floating : bool, defaults True 

6493 Whether, if possible, conversion can be done to floating extension types. 

6494 If `convert_integer` is also True, preference will be give to integer 

6495 dtypes if the floats can be faithfully casted to integers. 

6496 

6497 .. versionadded:: 1.2.0 

6498 

6499 Returns 

6500 ------- 

6501 Series or DataFrame 

6502 Copy of input object with new dtype. 

6503 

6504 See Also 

6505 -------- 

6506 infer_objects : Infer dtypes of objects. 

6507 to_datetime : Convert argument to datetime. 

6508 to_timedelta : Convert argument to timedelta. 

6509 to_numeric : Convert argument to a numeric type. 

6510 

6511 Notes 

6512 ----- 

6513 By default, ``convert_dtypes`` will attempt to convert a Series (or each 

6514 Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options 

6515 ``convert_string``, ``convert_integer``, ``convert_boolean`` and 

6516 ``convert_boolean``, it is possible to turn off individual conversions 

6517 to ``StringDtype``, the integer extension types, ``BooleanDtype`` 

6518 or floating extension types, respectively. 

6519 

6520 For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference 

6521 rules as during normal Series/DataFrame construction. Then, if possible, 

6522 convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer 

6523 or floating extension type, otherwise leave as ``object``. 

6524 

6525 If the dtype is integer, convert to an appropriate integer extension type. 

6526 

6527 If the dtype is numeric, and consists of all integers, convert to an 

6528 appropriate integer extension type. Otherwise, convert to an 

6529 appropriate floating extension type. 

6530 

6531 .. versionchanged:: 1.2 

6532 Starting with pandas 1.2, this method also converts float columns 

6533 to the nullable floating extension type. 

6534 

6535 In the future, as new dtypes are added that support ``pd.NA``, the results 

6536 of this method will change to support those new dtypes. 

6537 

6538 Examples 

6539 -------- 

6540 >>> df = pd.DataFrame( 

6541 ... { 

6542 ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), 

6543 ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), 

6544 ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")), 

6545 ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")), 

6546 ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")), 

6547 ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), 

6548 ... } 

6549 ... ) 

6550 

6551 Start with a DataFrame with default dtypes. 

6552 

6553 >>> df 

6554 a b c d e f 

6555 0 1 x True h 10.0 NaN 

6556 1 2 y False i NaN 100.5 

6557 2 3 z NaN NaN 20.0 200.0 

6558 

6559 >>> df.dtypes 

6560 a int32 

6561 b object 

6562 c object 

6563 d object 

6564 e float64 

6565 f float64 

6566 dtype: object 

6567 

6568 Convert the DataFrame to use best possible dtypes. 

6569 

6570 >>> dfn = df.convert_dtypes() 

6571 >>> dfn 

6572 a b c d e f 

6573 0 1 x True h 10 <NA> 

6574 1 2 y False i <NA> 100.5 

6575 2 3 z <NA> <NA> 20 200.0 

6576 

6577 >>> dfn.dtypes 

6578 a Int32 

6579 b string 

6580 c boolean 

6581 d string 

6582 e Int64 

6583 f Float64 

6584 dtype: object 

6585 

6586 Start with a Series of strings and missing data represented by ``np.nan``. 

6587 

6588 >>> s = pd.Series(["a", "b", np.nan]) 

6589 >>> s 

6590 0 a 

6591 1 b 

6592 2 NaN 

6593 dtype: object 

6594 

6595 Obtain a Series with dtype ``StringDtype``. 

6596 

6597 >>> s.convert_dtypes() 

6598 0 a 

6599 1 b 

6600 2 <NA> 

6601 dtype: string 

6602 """ 

6603 if self.ndim == 1: 

6604 return self._convert_dtypes( 

6605 infer_objects, 

6606 convert_string, 

6607 convert_integer, 

6608 convert_boolean, 

6609 convert_floating, 

6610 ) 

6611 else: 

6612 results = [ 

6613 col._convert_dtypes( 

6614 infer_objects, 

6615 convert_string, 

6616 convert_integer, 

6617 convert_boolean, 

6618 convert_floating, 

6619 ) 

6620 for col_name, col in self.items() 

6621 ] 

6622 if len(results) > 0: 

6623 result = concat(results, axis=1, copy=False, keys=self.columns) 

6624 cons = cast(Type["DataFrame"], self._constructor) 

6625 result = cons(result) 

6626 result = result.__finalize__(self, method="convert_dtypes") 

6627 # https://github.com/python/mypy/issues/8354 

6628 return cast(NDFrameT, result) 

6629 else: 

6630 return self.copy() 

6631 

6632 # ---------------------------------------------------------------------- 

6633 # Filling NA's 

6634 

6635 @overload 

6636 def fillna( 

6637 self: NDFrameT, 

6638 value: Hashable | Mapping | Series | DataFrame = ..., 

6639 *, 

6640 method: FillnaOptions | None = ..., 

6641 axis: Axis | None = ..., 

6642 inplace: Literal[False] = ..., 

6643 limit: int | None = ..., 

6644 downcast: dict | None = ..., 

6645 ) -> NDFrameT: 

6646 ... 

6647 

6648 @overload 

6649 def fillna( 

6650 self, 

6651 value: Hashable | Mapping | Series | DataFrame = ..., 

6652 *, 

6653 method: FillnaOptions | None = ..., 

6654 axis: Axis | None = ..., 

6655 inplace: Literal[True], 

6656 limit: int | None = ..., 

6657 downcast: dict | None = ..., 

6658 ) -> None: 

6659 ... 

6660 

6661 @overload 

6662 def fillna( 

6663 self: NDFrameT, 

6664 value: Hashable | Mapping | Series | DataFrame = ..., 

6665 *, 

6666 method: FillnaOptions | None = ..., 

6667 axis: Axis | None = ..., 

6668 inplace: bool_t = ..., 

6669 limit: int | None = ..., 

6670 downcast: dict | None = ..., 

6671 ) -> NDFrameT | None: 

6672 ... 

6673 

6674 @doc(**_shared_doc_kwargs) 

6675 def fillna( 

6676 self: NDFrameT, 

6677 value: Hashable | Mapping | Series | DataFrame = None, 

6678 method: FillnaOptions | None = None, 

6679 axis: Axis | None = None, 

6680 inplace: bool_t = False, 

6681 limit: int | None = None, 

6682 downcast: dict | None = None, 

6683 ) -> NDFrameT | None: 

6684 """ 

6685 Fill NA/NaN values using the specified method. 

6686 

6687 Parameters 

6688 ---------- 

6689 value : scalar, dict, Series, or DataFrame 

6690 Value to use to fill holes (e.g. 0), alternately a 

6691 dict/Series/DataFrame of values specifying which value to use for 

6692 each index (for a Series) or column (for a DataFrame). Values not 

6693 in the dict/Series/DataFrame will not be filled. This value cannot 

6694 be a list. 

6695 method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None 

6696 Method to use for filling holes in reindexed Series 

6697 pad / ffill: propagate last valid observation forward to next valid 

6698 backfill / bfill: use next valid observation to fill gap. 

6699 axis : {axes_single_arg} 

6700 Axis along which to fill missing values. For `Series` 

6701 this parameter is unused and defaults to 0. 

6702 inplace : bool, default False 

6703 If True, fill in-place. Note: this will modify any 

6704 other views on this object (e.g., a no-copy slice for a column in a 

6705 DataFrame). 

6706 limit : int, default None 

6707 If method is specified, this is the maximum number of consecutive 

6708 NaN values to forward/backward fill. In other words, if there is 

6709 a gap with more than this number of consecutive NaNs, it will only 

6710 be partially filled. If method is not specified, this is the 

6711 maximum number of entries along the entire axis where NaNs will be 

6712 filled. Must be greater than 0 if not None. 

6713 downcast : dict, default is None 

6714 A dict of item->dtype of what to downcast if possible, 

6715 or the string 'infer' which will try to downcast to an appropriate 

6716 equal type (e.g. float64 to int64 if possible). 

6717 

6718 Returns 

6719 ------- 

6720 {klass} or None 

6721 Object with missing values filled or None if ``inplace=True``. 

6722 

6723 See Also 

6724 -------- 

6725 interpolate : Fill NaN values using interpolation. 

6726 reindex : Conform object to new index. 

6727 asfreq : Convert TimeSeries to specified frequency. 

6728 

6729 Examples 

6730 -------- 

6731 >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], 

6732 ... [3, 4, np.nan, 1], 

6733 ... [np.nan, np.nan, np.nan, np.nan], 

6734 ... [np.nan, 3, np.nan, 4]], 

6735 ... columns=list("ABCD")) 

6736 >>> df 

6737 A B C D 

6738 0 NaN 2.0 NaN 0.0 

6739 1 3.0 4.0 NaN 1.0 

6740 2 NaN NaN NaN NaN 

6741 3 NaN 3.0 NaN 4.0 

6742 

6743 Replace all NaN elements with 0s. 

6744 

6745 >>> df.fillna(0) 

6746 A B C D 

6747 0 0.0 2.0 0.0 0.0 

6748 1 3.0 4.0 0.0 1.0 

6749 2 0.0 0.0 0.0 0.0 

6750 3 0.0 3.0 0.0 4.0 

6751 

6752 We can also propagate non-null values forward or backward. 

6753 

6754 >>> df.fillna(method="ffill") 

6755 A B C D 

6756 0 NaN 2.0 NaN 0.0 

6757 1 3.0 4.0 NaN 1.0 

6758 2 3.0 4.0 NaN 1.0 

6759 3 3.0 3.0 NaN 4.0 

6760 

6761 Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, 

6762 2, and 3 respectively. 

6763 

6764 >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}} 

6765 >>> df.fillna(value=values) 

6766 A B C D 

6767 0 0.0 2.0 2.0 0.0 

6768 1 3.0 4.0 2.0 1.0 

6769 2 0.0 1.0 2.0 3.0 

6770 3 0.0 3.0 2.0 4.0 

6771 

6772 Only replace the first NaN element. 

6773 

6774 >>> df.fillna(value=values, limit=1) 

6775 A B C D 

6776 0 0.0 2.0 2.0 0.0 

6777 1 3.0 4.0 NaN 1.0 

6778 2 NaN 1.0 NaN 3.0 

6779 3 NaN 3.0 NaN 4.0 

6780 

6781 When filling using a DataFrame, replacement happens along 

6782 the same column names and same indices 

6783 

6784 >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE")) 

6785 >>> df.fillna(df2) 

6786 A B C D 

6787 0 0.0 2.0 0.0 0.0 

6788 1 3.0 4.0 0.0 1.0 

6789 2 0.0 0.0 0.0 NaN 

6790 3 0.0 3.0 0.0 4.0 

6791 

6792 Note that column D is not affected since it is not present in df2. 

6793 """ 

6794 inplace = validate_bool_kwarg(inplace, "inplace") 

6795 value, method = validate_fillna_kwargs(value, method) 

6796 

6797 self._consolidate_inplace() 

6798 

6799 # set the default here, so functions examining the signaure 

6800 # can detect if something was set (e.g. in groupby) (GH9221) 

6801 if axis is None: 

6802 axis = 0 

6803 axis = self._get_axis_number(axis) 

6804 

6805 if value is None: 

6806 if not self._mgr.is_single_block and axis == 1: 

6807 if inplace: 

6808 raise NotImplementedError() 

6809 result = self.T.fillna(method=method, limit=limit).T 

6810 

6811 return result 

6812 

6813 new_data = self._mgr.interpolate( 

6814 method=method, 

6815 axis=axis, 

6816 limit=limit, 

6817 inplace=inplace, 

6818 downcast=downcast, 

6819 ) 

6820 else: 

6821 if self.ndim == 1: 

6822 if isinstance(value, (dict, ABCSeries)): 

6823 if not len(value): 

6824 # test_fillna_nonscalar 

6825 if inplace: 

6826 return None 

6827 return self.copy() 

6828 value = create_series_with_explicit_dtype( 

6829 value, dtype_if_empty=object 

6830 ) 

6831 value = value.reindex(self.index, copy=False) 

6832 value = value._values 

6833 elif not is_list_like(value): 

6834 pass 

6835 else: 

6836 raise TypeError( 

6837 '"value" parameter must be a scalar, dict ' 

6838 "or Series, but you passed a " 

6839 f'"{type(value).__name__}"' 

6840 ) 

6841 

6842 new_data = self._mgr.fillna( 

6843 value=value, limit=limit, inplace=inplace, downcast=downcast 

6844 ) 

6845 

6846 elif isinstance(value, (dict, ABCSeries)): 

6847 if axis == 1: 

6848 raise NotImplementedError( 

6849 "Currently only can fill " 

6850 "with dict/Series column " 

6851 "by column" 

6852 ) 

6853 

6854 result = self if inplace else self.copy() 

6855 is_dict = isinstance(downcast, dict) 

6856 for k, v in value.items(): 

6857 if k not in result: 

6858 continue 

6859 

6860 # error: Item "None" of "Optional[Dict[Any, Any]]" has no 

6861 # attribute "get" 

6862 downcast_k = ( 

6863 downcast 

6864 if not is_dict 

6865 else downcast.get(k) # type: ignore[union-attr] 

6866 ) 

6867 

6868 res_k = result[k].fillna(v, limit=limit, downcast=downcast_k) 

6869 

6870 if not inplace: 

6871 result[k] = res_k 

6872 else: 

6873 # We can write into our existing column(s) iff dtype 

6874 # was preserved. 

6875 if isinstance(res_k, ABCSeries): 

6876 # i.e. 'k' only shows up once in self.columns 

6877 if res_k.dtype == result[k].dtype: 

6878 result.loc[:, k] = res_k 

6879 else: 

6880 # Different dtype -> no way to do inplace. 

6881 result[k] = res_k 

6882 else: 

6883 # see test_fillna_dict_inplace_nonunique_columns 

6884 locs = result.columns.get_loc(k) 

6885 if isinstance(locs, slice): 

6886 locs = np.arange(self.shape[1])[locs] 

6887 elif ( 

6888 isinstance(locs, np.ndarray) and locs.dtype.kind == "b" 

6889 ): 

6890 locs = locs.nonzero()[0] 

6891 elif not ( 

6892 isinstance(locs, np.ndarray) and locs.dtype.kind == "i" 

6893 ): 

6894 # Should never be reached, but let's cover our bases 

6895 raise NotImplementedError( 

6896 "Unexpected get_loc result, please report a bug at " 

6897 "https://github.com/pandas-dev/pandas" 

6898 ) 

6899 

6900 for i, loc in enumerate(locs): 

6901 res_loc = res_k.iloc[:, i] 

6902 target = self.iloc[:, loc] 

6903 

6904 if res_loc.dtype == target.dtype: 

6905 result.iloc[:, loc] = res_loc 

6906 else: 

6907 result.isetitem(loc, res_loc) 

6908 

6909 return result if not inplace else None 

6910 

6911 elif not is_list_like(value): 

6912 if axis == 1: 

6913 

6914 result = self.T.fillna(value=value, limit=limit).T 

6915 

6916 # error: Incompatible types in assignment (expression has type 

6917 # "NDFrameT", variable has type "Union[ArrayManager, 

6918 # SingleArrayManager, BlockManager, SingleBlockManager]") 

6919 new_data = result # type: ignore[assignment] 

6920 else: 

6921 

6922 new_data = self._mgr.fillna( 

6923 value=value, limit=limit, inplace=inplace, downcast=downcast 

6924 ) 

6925 elif isinstance(value, ABCDataFrame) and self.ndim == 2: 

6926 

6927 new_data = self.where(self.notna(), value)._mgr 

6928 else: 

6929 raise ValueError(f"invalid fill value with a {type(value)}") 

6930 

6931 result = self._constructor(new_data) 

6932 if inplace: 

6933 return self._update_inplace(result) 

6934 else: 

6935 return result.__finalize__(self, method="fillna") 

6936 

6937 @overload 

6938 def ffill( 

6939 self: NDFrameT, 

6940 *, 

6941 axis: None | Axis = ..., 

6942 inplace: Literal[False] = ..., 

6943 limit: None | int = ..., 

6944 downcast: dict | None = ..., 

6945 ) -> NDFrameT: 

6946 ... 

6947 

6948 @overload 

6949 def ffill( 

6950 self, 

6951 *, 

6952 axis: None | Axis = ..., 

6953 inplace: Literal[True], 

6954 limit: None | int = ..., 

6955 downcast: dict | None = ..., 

6956 ) -> None: 

6957 ... 

6958 

6959 @overload 

6960 def ffill( 

6961 self: NDFrameT, 

6962 *, 

6963 axis: None | Axis = ..., 

6964 inplace: bool_t = ..., 

6965 limit: None | int = ..., 

6966 downcast: dict | None = ..., 

6967 ) -> NDFrameT | None: 

6968 ... 

6969 

6970 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

6971 @doc(klass=_shared_doc_kwargs["klass"]) 

6972 def ffill( 

6973 self: NDFrameT, 

6974 axis: None | Axis = None, 

6975 inplace: bool_t = False, 

6976 limit: None | int = None, 

6977 downcast: dict | None = None, 

6978 ) -> NDFrameT | None: 

6979 """ 

6980 Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``. 

6981 

6982 Returns 

6983 ------- 

6984 {klass} or None 

6985 Object with missing values filled or None if ``inplace=True``. 

6986 """ 

6987 return self.fillna( 

6988 method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

6989 ) 

6990 

6991 pad = ffill 

6992 

6993 @overload 

6994 def bfill( 

6995 self: NDFrameT, 

6996 *, 

6997 axis: None | Axis = ..., 

6998 inplace: Literal[False] = ..., 

6999 limit: None | int = ..., 

7000 downcast: dict | None = ..., 

7001 ) -> NDFrameT: 

7002 ... 

7003 

7004 @overload 

7005 def bfill( 

7006 self, 

7007 *, 

7008 axis: None | Axis = ..., 

7009 inplace: Literal[True], 

7010 limit: None | int = ..., 

7011 downcast: dict | None = ..., 

7012 ) -> None: 

7013 ... 

7014 

7015 @overload 

7016 def bfill( 

7017 self: NDFrameT, 

7018 *, 

7019 axis: None | Axis = ..., 

7020 inplace: bool_t = ..., 

7021 limit: None | int = ..., 

7022 downcast: dict | None = ..., 

7023 ) -> NDFrameT | None: 

7024 ... 

7025 

7026 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) 

7027 @doc(klass=_shared_doc_kwargs["klass"]) 

7028 def bfill( 

7029 self: NDFrameT, 

7030 axis: None | Axis = None, 

7031 inplace: bool_t = False, 

7032 limit: None | int = None, 

7033 downcast: dict | None = None, 

7034 ) -> NDFrameT | None: 

7035 """ 

7036 Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``. 

7037 

7038 Returns 

7039 ------- 

7040 {klass} or None 

7041 Object with missing values filled or None if ``inplace=True``. 

7042 """ 

7043 return self.fillna( 

7044 method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast 

7045 ) 

7046 

7047 backfill = bfill 

7048 

7049 @overload 

7050 def replace( 

7051 self: NDFrameT, 

7052 to_replace=..., 

7053 value=..., 

7054 *, 

7055 inplace: Literal[False] = ..., 

7056 limit: int | None = ..., 

7057 regex: bool_t = ..., 

7058 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7059 ) -> NDFrameT: 

7060 ... 

7061 

7062 @overload 

7063 def replace( 

7064 self, 

7065 to_replace=..., 

7066 value=..., 

7067 *, 

7068 inplace: Literal[True], 

7069 limit: int | None = ..., 

7070 regex: bool_t = ..., 

7071 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7072 ) -> None: 

7073 ... 

7074 

7075 @overload 

7076 def replace( 

7077 self: NDFrameT, 

7078 to_replace=..., 

7079 value=..., 

7080 *, 

7081 inplace: bool_t = ..., 

7082 limit: int | None = ..., 

7083 regex: bool_t = ..., 

7084 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7085 ) -> NDFrameT | None: 

7086 ... 

7087 

7088 @deprecate_nonkeyword_arguments( 

7089 version=None, allowed_args=["self", "to_replace", "value"] 

7090 ) 

7091 @doc( 

7092 _shared_docs["replace"], 

7093 klass=_shared_doc_kwargs["klass"], 

7094 inplace=_shared_doc_kwargs["inplace"], 

7095 replace_iloc=_shared_doc_kwargs["replace_iloc"], 

7096 ) 

7097 def replace( 

7098 self: NDFrameT, 

7099 to_replace=None, 

7100 value=lib.no_default, 

7101 inplace: bool_t = False, 

7102 limit: int | None = None, 

7103 regex: bool_t = False, 

7104 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, 

7105 ) -> NDFrameT | None: 

7106 if not ( 

7107 is_scalar(to_replace) 

7108 or is_re_compilable(to_replace) 

7109 or is_list_like(to_replace) 

7110 ): 

7111 raise TypeError( 

7112 "Expecting 'to_replace' to be either a scalar, array-like, " 

7113 "dict or None, got invalid type " 

7114 f"{repr(type(to_replace).__name__)}" 

7115 ) 

7116 

7117 inplace = validate_bool_kwarg(inplace, "inplace") 

7118 if not is_bool(regex) and to_replace is not None: 

7119 raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") 

7120 

7121 self._consolidate_inplace() 

7122 

7123 if value is lib.no_default or method is not lib.no_default: 

7124 # GH#36984 if the user explicitly passes value=None we want to 

7125 # respect that. We have the corner case where the user explicitly 

7126 # passes value=None *and* a method, which we interpret as meaning 

7127 # they want the (documented) default behavior. 

7128 if method is lib.no_default: 

7129 # TODO: get this to show up as the default in the docs? 

7130 method = "pad" 

7131 

7132 # passing a single value that is scalar like 

7133 # when value is None (GH5319), for compat 

7134 if not is_dict_like(to_replace) and not is_dict_like(regex): 

7135 to_replace = [to_replace] 

7136 

7137 if isinstance(to_replace, (tuple, list)): 

7138 if isinstance(self, ABCDataFrame): 

7139 from pandas import Series 

7140 

7141 result = self.apply( 

7142 Series._replace_single, 

7143 args=(to_replace, method, inplace, limit), 

7144 ) 

7145 if inplace: 

7146 return None 

7147 return result 

7148 return self._replace_single(to_replace, method, inplace, limit) 

7149 

7150 if not is_dict_like(to_replace): 

7151 if not is_dict_like(regex): 

7152 raise TypeError( 

7153 'If "to_replace" and "value" are both None ' 

7154 'and "to_replace" is not a list, then ' 

7155 "regex must be a mapping" 

7156 ) 

7157 to_replace = regex 

7158 regex = True 

7159 

7160 items = list(to_replace.items()) 

7161 if items: 

7162 keys, values = zip(*items) 

7163 else: 

7164 keys, values = ([], []) 

7165 

7166 are_mappings = [is_dict_like(v) for v in values] 

7167 

7168 if any(are_mappings): 

7169 if not all(are_mappings): 

7170 raise TypeError( 

7171 "If a nested mapping is passed, all values " 

7172 "of the top level mapping must be mappings" 

7173 ) 

7174 # passed a nested dict/Series 

7175 to_rep_dict = {} 

7176 value_dict = {} 

7177 

7178 for k, v in items: 

7179 keys, values = list(zip(*v.items())) or ([], []) 

7180 

7181 to_rep_dict[k] = list(keys) 

7182 value_dict[k] = list(values) 

7183 

7184 to_replace, value = to_rep_dict, value_dict 

7185 else: 

7186 to_replace, value = keys, values 

7187 

7188 return self.replace( 

7189 to_replace, value, inplace=inplace, limit=limit, regex=regex 

7190 ) 

7191 else: 

7192 

7193 # need a non-zero len on all axes 

7194 if not self.size: 

7195 if inplace: 

7196 return None 

7197 return self.copy() 

7198 

7199 if is_dict_like(to_replace): 

7200 if is_dict_like(value): # {'A' : NA} -> {'A' : 0} 

7201 # Note: Checking below for `in foo.keys()` instead of 

7202 # `in foo` is needed for when we have a Series and not dict 

7203 mapping = { 

7204 col: (to_replace[col], value[col]) 

7205 for col in to_replace.keys() 

7206 if col in value.keys() and col in self 

7207 } 

7208 return self._replace_columnwise(mapping, inplace, regex) 

7209 

7210 # {'A': NA} -> 0 

7211 elif not is_list_like(value): 

7212 # Operate column-wise 

7213 if self.ndim == 1: 

7214 raise ValueError( 

7215 "Series.replace cannot use dict-like to_replace " 

7216 "and non-None value" 

7217 ) 

7218 mapping = { 

7219 col: (to_rep, value) for col, to_rep in to_replace.items() 

7220 } 

7221 return self._replace_columnwise(mapping, inplace, regex) 

7222 else: 

7223 raise TypeError("value argument must be scalar, dict, or Series") 

7224 

7225 elif is_list_like(to_replace): 

7226 if not is_list_like(value): 

7227 # e.g. to_replace = [NA, ''] and value is 0, 

7228 # so we replace NA with 0 and then replace '' with 0 

7229 value = [value] * len(to_replace) 

7230 

7231 # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] 

7232 if len(to_replace) != len(value): 

7233 raise ValueError( 

7234 f"Replacement lists must match in length. " 

7235 f"Expecting {len(to_replace)} got {len(value)} " 

7236 ) 

7237 new_data = self._mgr.replace_list( 

7238 src_list=to_replace, 

7239 dest_list=value, 

7240 inplace=inplace, 

7241 regex=regex, 

7242 ) 

7243 

7244 elif to_replace is None: 

7245 if not ( 

7246 is_re_compilable(regex) 

7247 or is_list_like(regex) 

7248 or is_dict_like(regex) 

7249 ): 

7250 raise TypeError( 

7251 f"'regex' must be a string or a compiled regular expression " 

7252 f"or a list or dict of strings or regular expressions, " 

7253 f"you passed a {repr(type(regex).__name__)}" 

7254 ) 

7255 return self.replace( 

7256 regex, value, inplace=inplace, limit=limit, regex=True 

7257 ) 

7258 else: 

7259 

7260 # dest iterable dict-like 

7261 if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} 

7262 # Operate column-wise 

7263 if self.ndim == 1: 

7264 raise ValueError( 

7265 "Series.replace cannot use dict-value and " 

7266 "non-None to_replace" 

7267 ) 

7268 mapping = {col: (to_replace, val) for col, val in value.items()} 

7269 return self._replace_columnwise(mapping, inplace, regex) 

7270 

7271 elif not is_list_like(value): # NA -> 0 

7272 regex = should_use_regex(regex, to_replace) 

7273 if regex: 

7274 new_data = self._mgr.replace_regex( 

7275 to_replace=to_replace, 

7276 value=value, 

7277 inplace=inplace, 

7278 ) 

7279 else: 

7280 new_data = self._mgr.replace( 

7281 to_replace=to_replace, value=value, inplace=inplace 

7282 ) 

7283 else: 

7284 raise TypeError( 

7285 f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' 

7286 ) 

7287 

7288 result = self._constructor(new_data) 

7289 if inplace: 

7290 return self._update_inplace(result) 

7291 else: 

7292 return result.__finalize__(self, method="replace") 

7293 

7294 def interpolate( 

7295 self: NDFrameT, 

7296 method: str = "linear", 

7297 axis: Axis = 0, 

7298 limit: int | None = None, 

7299 inplace: bool_t = False, 

7300 limit_direction: str | None = None, 

7301 limit_area: str | None = None, 

7302 downcast: str | None = None, 

7303 **kwargs, 

7304 ) -> NDFrameT | None: 

7305 """ 

7306 Fill NaN values using an interpolation method. 

7307 

7308 Please note that only ``method='linear'`` is supported for 

7309 DataFrame/Series with a MultiIndex. 

7310 

7311 Parameters 

7312 ---------- 

7313 method : str, default 'linear' 

7314 Interpolation technique to use. One of: 

7315 

7316 * 'linear': Ignore the index and treat the values as equally 

7317 spaced. This is the only method supported on MultiIndexes. 

7318 * 'time': Works on daily and higher resolution data to interpolate 

7319 given length of interval. 

7320 * 'index', 'values': use the actual numerical values of the index. 

7321 * 'pad': Fill in NaNs using existing values. 

7322 * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', 

7323 'barycentric', 'polynomial': Passed to 

7324 `scipy.interpolate.interp1d`. These methods use the numerical 

7325 values of the index. Both 'polynomial' and 'spline' require that 

7326 you also specify an `order` (int), e.g. 

7327 ``df.interpolate(method='polynomial', order=5)``. 

7328 * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 

7329 'cubicspline': Wrappers around the SciPy interpolation methods of 

7330 similar names. See `Notes`. 

7331 * 'from_derivatives': Refers to 

7332 `scipy.interpolate.BPoly.from_derivatives` which 

7333 replaces 'piecewise_polynomial' interpolation method in 

7334 scipy 0.18. 

7335 

7336 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

7337 Axis to interpolate along. For `Series` this parameter is unused 

7338 and defaults to 0. 

7339 limit : int, optional 

7340 Maximum number of consecutive NaNs to fill. Must be greater than 

7341 0. 

7342 inplace : bool, default False 

7343 Update the data in place if possible. 

7344 limit_direction : {{'forward', 'backward', 'both'}}, Optional 

7345 Consecutive NaNs will be filled in this direction. 

7346 

7347 If limit is specified: 

7348 * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 

7349 * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 

7350 'backwards'. 

7351 

7352 If 'limit' is not specified: 

7353 * If 'method' is 'backfill' or 'bfill', the default is 'backward' 

7354 * else the default is 'forward' 

7355 

7356 .. versionchanged:: 1.1.0 

7357 raises ValueError if `limit_direction` is 'forward' or 'both' and 

7358 method is 'backfill' or 'bfill'. 

7359 raises ValueError if `limit_direction` is 'backward' or 'both' and 

7360 method is 'pad' or 'ffill'. 

7361 

7362 limit_area : {{`None`, 'inside', 'outside'}}, default None 

7363 If limit is specified, consecutive NaNs will be filled with this 

7364 restriction. 

7365 

7366 * ``None``: No fill restriction. 

7367 * 'inside': Only fill NaNs surrounded by valid values 

7368 (interpolate). 

7369 * 'outside': Only fill NaNs outside valid values (extrapolate). 

7370 

7371 downcast : optional, 'infer' or None, defaults to None 

7372 Downcast dtypes if possible. 

7373 ``**kwargs`` : optional 

7374 Keyword arguments to pass on to the interpolating function. 

7375 

7376 Returns 

7377 ------- 

7378 Series or DataFrame or None 

7379 Returns the same object type as the caller, interpolated at 

7380 some or all ``NaN`` values or None if ``inplace=True``. 

7381 

7382 See Also 

7383 -------- 

7384 fillna : Fill missing values using different methods. 

7385 scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials 

7386 (Akima interpolator). 

7387 scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the 

7388 Bernstein basis. 

7389 scipy.interpolate.interp1d : Interpolate a 1-D function. 

7390 scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh 

7391 interpolator). 

7392 scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic 

7393 interpolation. 

7394 scipy.interpolate.CubicSpline : Cubic spline data interpolator. 

7395 

7396 Notes 

7397 ----- 

7398 The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' 

7399 methods are wrappers around the respective SciPy implementations of 

7400 similar names. These use the actual numerical values of the index. 

7401 For more information on their behavior, see the 

7402 `SciPy documentation 

7403 <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__. 

7404 

7405 Examples 

7406 -------- 

7407 Filling in ``NaN`` in a :class:`~pandas.Series` via linear 

7408 interpolation. 

7409 

7410 >>> s = pd.Series([0, 1, np.nan, 3]) 

7411 >>> s 

7412 0 0.0 

7413 1 1.0 

7414 2 NaN 

7415 3 3.0 

7416 dtype: float64 

7417 >>> s.interpolate() 

7418 0 0.0 

7419 1 1.0 

7420 2 2.0 

7421 3 3.0 

7422 dtype: float64 

7423 

7424 Filling in ``NaN`` in a Series by padding, but filling at most two 

7425 consecutive ``NaN`` at a time. 

7426 

7427 >>> s = pd.Series([np.nan, "single_one", np.nan, 

7428 ... "fill_two_more", np.nan, np.nan, np.nan, 

7429 ... 4.71, np.nan]) 

7430 >>> s 

7431 0 NaN 

7432 1 single_one 

7433 2 NaN 

7434 3 fill_two_more 

7435 4 NaN 

7436 5 NaN 

7437 6 NaN 

7438 7 4.71 

7439 8 NaN 

7440 dtype: object 

7441 >>> s.interpolate(method='pad', limit=2) 

7442 0 NaN 

7443 1 single_one 

7444 2 single_one 

7445 3 fill_two_more 

7446 4 fill_two_more 

7447 5 fill_two_more 

7448 6 NaN 

7449 7 4.71 

7450 8 4.71 

7451 dtype: object 

7452 

7453 Filling in ``NaN`` in a Series via polynomial interpolation or splines: 

7454 Both 'polynomial' and 'spline' methods require that you also specify 

7455 an ``order`` (int). 

7456 

7457 >>> s = pd.Series([0, 2, np.nan, 8]) 

7458 >>> s.interpolate(method='polynomial', order=2) 

7459 0 0.000000 

7460 1 2.000000 

7461 2 4.666667 

7462 3 8.000000 

7463 dtype: float64 

7464 

7465 Fill the DataFrame forward (that is, going down) along each column 

7466 using linear interpolation. 

7467 

7468 Note how the last entry in column 'a' is interpolated differently, 

7469 because there is no entry after it to use for interpolation. 

7470 Note how the first entry in column 'b' remains ``NaN``, because there 

7471 is no entry before it to use for interpolation. 

7472 

7473 >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), 

7474 ... (np.nan, 2.0, np.nan, np.nan), 

7475 ... (2.0, 3.0, np.nan, 9.0), 

7476 ... (np.nan, 4.0, -4.0, 16.0)], 

7477 ... columns=list('abcd')) 

7478 >>> df 

7479 a b c d 

7480 0 0.0 NaN -1.0 1.0 

7481 1 NaN 2.0 NaN NaN 

7482 2 2.0 3.0 NaN 9.0 

7483 3 NaN 4.0 -4.0 16.0 

7484 >>> df.interpolate(method='linear', limit_direction='forward', axis=0) 

7485 a b c d 

7486 0 0.0 NaN -1.0 1.0 

7487 1 1.0 2.0 -2.0 5.0 

7488 2 2.0 3.0 -3.0 9.0 

7489 3 2.0 4.0 -4.0 16.0 

7490 

7491 Using polynomial interpolation. 

7492 

7493 >>> df['d'].interpolate(method='polynomial', order=2) 

7494 0 1.0 

7495 1 4.0 

7496 2 9.0 

7497 3 16.0 

7498 Name: d, dtype: float64 

7499 """ 

7500 inplace = validate_bool_kwarg(inplace, "inplace") 

7501 

7502 axis = self._get_axis_number(axis) 

7503 

7504 fillna_methods = ["ffill", "bfill", "pad", "backfill"] 

7505 should_transpose = axis == 1 and method not in fillna_methods 

7506 

7507 obj = self.T if should_transpose else self 

7508 

7509 if obj.empty: 

7510 return self.copy() 

7511 

7512 if method not in fillna_methods: 

7513 axis = self._info_axis_number 

7514 

7515 if isinstance(obj.index, MultiIndex) and method != "linear": 

7516 raise ValueError( 

7517 "Only `method=linear` interpolation is supported on MultiIndexes." 

7518 ) 

7519 

7520 # Set `limit_direction` depending on `method` 

7521 if limit_direction is None: 

7522 limit_direction = ( 

7523 "backward" if method in ("backfill", "bfill") else "forward" 

7524 ) 

7525 else: 

7526 if method in ("pad", "ffill") and limit_direction != "forward": 

7527 raise ValueError( 

7528 f"`limit_direction` must be 'forward' for method `{method}`" 

7529 ) 

7530 if method in ("backfill", "bfill") and limit_direction != "backward": 

7531 raise ValueError( 

7532 f"`limit_direction` must be 'backward' for method `{method}`" 

7533 ) 

7534 

7535 if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")): 

7536 raise TypeError( 

7537 "Cannot interpolate with all object-dtype columns " 

7538 "in the DataFrame. Try setting at least one " 

7539 "column to a numeric dtype." 

7540 ) 

7541 

7542 # create/use the index 

7543 if method == "linear": 

7544 # prior default 

7545 index = Index(np.arange(len(obj.index))) 

7546 else: 

7547 index = obj.index 

7548 methods = {"index", "values", "nearest", "time"} 

7549 is_numeric_or_datetime = ( 

7550 is_numeric_dtype(index.dtype) 

7551 or is_datetime64_any_dtype(index.dtype) 

7552 or is_timedelta64_dtype(index.dtype) 

7553 ) 

7554 if method not in methods and not is_numeric_or_datetime: 

7555 raise ValueError( 

7556 "Index column must be numeric or datetime type when " 

7557 f"using {method} method other than linear. " 

7558 "Try setting a numeric or datetime index column before " 

7559 "interpolating." 

7560 ) 

7561 

7562 if isna(index).any(): 

7563 raise NotImplementedError( 

7564 "Interpolation with NaNs in the index " 

7565 "has not been implemented. Try filling " 

7566 "those NaNs before interpolating." 

7567 ) 

7568 new_data = obj._mgr.interpolate( 

7569 method=method, 

7570 axis=axis, 

7571 index=index, 

7572 limit=limit, 

7573 limit_direction=limit_direction, 

7574 limit_area=limit_area, 

7575 inplace=inplace, 

7576 downcast=downcast, 

7577 **kwargs, 

7578 ) 

7579 

7580 result = self._constructor(new_data) 

7581 if should_transpose: 

7582 result = result.T 

7583 if inplace: 

7584 return self._update_inplace(result) 

7585 else: 

7586 return result.__finalize__(self, method="interpolate") 

7587 

7588 # ---------------------------------------------------------------------- 

7589 # Timeseries methods Methods 

7590 

7591 @final 

7592 def asof(self, where, subset=None): 

7593 """ 

7594 Return the last row(s) without any NaNs before `where`. 

7595 

7596 The last row (for each element in `where`, if list) without any 

7597 NaN is taken. 

7598 In case of a :class:`~pandas.DataFrame`, the last row without NaN 

7599 considering only the subset of columns (if not `None`) 

7600 

7601 If there is no good value, NaN is returned for a Series or 

7602 a Series of NaN values for a DataFrame 

7603 

7604 Parameters 

7605 ---------- 

7606 where : date or array-like of dates 

7607 Date(s) before which the last row(s) are returned. 

7608 subset : str or array-like of str, default `None` 

7609 For DataFrame, if not `None`, only use these columns to 

7610 check for NaNs. 

7611 

7612 Returns 

7613 ------- 

7614 scalar, Series, or DataFrame 

7615 

7616 The return can be: 

7617 

7618 * scalar : when `self` is a Series and `where` is a scalar 

7619 * Series: when `self` is a Series and `where` is an array-like, 

7620 or when `self` is a DataFrame and `where` is a scalar 

7621 * DataFrame : when `self` is a DataFrame and `where` is an 

7622 array-like 

7623 

7624 Return scalar, Series, or DataFrame. 

7625 

7626 See Also 

7627 -------- 

7628 merge_asof : Perform an asof merge. Similar to left join. 

7629 

7630 Notes 

7631 ----- 

7632 Dates are assumed to be sorted. Raises if this is not the case. 

7633 

7634 Examples 

7635 -------- 

7636 A Series and a scalar `where`. 

7637 

7638 >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) 

7639 >>> s 

7640 10 1.0 

7641 20 2.0 

7642 30 NaN 

7643 40 4.0 

7644 dtype: float64 

7645 

7646 >>> s.asof(20) 

7647 2.0 

7648 

7649 For a sequence `where`, a Series is returned. The first value is 

7650 NaN, because the first element of `where` is before the first 

7651 index value. 

7652 

7653 >>> s.asof([5, 20]) 

7654 5 NaN 

7655 20 2.0 

7656 dtype: float64 

7657 

7658 Missing values are not considered. The following is ``2.0``, not 

7659 NaN, even though NaN is at the index location for ``30``. 

7660 

7661 >>> s.asof(30) 

7662 2.0 

7663 

7664 Take all columns into consideration 

7665 

7666 >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], 

7667 ... 'b': [None, None, None, None, 500]}, 

7668 ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', 

7669 ... '2018-02-27 09:02:00', 

7670 ... '2018-02-27 09:03:00', 

7671 ... '2018-02-27 09:04:00', 

7672 ... '2018-02-27 09:05:00'])) 

7673 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7674 ... '2018-02-27 09:04:30'])) 

7675 a b 

7676 2018-02-27 09:03:30 NaN NaN 

7677 2018-02-27 09:04:30 NaN NaN 

7678 

7679 Take a single column into consideration 

7680 

7681 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

7682 ... '2018-02-27 09:04:30']), 

7683 ... subset=['a']) 

7684 a b 

7685 2018-02-27 09:03:30 30 NaN 

7686 2018-02-27 09:04:30 40 NaN 

7687 """ 

7688 if isinstance(where, str): 

7689 where = Timestamp(where) 

7690 

7691 if not self.index.is_monotonic_increasing: 

7692 raise ValueError("asof requires a sorted index") 

7693 

7694 is_series = isinstance(self, ABCSeries) 

7695 if is_series: 

7696 if subset is not None: 

7697 raise ValueError("subset is not valid for Series") 

7698 else: 

7699 if subset is None: 

7700 subset = self.columns 

7701 if not is_list_like(subset): 

7702 subset = [subset] 

7703 

7704 is_list = is_list_like(where) 

7705 if not is_list: 

7706 start = self.index[0] 

7707 if isinstance(self.index, PeriodIndex): 

7708 where = Period(where, freq=self.index.freq) 

7709 

7710 if where < start: 

7711 if not is_series: 

7712 return self._constructor_sliced( 

7713 index=self.columns, name=where, dtype=np.float64 

7714 ) 

7715 return np.nan 

7716 

7717 # It's always much faster to use a *while* loop here for 

7718 # Series than pre-computing all the NAs. However a 

7719 # *while* loop is extremely expensive for DataFrame 

7720 # so we later pre-compute all the NAs and use the same 

7721 # code path whether *where* is a scalar or list. 

7722 # See PR: https://github.com/pandas-dev/pandas/pull/14476 

7723 if is_series: 

7724 loc = self.index.searchsorted(where, side="right") 

7725 if loc > 0: 

7726 loc -= 1 

7727 

7728 values = self._values 

7729 while loc > 0 and isna(values[loc]): 

7730 loc -= 1 

7731 return values[loc] 

7732 

7733 if not isinstance(where, Index): 

7734 where = Index(where) if is_list else Index([where]) 

7735 

7736 nulls = self.isna() if is_series else self[subset].isna().any(axis=1) 

7737 if nulls.all(): 

7738 if is_series: 

7739 self = cast("Series", self) 

7740 return self._constructor(np.nan, index=where, name=self.name) 

7741 elif is_list: 

7742 self = cast("DataFrame", self) 

7743 return self._constructor(np.nan, index=where, columns=self.columns) 

7744 else: 

7745 self = cast("DataFrame", self) 

7746 return self._constructor_sliced( 

7747 np.nan, index=self.columns, name=where[0] 

7748 ) 

7749 

7750 locs = self.index.asof_locs(where, ~(nulls._values)) 

7751 

7752 # mask the missing 

7753 missing = locs == -1 

7754 data = self.take(locs) 

7755 data.index = where 

7756 if missing.any(): 

7757 # GH#16063 only do this setting when necessary, otherwise 

7758 # we'd cast e.g. bools to floats 

7759 data.loc[missing] = np.nan 

7760 return data if is_list else data.iloc[-1] 

7761 

7762 # ---------------------------------------------------------------------- 

7763 # Action Methods 

7764 

7765 @doc(klass=_shared_doc_kwargs["klass"]) 

7766 def isna(self: NDFrameT) -> NDFrameT: 

7767 """ 

7768 Detect missing values. 

7769 

7770 Return a boolean same-sized object indicating if the values are NA. 

7771 NA values, such as None or :attr:`numpy.NaN`, gets mapped to True 

7772 values. 

7773 Everything else gets mapped to False values. Characters such as empty 

7774 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7775 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7776 

7777 Returns 

7778 ------- 

7779 {klass} 

7780 Mask of bool values for each element in {klass} that 

7781 indicates whether an element is an NA value. 

7782 

7783 See Also 

7784 -------- 

7785 {klass}.isnull : Alias of isna. 

7786 {klass}.notna : Boolean inverse of isna. 

7787 {klass}.dropna : Omit axes labels with missing values. 

7788 isna : Top-level isna. 

7789 

7790 Examples 

7791 -------- 

7792 Show which entries in a DataFrame are NA. 

7793 

7794 >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], 

7795 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

7796 ... pd.Timestamp('1940-04-25')], 

7797 ... name=['Alfred', 'Batman', ''], 

7798 ... toy=[None, 'Batmobile', 'Joker'])) 

7799 >>> df 

7800 age born name toy 

7801 0 5.0 NaT Alfred None 

7802 1 6.0 1939-05-27 Batman Batmobile 

7803 2 NaN 1940-04-25 Joker 

7804 

7805 >>> df.isna() 

7806 age born name toy 

7807 0 False True False True 

7808 1 False False False False 

7809 2 True False False False 

7810 

7811 Show which entries in a Series are NA. 

7812 

7813 >>> ser = pd.Series([5, 6, np.NaN]) 

7814 >>> ser 

7815 0 5.0 

7816 1 6.0 

7817 2 NaN 

7818 dtype: float64 

7819 

7820 >>> ser.isna() 

7821 0 False 

7822 1 False 

7823 2 True 

7824 dtype: bool 

7825 """ 

7826 return isna(self).__finalize__(self, method="isna") 

7827 

7828 @doc(isna, klass=_shared_doc_kwargs["klass"]) 

7829 def isnull(self: NDFrameT) -> NDFrameT: 

7830 return isna(self).__finalize__(self, method="isnull") 

7831 

7832 @doc(klass=_shared_doc_kwargs["klass"]) 

7833 def notna(self: NDFrameT) -> NDFrameT: 

7834 """ 

7835 Detect existing (non-missing) values. 

7836 

7837 Return a boolean same-sized object indicating if the values are not NA. 

7838 Non-missing values get mapped to True. Characters such as empty 

7839 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

7840 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

7841 NA values, such as None or :attr:`numpy.NaN`, get mapped to False 

7842 values. 

7843 

7844 Returns 

7845 ------- 

7846 {klass} 

7847 Mask of bool values for each element in {klass} that 

7848 indicates whether an element is not an NA value. 

7849 

7850 See Also 

7851 -------- 

7852 {klass}.notnull : Alias of notna. 

7853 {klass}.isna : Boolean inverse of notna. 

7854 {klass}.dropna : Omit axes labels with missing values. 

7855 notna : Top-level notna. 

7856 

7857 Examples 

7858 -------- 

7859 Show which entries in a DataFrame are not NA. 

7860 

7861 >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN], 

7862 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

7863 ... pd.Timestamp('1940-04-25')], 

7864 ... name=['Alfred', 'Batman', ''], 

7865 ... toy=[None, 'Batmobile', 'Joker'])) 

7866 >>> df 

7867 age born name toy 

7868 0 5.0 NaT Alfred None 

7869 1 6.0 1939-05-27 Batman Batmobile 

7870 2 NaN 1940-04-25 Joker 

7871 

7872 >>> df.notna() 

7873 age born name toy 

7874 0 True False True False 

7875 1 True True True True 

7876 2 False True True True 

7877 

7878 Show which entries in a Series are not NA. 

7879 

7880 >>> ser = pd.Series([5, 6, np.NaN]) 

7881 >>> ser 

7882 0 5.0 

7883 1 6.0 

7884 2 NaN 

7885 dtype: float64 

7886 

7887 >>> ser.notna() 

7888 0 True 

7889 1 True 

7890 2 False 

7891 dtype: bool 

7892 """ 

7893 return notna(self).__finalize__(self, method="notna") 

7894 

7895 @doc(notna, klass=_shared_doc_kwargs["klass"]) 

7896 def notnull(self: NDFrameT) -> NDFrameT: 

7897 return notna(self).__finalize__(self, method="notnull") 

7898 

7899 @final 

7900 def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): 

7901 if (lower is not None and np.any(isna(lower))) or ( 

7902 upper is not None and np.any(isna(upper)) 

7903 ): 

7904 raise ValueError("Cannot use an NA value as a clip threshold") 

7905 

7906 result = self 

7907 mask = isna(self._values) 

7908 

7909 with np.errstate(all="ignore"): 

7910 if upper is not None: 

7911 subset = self <= upper 

7912 result = result.where(subset, upper, axis=None, inplace=False) 

7913 if lower is not None: 

7914 subset = self >= lower 

7915 result = result.where(subset, lower, axis=None, inplace=False) 

7916 

7917 if np.any(mask): 

7918 result[mask] = np.nan 

7919 

7920 if inplace: 

7921 return self._update_inplace(result) 

7922 else: 

7923 return result 

7924 

7925 @final 

7926 def _clip_with_one_bound(self, threshold, method, axis, inplace): 

7927 

7928 if axis is not None: 

7929 axis = self._get_axis_number(axis) 

7930 

7931 # method is self.le for upper bound and self.ge for lower bound 

7932 if is_scalar(threshold) and is_number(threshold): 

7933 if method.__name__ == "le": 

7934 return self._clip_with_scalar(None, threshold, inplace=inplace) 

7935 return self._clip_with_scalar(threshold, None, inplace=inplace) 

7936 

7937 # GH #15390 

7938 # In order for where method to work, the threshold must 

7939 # be transformed to NDFrame from other array like structure. 

7940 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): 

7941 if isinstance(self, ABCSeries): 

7942 threshold = self._constructor(threshold, index=self.index) 

7943 else: 

7944 threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] 

7945 

7946 # GH 40420 

7947 # Treat missing thresholds as no bounds, not clipping the values 

7948 if is_list_like(threshold): 

7949 fill_value = np.inf if method.__name__ == "le" else -np.inf 

7950 threshold_inf = threshold.fillna(fill_value) 

7951 else: 

7952 threshold_inf = threshold 

7953 

7954 subset = method(threshold_inf, axis=axis) | isna(self) 

7955 

7956 # GH 40420 

7957 return self.where(subset, threshold, axis=axis, inplace=inplace) 

7958 

7959 def clip( 

7960 self: NDFrameT, 

7961 lower=None, 

7962 upper=None, 

7963 axis: Axis | None = None, 

7964 inplace: bool_t = False, 

7965 *args, 

7966 **kwargs, 

7967 ) -> NDFrameT | None: 

7968 """ 

7969 Trim values at input threshold(s). 

7970 

7971 Assigns values outside boundary to boundary values. Thresholds 

7972 can be singular values or array like, and in the latter case 

7973 the clipping is performed element-wise in the specified axis. 

7974 

7975 Parameters 

7976 ---------- 

7977 lower : float or array-like, default None 

7978 Minimum threshold value. All values below this 

7979 threshold will be set to it. A missing 

7980 threshold (e.g `NA`) will not clip the value. 

7981 upper : float or array-like, default None 

7982 Maximum threshold value. All values above this 

7983 threshold will be set to it. A missing 

7984 threshold (e.g `NA`) will not clip the value. 

7985 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

7986 Align object with lower and upper along the given axis. 

7987 For `Series` this parameter is unused and defaults to `None`. 

7988 inplace : bool, default False 

7989 Whether to perform the operation in place on the data. 

7990 *args, **kwargs 

7991 Additional keywords have no effect but might be accepted 

7992 for compatibility with numpy. 

7993 

7994 Returns 

7995 ------- 

7996 Series or DataFrame or None 

7997 Same type as calling object with the values outside the 

7998 clip boundaries replaced or None if ``inplace=True``. 

7999 

8000 See Also 

8001 -------- 

8002 Series.clip : Trim values at input threshold in series. 

8003 DataFrame.clip : Trim values at input threshold in dataframe. 

8004 numpy.clip : Clip (limit) the values in an array. 

8005 

8006 Examples 

8007 -------- 

8008 >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} 

8009 >>> df = pd.DataFrame(data) 

8010 >>> df 

8011 col_0 col_1 

8012 0 9 -2 

8013 1 -3 -7 

8014 2 0 6 

8015 3 -1 8 

8016 4 5 -5 

8017 

8018 Clips per column using lower and upper thresholds: 

8019 

8020 >>> df.clip(-4, 6) 

8021 col_0 col_1 

8022 0 6 -2 

8023 1 -3 -4 

8024 2 0 6 

8025 3 -1 6 

8026 4 5 -4 

8027 

8028 Clips using specific lower and upper thresholds per column element: 

8029 

8030 >>> t = pd.Series([2, -4, -1, 6, 3]) 

8031 >>> t 

8032 0 2 

8033 1 -4 

8034 2 -1 

8035 3 6 

8036 4 3 

8037 dtype: int64 

8038 

8039 >>> df.clip(t, t + 4, axis=0) 

8040 col_0 col_1 

8041 0 6 2 

8042 1 -3 -4 

8043 2 0 3 

8044 3 6 8 

8045 4 5 3 

8046 

8047 Clips using specific lower threshold per column element, with missing values: 

8048 

8049 >>> t = pd.Series([2, -4, np.NaN, 6, 3]) 

8050 >>> t 

8051 0 2.0 

8052 1 -4.0 

8053 2 NaN 

8054 3 6.0 

8055 4 3.0 

8056 dtype: float64 

8057 

8058 >>> df.clip(t, axis=0) 

8059 col_0 col_1 

8060 0 9 2 

8061 1 -3 -4 

8062 2 0 6 

8063 3 6 8 

8064 4 5 3 

8065 """ 

8066 inplace = validate_bool_kwarg(inplace, "inplace") 

8067 

8068 axis = nv.validate_clip_with_axis(axis, args, kwargs) 

8069 if axis is not None: 

8070 axis = self._get_axis_number(axis) 

8071 

8072 # GH 17276 

8073 # numpy doesn't like NaN as a clip value 

8074 # so ignore 

8075 # GH 19992 

8076 # numpy doesn't drop a list-like bound containing NaN 

8077 isna_lower = isna(lower) 

8078 if not is_list_like(lower): 

8079 if np.any(isna_lower): 

8080 lower = None 

8081 elif np.all(isna_lower): 

8082 lower = None 

8083 isna_upper = isna(upper) 

8084 if not is_list_like(upper): 

8085 if np.any(isna_upper): 

8086 upper = None 

8087 elif np.all(isna_upper): 

8088 upper = None 

8089 

8090 # GH 2747 (arguments were reversed) 

8091 if ( 

8092 lower is not None 

8093 and upper is not None 

8094 and is_scalar(lower) 

8095 and is_scalar(upper) 

8096 ): 

8097 lower, upper = min(lower, upper), max(lower, upper) 

8098 

8099 # fast-path for scalars 

8100 if (lower is None or (is_scalar(lower) and is_number(lower))) and ( 

8101 upper is None or (is_scalar(upper) and is_number(upper)) 

8102 ): 

8103 return self._clip_with_scalar(lower, upper, inplace=inplace) 

8104 

8105 result = self 

8106 if lower is not None: 

8107 result = result._clip_with_one_bound( 

8108 lower, method=self.ge, axis=axis, inplace=inplace 

8109 ) 

8110 if upper is not None: 

8111 if inplace: 

8112 result = self 

8113 result = result._clip_with_one_bound( 

8114 upper, method=self.le, axis=axis, inplace=inplace 

8115 ) 

8116 

8117 return result 

8118 

8119 @doc(**_shared_doc_kwargs) 

8120 def asfreq( 

8121 self: NDFrameT, 

8122 freq: Frequency, 

8123 method: FillnaOptions | None = None, 

8124 how: str | None = None, 

8125 normalize: bool_t = False, 

8126 fill_value: Hashable = None, 

8127 ) -> NDFrameT: 

8128 """ 

8129 Convert time series to specified frequency. 

8130 

8131 Returns the original data conformed to a new index with the specified 

8132 frequency. 

8133 

8134 If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index 

8135 is the result of transforming the original index with 

8136 :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index 

8137 will map one-to-one to the new index). 

8138 

8139 Otherwise, the new index will be equivalent to ``pd.date_range(start, end, 

8140 freq=freq)`` where ``start`` and ``end`` are, respectively, the first and 

8141 last entries in the original index (see :func:`pandas.date_range`). The 

8142 values corresponding to any timesteps in the new index which were not present 

8143 in the original index will be null (``NaN``), unless a method for filling 

8144 such unknowns is provided (see the ``method`` parameter below). 

8145 

8146 The :meth:`resample` method is more appropriate if an operation on each group of 

8147 timesteps (such as an aggregate) is necessary to represent the data at the new 

8148 frequency. 

8149 

8150 Parameters 

8151 ---------- 

8152 freq : DateOffset or str 

8153 Frequency DateOffset or string. 

8154 method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None 

8155 Method to use for filling holes in reindexed Series (note this 

8156 does not fill NaNs that already were present): 

8157 

8158 * 'pad' / 'ffill': propagate last valid observation forward to next 

8159 valid 

8160 * 'backfill' / 'bfill': use NEXT valid observation to fill. 

8161 how : {{'start', 'end'}}, default end 

8162 For PeriodIndex only (see PeriodIndex.asfreq). 

8163 normalize : bool, default False 

8164 Whether to reset output index to midnight. 

8165 fill_value : scalar, optional 

8166 Value to use for missing values, applied during upsampling (note 

8167 this does not fill NaNs that already were present). 

8168 

8169 Returns 

8170 ------- 

8171 {klass} 

8172 {klass} object reindexed to the specified frequency. 

8173 

8174 See Also 

8175 -------- 

8176 reindex : Conform DataFrame to new index with optional filling logic. 

8177 

8178 Notes 

8179 ----- 

8180 To learn more about the frequency strings, please see `this link 

8181 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

8182 

8183 Examples 

8184 -------- 

8185 Start by creating a series with 4 one minute timestamps. 

8186 

8187 >>> index = pd.date_range('1/1/2000', periods=4, freq='T') 

8188 >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) 

8189 >>> df = pd.DataFrame({{'s': series}}) 

8190 >>> df 

8191 s 

8192 2000-01-01 00:00:00 0.0 

8193 2000-01-01 00:01:00 NaN 

8194 2000-01-01 00:02:00 2.0 

8195 2000-01-01 00:03:00 3.0 

8196 

8197 Upsample the series into 30 second bins. 

8198 

8199 >>> df.asfreq(freq='30S') 

8200 s 

8201 2000-01-01 00:00:00 0.0 

8202 2000-01-01 00:00:30 NaN 

8203 2000-01-01 00:01:00 NaN 

8204 2000-01-01 00:01:30 NaN 

8205 2000-01-01 00:02:00 2.0 

8206 2000-01-01 00:02:30 NaN 

8207 2000-01-01 00:03:00 3.0 

8208 

8209 Upsample again, providing a ``fill value``. 

8210 

8211 >>> df.asfreq(freq='30S', fill_value=9.0) 

8212 s 

8213 2000-01-01 00:00:00 0.0 

8214 2000-01-01 00:00:30 9.0 

8215 2000-01-01 00:01:00 NaN 

8216 2000-01-01 00:01:30 9.0 

8217 2000-01-01 00:02:00 2.0 

8218 2000-01-01 00:02:30 9.0 

8219 2000-01-01 00:03:00 3.0 

8220 

8221 Upsample again, providing a ``method``. 

8222 

8223 >>> df.asfreq(freq='30S', method='bfill') 

8224 s 

8225 2000-01-01 00:00:00 0.0 

8226 2000-01-01 00:00:30 NaN 

8227 2000-01-01 00:01:00 NaN 

8228 2000-01-01 00:01:30 2.0 

8229 2000-01-01 00:02:00 2.0 

8230 2000-01-01 00:02:30 3.0 

8231 2000-01-01 00:03:00 3.0 

8232 """ 

8233 from pandas.core.resample import asfreq 

8234 

8235 return asfreq( 

8236 self, 

8237 freq, 

8238 method=method, 

8239 how=how, 

8240 normalize=normalize, 

8241 fill_value=fill_value, 

8242 ) 

8243 

8244 @final 

8245 def at_time(self: NDFrameT, time, asof: bool_t = False, axis=None) -> NDFrameT: 

8246 """ 

8247 Select values at particular time of day (e.g., 9:30AM). 

8248 

8249 Parameters 

8250 ---------- 

8251 time : datetime.time or str 

8252 axis : {0 or 'index', 1 or 'columns'}, default 0 

8253 For `Series` this parameter is unused and defaults to 0. 

8254 

8255 Returns 

8256 ------- 

8257 Series or DataFrame 

8258 

8259 Raises 

8260 ------ 

8261 TypeError 

8262 If the index is not a :class:`DatetimeIndex` 

8263 

8264 See Also 

8265 -------- 

8266 between_time : Select values between particular times of the day. 

8267 first : Select initial periods of time series based on a date offset. 

8268 last : Select final periods of time series based on a date offset. 

8269 DatetimeIndex.indexer_at_time : Get just the index locations for 

8270 values at particular time of the day. 

8271 

8272 Examples 

8273 -------- 

8274 >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') 

8275 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8276 >>> ts 

8277 A 

8278 2018-04-09 00:00:00 1 

8279 2018-04-09 12:00:00 2 

8280 2018-04-10 00:00:00 3 

8281 2018-04-10 12:00:00 4 

8282 

8283 >>> ts.at_time('12:00') 

8284 A 

8285 2018-04-09 12:00:00 2 

8286 2018-04-10 12:00:00 4 

8287 """ 

8288 if axis is None: 

8289 axis = self._stat_axis_number 

8290 axis = self._get_axis_number(axis) 

8291 

8292 index = self._get_axis(axis) 

8293 

8294 if not isinstance(index, DatetimeIndex): 

8295 raise TypeError("Index must be DatetimeIndex") 

8296 

8297 indexer = index.indexer_at_time(time, asof=asof) 

8298 return self._take_with_is_copy(indexer, axis=axis) 

8299 

8300 @final 

8301 def between_time( 

8302 self: NDFrameT, 

8303 start_time, 

8304 end_time, 

8305 include_start: bool_t | lib.NoDefault = lib.no_default, 

8306 include_end: bool_t | lib.NoDefault = lib.no_default, 

8307 inclusive: IntervalClosedType | None = None, 

8308 axis=None, 

8309 ) -> NDFrameT: 

8310 """ 

8311 Select values between particular times of the day (e.g., 9:00-9:30 AM). 

8312 

8313 By setting ``start_time`` to be later than ``end_time``, 

8314 you can get the times that are *not* between the two times. 

8315 

8316 Parameters 

8317 ---------- 

8318 start_time : datetime.time or str 

8319 Initial time as a time filter limit. 

8320 end_time : datetime.time or str 

8321 End time as a time filter limit. 

8322 include_start : bool, default True 

8323 Whether the start time needs to be included in the result. 

8324 

8325 .. deprecated:: 1.4.0 

8326 Arguments `include_start` and `include_end` have been deprecated 

8327 to standardize boundary inputs. Use `inclusive` instead, to set 

8328 each bound as closed or open. 

8329 include_end : bool, default True 

8330 Whether the end time needs to be included in the result. 

8331 

8332 .. deprecated:: 1.4.0 

8333 Arguments `include_start` and `include_end` have been deprecated 

8334 to standardize boundary inputs. Use `inclusive` instead, to set 

8335 each bound as closed or open. 

8336 inclusive : {"both", "neither", "left", "right"}, default "both" 

8337 Include boundaries; whether to set each bound as closed or open. 

8338 axis : {0 or 'index', 1 or 'columns'}, default 0 

8339 Determine range time on index or columns value. 

8340 For `Series` this parameter is unused and defaults to 0. 

8341 

8342 Returns 

8343 ------- 

8344 Series or DataFrame 

8345 Data from the original object filtered to the specified dates range. 

8346 

8347 Raises 

8348 ------ 

8349 TypeError 

8350 If the index is not a :class:`DatetimeIndex` 

8351 

8352 See Also 

8353 -------- 

8354 at_time : Select values at a particular time of the day. 

8355 first : Select initial periods of time series based on a date offset. 

8356 last : Select final periods of time series based on a date offset. 

8357 DatetimeIndex.indexer_between_time : Get just the index locations for 

8358 values between particular times of the day. 

8359 

8360 Examples 

8361 -------- 

8362 >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') 

8363 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8364 >>> ts 

8365 A 

8366 2018-04-09 00:00:00 1 

8367 2018-04-10 00:20:00 2 

8368 2018-04-11 00:40:00 3 

8369 2018-04-12 01:00:00 4 

8370 

8371 >>> ts.between_time('0:15', '0:45') 

8372 A 

8373 2018-04-10 00:20:00 2 

8374 2018-04-11 00:40:00 3 

8375 

8376 You get the times that are *not* between two times by setting 

8377 ``start_time`` later than ``end_time``: 

8378 

8379 >>> ts.between_time('0:45', '0:15') 

8380 A 

8381 2018-04-09 00:00:00 1 

8382 2018-04-12 01:00:00 4 

8383 """ 

8384 if axis is None: 

8385 axis = self._stat_axis_number 

8386 axis = self._get_axis_number(axis) 

8387 

8388 index = self._get_axis(axis) 

8389 if not isinstance(index, DatetimeIndex): 

8390 raise TypeError("Index must be DatetimeIndex") 

8391 

8392 old_include_arg_used = (include_start != lib.no_default) or ( 

8393 include_end != lib.no_default 

8394 ) 

8395 

8396 if old_include_arg_used and inclusive is not None: 

8397 raise ValueError( 

8398 "Deprecated arguments `include_start` and `include_end` " 

8399 "cannot be passed if `inclusive` has been given." 

8400 ) 

8401 # If any of the deprecated arguments ('include_start', 'include_end') 

8402 # have been passed 

8403 elif old_include_arg_used: 

8404 warnings.warn( 

8405 "`include_start` and `include_end` are deprecated in " 

8406 "favour of `inclusive`.", 

8407 FutureWarning, 

8408 stacklevel=find_stack_level(), 

8409 ) 

8410 left = True if include_start is lib.no_default else include_start 

8411 right = True if include_end is lib.no_default else include_end 

8412 

8413 inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = { 

8414 (True, True): "both", 

8415 (True, False): "left", 

8416 (False, True): "right", 

8417 (False, False): "neither", 

8418 } 

8419 inclusive = inc_dict[(left, right)] 

8420 elif inclusive is None: 

8421 # On arg removal inclusive can default to "both" 

8422 inclusive = "both" 

8423 left_inclusive, right_inclusive = validate_inclusive(inclusive) 

8424 indexer = index.indexer_between_time( 

8425 start_time, 

8426 end_time, 

8427 include_start=left_inclusive, 

8428 include_end=right_inclusive, 

8429 ) 

8430 return self._take_with_is_copy(indexer, axis=axis) 

8431 

8432 @doc(**_shared_doc_kwargs) 

8433 def resample( 

8434 self, 

8435 rule, 

8436 axis: Axis = 0, 

8437 closed: str | None = None, 

8438 label: str | None = None, 

8439 convention: str = "start", 

8440 kind: str | None = None, 

8441 loffset=None, 

8442 base: int | None = None, 

8443 on: Level = None, 

8444 level: Level = None, 

8445 origin: str | TimestampConvertibleTypes = "start_day", 

8446 offset: TimedeltaConvertibleTypes | None = None, 

8447 group_keys: bool_t | lib.NoDefault = lib.no_default, 

8448 ) -> Resampler: 

8449 """ 

8450 Resample time-series data. 

8451 

8452 Convenience method for frequency conversion and resampling of time series. 

8453 The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`, 

8454 or `TimedeltaIndex`), or the caller must pass the label of a datetime-like 

8455 series/index to the ``on``/``level`` keyword parameter. 

8456 

8457 Parameters 

8458 ---------- 

8459 rule : DateOffset, Timedelta or str 

8460 The offset string or object representing target conversion. 

8461 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

8462 Which axis to use for up- or down-sampling. For `Series` this parameter 

8463 is unused and defaults to 0. Must be 

8464 `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. 

8465 closed : {{'right', 'left'}}, default None 

8466 Which side of bin interval is closed. The default is 'left' 

8467 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

8468 'BA', 'BQ', and 'W' which all have a default of 'right'. 

8469 label : {{'right', 'left'}}, default None 

8470 Which bin edge label to label bucket with. The default is 'left' 

8471 for all frequency offsets except for 'M', 'A', 'Q', 'BM', 

8472 'BA', 'BQ', and 'W' which all have a default of 'right'. 

8473 convention : {{'start', 'end', 's', 'e'}}, default 'start' 

8474 For `PeriodIndex` only, controls whether to use the start or 

8475 end of `rule`. 

8476 kind : {{'timestamp', 'period'}}, optional, default None 

8477 Pass 'timestamp' to convert the resulting index to a 

8478 `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. 

8479 By default the input representation is retained. 

8480 loffset : timedelta, default None 

8481 Adjust the resampled time labels. 

8482 

8483 .. deprecated:: 1.1.0 

8484 You should add the loffset to the `df.index` after the resample. 

8485 See below. 

8486 

8487 base : int, default 0 

8488 For frequencies that evenly subdivide 1 day, the "origin" of the 

8489 aggregated intervals. For example, for '5min' frequency, base could 

8490 range from 0 through 4. Defaults to 0. 

8491 

8492 .. deprecated:: 1.1.0 

8493 The new arguments that you should use are 'offset' or 'origin'. 

8494 

8495 on : str, optional 

8496 For a DataFrame, column to use instead of index for resampling. 

8497 Column must be datetime-like. 

8498 level : str or int, optional 

8499 For a MultiIndex, level (name or number) to use for 

8500 resampling. `level` must be datetime-like. 

8501 origin : Timestamp or str, default 'start_day' 

8502 The timestamp on which to adjust the grouping. The timezone of origin 

8503 must match the timezone of the index. 

8504 If string, must be one of the following: 

8505 

8506 - 'epoch': `origin` is 1970-01-01 

8507 - 'start': `origin` is the first value of the timeseries 

8508 - 'start_day': `origin` is the first day at midnight of the timeseries 

8509 

8510 .. versionadded:: 1.1.0 

8511 

8512 - 'end': `origin` is the last value of the timeseries 

8513 - 'end_day': `origin` is the ceiling midnight of the last day 

8514 

8515 .. versionadded:: 1.3.0 

8516 

8517 offset : Timedelta or str, default is None 

8518 An offset timedelta added to the origin. 

8519 

8520 .. versionadded:: 1.1.0 

8521 

8522 group_keys : bool, optional 

8523 Whether to include the group keys in the result index when using 

8524 ``.apply()`` on the resampled object. Not specifying ``group_keys`` 

8525 will retain values-dependent behavior from pandas 1.4 

8526 and earlier (see :ref:`pandas 1.5.0 Release notes 

8527 <whatsnew_150.enhancements.resample_group_keys>` 

8528 for examples). In a future version of pandas, the behavior will 

8529 default to the same as specifying ``group_keys=False``. 

8530 

8531 .. versionadded:: 1.5.0 

8532 

8533 Returns 

8534 ------- 

8535 pandas.core.Resampler 

8536 :class:`~pandas.core.Resampler` object. 

8537 

8538 See Also 

8539 -------- 

8540 Series.resample : Resample a Series. 

8541 DataFrame.resample : Resample a DataFrame. 

8542 groupby : Group {klass} by mapping, function, label, or list of labels. 

8543 asfreq : Reindex a {klass} with the given frequency without grouping. 

8544 

8545 Notes 

8546 ----- 

8547 See the `user guide 

8548 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__ 

8549 for more. 

8550 

8551 To learn more about the offset strings, please see `this link 

8552 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__. 

8553 

8554 Examples 

8555 -------- 

8556 Start by creating a series with 9 one minute timestamps. 

8557 

8558 >>> index = pd.date_range('1/1/2000', periods=9, freq='T') 

8559 >>> series = pd.Series(range(9), index=index) 

8560 >>> series 

8561 2000-01-01 00:00:00 0 

8562 2000-01-01 00:01:00 1 

8563 2000-01-01 00:02:00 2 

8564 2000-01-01 00:03:00 3 

8565 2000-01-01 00:04:00 4 

8566 2000-01-01 00:05:00 5 

8567 2000-01-01 00:06:00 6 

8568 2000-01-01 00:07:00 7 

8569 2000-01-01 00:08:00 8 

8570 Freq: T, dtype: int64 

8571 

8572 Downsample the series into 3 minute bins and sum the values 

8573 of the timestamps falling into a bin. 

8574 

8575 >>> series.resample('3T').sum() 

8576 2000-01-01 00:00:00 3 

8577 2000-01-01 00:03:00 12 

8578 2000-01-01 00:06:00 21 

8579 Freq: 3T, dtype: int64 

8580 

8581 Downsample the series into 3 minute bins as above, but label each 

8582 bin using the right edge instead of the left. Please note that the 

8583 value in the bucket used as the label is not included in the bucket, 

8584 which it labels. For example, in the original series the 

8585 bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed 

8586 value in the resampled bucket with the label ``2000-01-01 00:03:00`` 

8587 does not include 3 (if it did, the summed value would be 6, not 3). 

8588 To include this value close the right side of the bin interval as 

8589 illustrated in the example below this one. 

8590 

8591 >>> series.resample('3T', label='right').sum() 

8592 2000-01-01 00:03:00 3 

8593 2000-01-01 00:06:00 12 

8594 2000-01-01 00:09:00 21 

8595 Freq: 3T, dtype: int64 

8596 

8597 Downsample the series into 3 minute bins as above, but close the right 

8598 side of the bin interval. 

8599 

8600 >>> series.resample('3T', label='right', closed='right').sum() 

8601 2000-01-01 00:00:00 0 

8602 2000-01-01 00:03:00 6 

8603 2000-01-01 00:06:00 15 

8604 2000-01-01 00:09:00 15 

8605 Freq: 3T, dtype: int64 

8606 

8607 Upsample the series into 30 second bins. 

8608 

8609 >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows 

8610 2000-01-01 00:00:00 0.0 

8611 2000-01-01 00:00:30 NaN 

8612 2000-01-01 00:01:00 1.0 

8613 2000-01-01 00:01:30 NaN 

8614 2000-01-01 00:02:00 2.0 

8615 Freq: 30S, dtype: float64 

8616 

8617 Upsample the series into 30 second bins and fill the ``NaN`` 

8618 values using the ``ffill`` method. 

8619 

8620 >>> series.resample('30S').ffill()[0:5] 

8621 2000-01-01 00:00:00 0 

8622 2000-01-01 00:00:30 0 

8623 2000-01-01 00:01:00 1 

8624 2000-01-01 00:01:30 1 

8625 2000-01-01 00:02:00 2 

8626 Freq: 30S, dtype: int64 

8627 

8628 Upsample the series into 30 second bins and fill the 

8629 ``NaN`` values using the ``bfill`` method. 

8630 

8631 >>> series.resample('30S').bfill()[0:5] 

8632 2000-01-01 00:00:00 0 

8633 2000-01-01 00:00:30 1 

8634 2000-01-01 00:01:00 1 

8635 2000-01-01 00:01:30 2 

8636 2000-01-01 00:02:00 2 

8637 Freq: 30S, dtype: int64 

8638 

8639 Pass a custom function via ``apply`` 

8640 

8641 >>> def custom_resampler(arraylike): 

8642 ... return np.sum(arraylike) + 5 

8643 ... 

8644 >>> series.resample('3T').apply(custom_resampler) 

8645 2000-01-01 00:00:00 8 

8646 2000-01-01 00:03:00 17 

8647 2000-01-01 00:06:00 26 

8648 Freq: 3T, dtype: int64 

8649 

8650 For a Series with a PeriodIndex, the keyword `convention` can be 

8651 used to control whether to use the start or end of `rule`. 

8652 

8653 Resample a year by quarter using 'start' `convention`. Values are 

8654 assigned to the first quarter of the period. 

8655 

8656 >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', 

8657 ... freq='A', 

8658 ... periods=2)) 

8659 >>> s 

8660 2012 1 

8661 2013 2 

8662 Freq: A-DEC, dtype: int64 

8663 >>> s.resample('Q', convention='start').asfreq() 

8664 2012Q1 1.0 

8665 2012Q2 NaN 

8666 2012Q3 NaN 

8667 2012Q4 NaN 

8668 2013Q1 2.0 

8669 2013Q2 NaN 

8670 2013Q3 NaN 

8671 2013Q4 NaN 

8672 Freq: Q-DEC, dtype: float64 

8673 

8674 Resample quarters by month using 'end' `convention`. Values are 

8675 assigned to the last month of the period. 

8676 

8677 >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', 

8678 ... freq='Q', 

8679 ... periods=4)) 

8680 >>> q 

8681 2018Q1 1 

8682 2018Q2 2 

8683 2018Q3 3 

8684 2018Q4 4 

8685 Freq: Q-DEC, dtype: int64 

8686 >>> q.resample('M', convention='end').asfreq() 

8687 2018-03 1.0 

8688 2018-04 NaN 

8689 2018-05 NaN 

8690 2018-06 2.0 

8691 2018-07 NaN 

8692 2018-08 NaN 

8693 2018-09 3.0 

8694 2018-10 NaN 

8695 2018-11 NaN 

8696 2018-12 4.0 

8697 Freq: M, dtype: float64 

8698 

8699 For DataFrame objects, the keyword `on` can be used to specify the 

8700 column instead of the index for resampling. 

8701 

8702 >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8703 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

8704 >>> df = pd.DataFrame(d) 

8705 >>> df['week_starting'] = pd.date_range('01/01/2018', 

8706 ... periods=8, 

8707 ... freq='W') 

8708 >>> df 

8709 price volume week_starting 

8710 0 10 50 2018-01-07 

8711 1 11 60 2018-01-14 

8712 2 9 40 2018-01-21 

8713 3 13 100 2018-01-28 

8714 4 14 50 2018-02-04 

8715 5 18 100 2018-02-11 

8716 6 17 40 2018-02-18 

8717 7 19 50 2018-02-25 

8718 >>> df.resample('M', on='week_starting').mean() 

8719 price volume 

8720 week_starting 

8721 2018-01-31 10.75 62.5 

8722 2018-02-28 17.00 60.0 

8723 

8724 For a DataFrame with MultiIndex, the keyword `level` can be used to 

8725 specify on which level the resampling needs to take place. 

8726 

8727 >>> days = pd.date_range('1/1/2000', periods=4, freq='D') 

8728 >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

8729 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

8730 >>> df2 = pd.DataFrame( 

8731 ... d2, 

8732 ... index=pd.MultiIndex.from_product( 

8733 ... [days, ['morning', 'afternoon']] 

8734 ... ) 

8735 ... ) 

8736 >>> df2 

8737 price volume 

8738 2000-01-01 morning 10 50 

8739 afternoon 11 60 

8740 2000-01-02 morning 9 40 

8741 afternoon 13 100 

8742 2000-01-03 morning 14 50 

8743 afternoon 18 100 

8744 2000-01-04 morning 17 40 

8745 afternoon 19 50 

8746 >>> df2.resample('D', level=0).sum() 

8747 price volume 

8748 2000-01-01 21 110 

8749 2000-01-02 22 140 

8750 2000-01-03 32 150 

8751 2000-01-04 36 90 

8752 

8753 If you want to adjust the start of the bins based on a fixed timestamp: 

8754 

8755 >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' 

8756 >>> rng = pd.date_range(start, end, freq='7min') 

8757 >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) 

8758 >>> ts 

8759 2000-10-01 23:30:00 0 

8760 2000-10-01 23:37:00 3 

8761 2000-10-01 23:44:00 6 

8762 2000-10-01 23:51:00 9 

8763 2000-10-01 23:58:00 12 

8764 2000-10-02 00:05:00 15 

8765 2000-10-02 00:12:00 18 

8766 2000-10-02 00:19:00 21 

8767 2000-10-02 00:26:00 24 

8768 Freq: 7T, dtype: int64 

8769 

8770 >>> ts.resample('17min').sum() 

8771 2000-10-01 23:14:00 0 

8772 2000-10-01 23:31:00 9 

8773 2000-10-01 23:48:00 21 

8774 2000-10-02 00:05:00 54 

8775 2000-10-02 00:22:00 24 

8776 Freq: 17T, dtype: int64 

8777 

8778 >>> ts.resample('17min', origin='epoch').sum() 

8779 2000-10-01 23:18:00 0 

8780 2000-10-01 23:35:00 18 

8781 2000-10-01 23:52:00 27 

8782 2000-10-02 00:09:00 39 

8783 2000-10-02 00:26:00 24 

8784 Freq: 17T, dtype: int64 

8785 

8786 >>> ts.resample('17min', origin='2000-01-01').sum() 

8787 2000-10-01 23:24:00 3 

8788 2000-10-01 23:41:00 15 

8789 2000-10-01 23:58:00 45 

8790 2000-10-02 00:15:00 45 

8791 Freq: 17T, dtype: int64 

8792 

8793 If you want to adjust the start of the bins with an `offset` Timedelta, the two 

8794 following lines are equivalent: 

8795 

8796 >>> ts.resample('17min', origin='start').sum() 

8797 2000-10-01 23:30:00 9 

8798 2000-10-01 23:47:00 21 

8799 2000-10-02 00:04:00 54 

8800 2000-10-02 00:21:00 24 

8801 Freq: 17T, dtype: int64 

8802 

8803 >>> ts.resample('17min', offset='23h30min').sum() 

8804 2000-10-01 23:30:00 9 

8805 2000-10-01 23:47:00 21 

8806 2000-10-02 00:04:00 54 

8807 2000-10-02 00:21:00 24 

8808 Freq: 17T, dtype: int64 

8809 

8810 If you want to take the largest Timestamp as the end of the bins: 

8811 

8812 >>> ts.resample('17min', origin='end').sum() 

8813 2000-10-01 23:35:00 0 

8814 2000-10-01 23:52:00 18 

8815 2000-10-02 00:09:00 27 

8816 2000-10-02 00:26:00 63 

8817 Freq: 17T, dtype: int64 

8818 

8819 In contrast with the `start_day`, you can use `end_day` to take the ceiling 

8820 midnight of the largest Timestamp as the end of the bins and drop the bins 

8821 not containing data: 

8822 

8823 >>> ts.resample('17min', origin='end_day').sum() 

8824 2000-10-01 23:38:00 3 

8825 2000-10-01 23:55:00 15 

8826 2000-10-02 00:12:00 45 

8827 2000-10-02 00:29:00 45 

8828 Freq: 17T, dtype: int64 

8829 

8830 To replace the use of the deprecated `base` argument, you can now use `offset`, 

8831 in this example it is equivalent to have `base=2`: 

8832 

8833 >>> ts.resample('17min', offset='2min').sum() 

8834 2000-10-01 23:16:00 0 

8835 2000-10-01 23:33:00 9 

8836 2000-10-01 23:50:00 36 

8837 2000-10-02 00:07:00 39 

8838 2000-10-02 00:24:00 24 

8839 Freq: 17T, dtype: int64 

8840 

8841 To replace the use of the deprecated `loffset` argument: 

8842 

8843 >>> from pandas.tseries.frequencies import to_offset 

8844 >>> loffset = '19min' 

8845 >>> ts_out = ts.resample('17min').sum() 

8846 >>> ts_out.index = ts_out.index + to_offset(loffset) 

8847 >>> ts_out 

8848 2000-10-01 23:33:00 0 

8849 2000-10-01 23:50:00 9 

8850 2000-10-02 00:07:00 21 

8851 2000-10-02 00:24:00 54 

8852 2000-10-02 00:41:00 24 

8853 Freq: 17T, dtype: int64 

8854 """ 

8855 from pandas.core.resample import get_resampler 

8856 

8857 axis = self._get_axis_number(axis) 

8858 return get_resampler( 

8859 self, 

8860 freq=rule, 

8861 label=label, 

8862 closed=closed, 

8863 axis=axis, 

8864 kind=kind, 

8865 loffset=loffset, 

8866 convention=convention, 

8867 base=base, 

8868 key=on, 

8869 level=level, 

8870 origin=origin, 

8871 offset=offset, 

8872 group_keys=group_keys, 

8873 ) 

8874 

8875 @final 

8876 def first(self: NDFrameT, offset) -> NDFrameT: 

8877 """ 

8878 Select initial periods of time series data based on a date offset. 

8879 

8880 When having a DataFrame with dates as index, this function can 

8881 select the first few rows based on a date offset. 

8882 

8883 Parameters 

8884 ---------- 

8885 offset : str, DateOffset or dateutil.relativedelta 

8886 The offset length of the data that will be selected. For instance, 

8887 '1M' will display all the rows having their index within the first month. 

8888 

8889 Returns 

8890 ------- 

8891 Series or DataFrame 

8892 A subset of the caller. 

8893 

8894 Raises 

8895 ------ 

8896 TypeError 

8897 If the index is not a :class:`DatetimeIndex` 

8898 

8899 See Also 

8900 -------- 

8901 last : Select final periods of time series based on a date offset. 

8902 at_time : Select values at a particular time of the day. 

8903 between_time : Select values between particular times of the day. 

8904 

8905 Examples 

8906 -------- 

8907 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

8908 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8909 >>> ts 

8910 A 

8911 2018-04-09 1 

8912 2018-04-11 2 

8913 2018-04-13 3 

8914 2018-04-15 4 

8915 

8916 Get the rows for the first 3 days: 

8917 

8918 >>> ts.first('3D') 

8919 A 

8920 2018-04-09 1 

8921 2018-04-11 2 

8922 

8923 Notice the data for 3 first calendar days were returned, not the first 

8924 3 days observed in the dataset, and therefore data for 2018-04-13 was 

8925 not returned. 

8926 """ 

8927 if not isinstance(self.index, DatetimeIndex): 

8928 raise TypeError("'first' only supports a DatetimeIndex index") 

8929 

8930 if len(self.index) == 0: 

8931 return self 

8932 

8933 offset = to_offset(offset) 

8934 if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): 

8935 # GH#29623 if first value is end of period, remove offset with n = 1 

8936 # before adding the real offset 

8937 end_date = end = self.index[0] - offset.base + offset 

8938 else: 

8939 end_date = end = self.index[0] + offset 

8940 

8941 # Tick-like, e.g. 3 weeks 

8942 if isinstance(offset, Tick) and end_date in self.index: 

8943 end = self.index.searchsorted(end_date, side="left") 

8944 return self.iloc[:end] 

8945 

8946 return self.loc[:end] 

8947 

8948 @final 

8949 def last(self: NDFrameT, offset) -> NDFrameT: 

8950 """ 

8951 Select final periods of time series data based on a date offset. 

8952 

8953 For a DataFrame with a sorted DatetimeIndex, this function 

8954 selects the last few rows based on a date offset. 

8955 

8956 Parameters 

8957 ---------- 

8958 offset : str, DateOffset, dateutil.relativedelta 

8959 The offset length of the data that will be selected. For instance, 

8960 '3D' will display all the rows having their index within the last 3 days. 

8961 

8962 Returns 

8963 ------- 

8964 Series or DataFrame 

8965 A subset of the caller. 

8966 

8967 Raises 

8968 ------ 

8969 TypeError 

8970 If the index is not a :class:`DatetimeIndex` 

8971 

8972 See Also 

8973 -------- 

8974 first : Select initial periods of time series based on a date offset. 

8975 at_time : Select values at a particular time of the day. 

8976 between_time : Select values between particular times of the day. 

8977 

8978 Examples 

8979 -------- 

8980 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

8981 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

8982 >>> ts 

8983 A 

8984 2018-04-09 1 

8985 2018-04-11 2 

8986 2018-04-13 3 

8987 2018-04-15 4 

8988 

8989 Get the rows for the last 3 days: 

8990 

8991 >>> ts.last('3D') 

8992 A 

8993 2018-04-13 3 

8994 2018-04-15 4 

8995 

8996 Notice the data for 3 last calendar days were returned, not the last 

8997 3 observed days in the dataset, and therefore data for 2018-04-11 was 

8998 not returned. 

8999 """ 

9000 if not isinstance(self.index, DatetimeIndex): 

9001 raise TypeError("'last' only supports a DatetimeIndex index") 

9002 

9003 if len(self.index) == 0: 

9004 return self 

9005 

9006 offset = to_offset(offset) 

9007 

9008 start_date = self.index[-1] - offset 

9009 start = self.index.searchsorted(start_date, side="right") 

9010 return self.iloc[start:] 

9011 

9012 @final 

9013 def rank( 

9014 self: NDFrameT, 

9015 axis=0, 

9016 method: str = "average", 

9017 numeric_only: bool_t | None | lib.NoDefault = lib.no_default, 

9018 na_option: str = "keep", 

9019 ascending: bool_t = True, 

9020 pct: bool_t = False, 

9021 ) -> NDFrameT: 

9022 """ 

9023 Compute numerical data ranks (1 through n) along axis. 

9024 

9025 By default, equal values are assigned a rank that is the average of the 

9026 ranks of those values. 

9027 

9028 Parameters 

9029 ---------- 

9030 axis : {0 or 'index', 1 or 'columns'}, default 0 

9031 Index to direct ranking. 

9032 For `Series` this parameter is unused and defaults to 0. 

9033 method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' 

9034 How to rank the group of records that have the same value (i.e. ties): 

9035 

9036 * average: average rank of the group 

9037 * min: lowest rank in the group 

9038 * max: highest rank in the group 

9039 * first: ranks assigned in order they appear in the array 

9040 * dense: like 'min', but rank always increases by 1 between groups. 

9041 

9042 numeric_only : bool, optional 

9043 For DataFrame objects, rank only numeric columns if set to True. 

9044 na_option : {'keep', 'top', 'bottom'}, default 'keep' 

9045 How to rank NaN values: 

9046 

9047 * keep: assign NaN rank to NaN values 

9048 * top: assign lowest rank to NaN values 

9049 * bottom: assign highest rank to NaN values 

9050 

9051 ascending : bool, default True 

9052 Whether or not the elements should be ranked in ascending order. 

9053 pct : bool, default False 

9054 Whether or not to display the returned rankings in percentile 

9055 form. 

9056 

9057 Returns 

9058 ------- 

9059 same type as caller 

9060 Return a Series or DataFrame with data ranks as values. 

9061 

9062 See Also 

9063 -------- 

9064 core.groupby.GroupBy.rank : Rank of values within each group. 

9065 

9066 Examples 

9067 -------- 

9068 >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', 

9069 ... 'spider', 'snake'], 

9070 ... 'Number_legs': [4, 2, 4, 8, np.nan]}) 

9071 >>> df 

9072 Animal Number_legs 

9073 0 cat 4.0 

9074 1 penguin 2.0 

9075 2 dog 4.0 

9076 3 spider 8.0 

9077 4 snake NaN 

9078 

9079 Ties are assigned the mean of the ranks (by default) for the group. 

9080 

9081 >>> s = pd.Series(range(5), index=list("abcde")) 

9082 >>> s["d"] = s["b"] 

9083 >>> s.rank() 

9084 a 1.0 

9085 b 2.5 

9086 c 4.0 

9087 d 2.5 

9088 e 5.0 

9089 dtype: float64 

9090 

9091 The following example shows how the method behaves with the above 

9092 parameters: 

9093 

9094 * default_rank: this is the default behaviour obtained without using 

9095 any parameter. 

9096 * max_rank: setting ``method = 'max'`` the records that have the 

9097 same values are ranked using the highest rank (e.g.: since 'cat' 

9098 and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) 

9099 * NA_bottom: choosing ``na_option = 'bottom'``, if there are records 

9100 with NaN values they are placed at the bottom of the ranking. 

9101 * pct_rank: when setting ``pct = True``, the ranking is expressed as 

9102 percentile rank. 

9103 

9104 >>> df['default_rank'] = df['Number_legs'].rank() 

9105 >>> df['max_rank'] = df['Number_legs'].rank(method='max') 

9106 >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') 

9107 >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) 

9108 >>> df 

9109 Animal Number_legs default_rank max_rank NA_bottom pct_rank 

9110 0 cat 4.0 2.5 3.0 2.5 0.625 

9111 1 penguin 2.0 1.0 1.0 1.0 0.250 

9112 2 dog 4.0 2.5 3.0 2.5 0.625 

9113 3 spider 8.0 4.0 4.0 4.0 1.000 

9114 4 snake NaN NaN NaN 5.0 NaN 

9115 """ 

9116 warned = False 

9117 if numeric_only is None: 

9118 # GH#45036 

9119 warnings.warn( 

9120 f"'numeric_only=None' in {type(self).__name__}.rank is deprecated " 

9121 "and will raise in a future version. Pass either 'True' or " 

9122 "'False'. 'False' will be the default.", 

9123 FutureWarning, 

9124 stacklevel=find_stack_level(), 

9125 ) 

9126 warned = True 

9127 elif numeric_only is lib.no_default: 

9128 numeric_only = None 

9129 

9130 axis = self._get_axis_number(axis) 

9131 

9132 if na_option not in {"keep", "top", "bottom"}: 

9133 msg = "na_option must be one of 'keep', 'top', or 'bottom'" 

9134 raise ValueError(msg) 

9135 

9136 def ranker(data): 

9137 if data.ndim == 2: 

9138 # i.e. DataFrame, we cast to ndarray 

9139 values = data.values 

9140 else: 

9141 # i.e. Series, can dispatch to EA 

9142 values = data._values 

9143 

9144 if isinstance(values, ExtensionArray): 

9145 ranks = values._rank( 

9146 axis=axis, 

9147 method=method, 

9148 ascending=ascending, 

9149 na_option=na_option, 

9150 pct=pct, 

9151 ) 

9152 else: 

9153 ranks = algos.rank( 

9154 values, 

9155 axis=axis, 

9156 method=method, 

9157 ascending=ascending, 

9158 na_option=na_option, 

9159 pct=pct, 

9160 ) 

9161 

9162 ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) 

9163 return ranks_obj.__finalize__(self, method="rank") 

9164 

9165 # if numeric_only is None, and we can't get anything, we try with 

9166 # numeric_only=True 

9167 if numeric_only is None: 

9168 try: 

9169 return ranker(self) 

9170 except TypeError: 

9171 numeric_only = True 

9172 if not warned: 

9173 # Only warn here if we didn't already issue a warning above 

9174 # GH#45036 

9175 warnings.warn( 

9176 f"Dropping of nuisance columns in {type(self).__name__}.rank " 

9177 "is deprecated; in a future version this will raise TypeError. " 

9178 "Select only valid columns before calling rank.", 

9179 FutureWarning, 

9180 stacklevel=find_stack_level(), 

9181 ) 

9182 

9183 if numeric_only: 

9184 if self.ndim == 1 and not is_numeric_dtype(self.dtype): 

9185 # GH#47500 

9186 warnings.warn( 

9187 f"Calling Series.rank with numeric_only={numeric_only} and dtype " 

9188 f"{self.dtype} is deprecated and will raise a TypeError in a " 

9189 "future version of pandas", 

9190 category=FutureWarning, 

9191 stacklevel=find_stack_level(), 

9192 ) 

9193 data = self._get_numeric_data() 

9194 else: 

9195 data = self 

9196 

9197 return ranker(data) 

9198 

9199 @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) 

9200 def compare( 

9201 self, 

9202 other, 

9203 align_axis: Axis = 1, 

9204 keep_shape: bool_t = False, 

9205 keep_equal: bool_t = False, 

9206 result_names: Suffixes = ("self", "other"), 

9207 ): 

9208 from pandas.core.reshape.concat import concat 

9209 

9210 if type(self) is not type(other): 

9211 cls_self, cls_other = type(self).__name__, type(other).__name__ 

9212 raise TypeError( 

9213 f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" 

9214 ) 

9215 

9216 mask = ~((self == other) | (self.isna() & other.isna())) 

9217 

9218 if not keep_equal: 

9219 self = self.where(mask) 

9220 other = other.where(mask) 

9221 

9222 if not keep_shape: 

9223 if isinstance(self, ABCDataFrame): 

9224 cmask = mask.any() 

9225 rmask = mask.any(axis=1) 

9226 self = self.loc[rmask, cmask] 

9227 other = other.loc[rmask, cmask] 

9228 else: 

9229 self = self[mask] 

9230 other = other[mask] 

9231 if not isinstance(result_names, tuple): 

9232 raise TypeError( 

9233 f"Passing 'result_names' as a {type(result_names)} is not " 

9234 "supported. Provide 'result_names' as a tuple instead." 

9235 ) 

9236 

9237 if align_axis in (1, "columns"): # This is needed for Series 

9238 axis = 1 

9239 else: 

9240 axis = self._get_axis_number(align_axis) 

9241 

9242 diff = concat([self, other], axis=axis, keys=result_names) 

9243 

9244 if axis >= self.ndim: 

9245 # No need to reorganize data if stacking on new axis 

9246 # This currently applies for stacking two Series on columns 

9247 return diff 

9248 

9249 ax = diff._get_axis(axis) 

9250 ax_names = np.array(ax.names) 

9251 

9252 # set index names to positions to avoid confusion 

9253 ax.names = np.arange(len(ax_names)) 

9254 

9255 # bring self-other to inner level 

9256 order = list(range(1, ax.nlevels)) + [0] 

9257 if isinstance(diff, ABCDataFrame): 

9258 diff = diff.reorder_levels(order, axis=axis) 

9259 else: 

9260 diff = diff.reorder_levels(order) 

9261 

9262 # restore the index names in order 

9263 diff._get_axis(axis=axis).names = ax_names[order] 

9264 

9265 # reorder axis to keep things organized 

9266 indices = ( 

9267 np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() 

9268 ) 

9269 diff = diff.take(indices, axis=axis) 

9270 

9271 return diff 

9272 

9273 @doc(**_shared_doc_kwargs) 

9274 def align( 

9275 self: NDFrameT, 

9276 other: NDFrameT, 

9277 join: Literal["outer", "inner", "left", "right"] = "outer", 

9278 axis: Axis | None = None, 

9279 level: Level = None, 

9280 copy: bool_t = True, 

9281 fill_value: Hashable = None, 

9282 method: FillnaOptions | None = None, 

9283 limit: int | None = None, 

9284 fill_axis: Axis = 0, 

9285 broadcast_axis: Axis | None = None, 

9286 ) -> NDFrameT: 

9287 """ 

9288 Align two objects on their axes with the specified join method. 

9289 

9290 Join method is specified for each axis Index. 

9291 

9292 Parameters 

9293 ---------- 

9294 other : DataFrame or Series 

9295 join : {{'outer', 'inner', 'left', 'right'}}, default 'outer' 

9296 axis : allowed axis of the other object, default None 

9297 Align on index (0), columns (1), or both (None). 

9298 level : int or level name, default None 

9299 Broadcast across a level, matching Index values on the 

9300 passed MultiIndex level. 

9301 copy : bool, default True 

9302 Always returns new objects. If copy=False and no reindexing is 

9303 required then original objects are returned. 

9304 fill_value : scalar, default np.NaN 

9305 Value to use for missing values. Defaults to NaN, but can be any 

9306 "compatible" value. 

9307 method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None 

9308 Method to use for filling holes in reindexed Series: 

9309 

9310 - pad / ffill: propagate last valid observation forward to next valid. 

9311 - backfill / bfill: use NEXT valid observation to fill gap. 

9312 

9313 limit : int, default None 

9314 If method is specified, this is the maximum number of consecutive 

9315 NaN values to forward/backward fill. In other words, if there is 

9316 a gap with more than this number of consecutive NaNs, it will only 

9317 be partially filled. If method is not specified, this is the 

9318 maximum number of entries along the entire axis where NaNs will be 

9319 filled. Must be greater than 0 if not None. 

9320 fill_axis : {axes_single_arg}, default 0 

9321 Filling axis, method and limit. 

9322 broadcast_axis : {axes_single_arg}, default None 

9323 Broadcast values along this axis, if aligning two objects of 

9324 different dimensions. 

9325 

9326 Returns 

9327 ------- 

9328 (left, right) : ({klass}, type of other) 

9329 Aligned objects. 

9330 

9331 Examples 

9332 -------- 

9333 >>> df = pd.DataFrame( 

9334 ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2] 

9335 ... ) 

9336 >>> other = pd.DataFrame( 

9337 ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]], 

9338 ... columns=["A", "B", "C", "D"], 

9339 ... index=[2, 3, 4], 

9340 ... ) 

9341 >>> df 

9342 D B E A 

9343 1 1 2 3 4 

9344 2 6 7 8 9 

9345 >>> other 

9346 A B C D 

9347 2 10 20 30 40 

9348 3 60 70 80 90 

9349 4 600 700 800 900 

9350 

9351 Align on columns: 

9352 

9353 >>> left, right = df.align(other, join="outer", axis=1) 

9354 >>> left 

9355 A B C D E 

9356 1 4 2 NaN 1 3 

9357 2 9 7 NaN 6 8 

9358 >>> right 

9359 A B C D E 

9360 2 10 20 30 40 NaN 

9361 3 60 70 80 90 NaN 

9362 4 600 700 800 900 NaN 

9363 

9364 We can also align on the index: 

9365 

9366 >>> left, right = df.align(other, join="outer", axis=0) 

9367 >>> left 

9368 D B E A 

9369 1 1.0 2.0 3.0 4.0 

9370 2 6.0 7.0 8.0 9.0 

9371 3 NaN NaN NaN NaN 

9372 4 NaN NaN NaN NaN 

9373 >>> right 

9374 A B C D 

9375 1 NaN NaN NaN NaN 

9376 2 10.0 20.0 30.0 40.0 

9377 3 60.0 70.0 80.0 90.0 

9378 4 600.0 700.0 800.0 900.0 

9379 

9380 Finally, the default `axis=None` will align on both index and columns: 

9381 

9382 >>> left, right = df.align(other, join="outer", axis=None) 

9383 >>> left 

9384 A B C D E 

9385 1 4.0 2.0 NaN 1.0 3.0 

9386 2 9.0 7.0 NaN 6.0 8.0 

9387 3 NaN NaN NaN NaN NaN 

9388 4 NaN NaN NaN NaN NaN 

9389 >>> right 

9390 A B C D E 

9391 1 NaN NaN NaN NaN NaN 

9392 2 10.0 20.0 30.0 40.0 NaN 

9393 3 60.0 70.0 80.0 90.0 NaN 

9394 4 600.0 700.0 800.0 900.0 NaN 

9395 """ 

9396 

9397 method = missing.clean_fill_method(method) 

9398 

9399 if broadcast_axis == 1 and self.ndim != other.ndim: 

9400 if isinstance(self, ABCSeries): 

9401 # this means other is a DataFrame, and we need to broadcast 

9402 # self 

9403 cons = self._constructor_expanddim 

9404 df = cons( 

9405 {c: self for c in other.columns}, **other._construct_axes_dict() 

9406 ) 

9407 return df._align_frame( 

9408 other, 

9409 join=join, 

9410 axis=axis, 

9411 level=level, 

9412 copy=copy, 

9413 fill_value=fill_value, 

9414 method=method, 

9415 limit=limit, 

9416 fill_axis=fill_axis, 

9417 ) 

9418 elif isinstance(other, ABCSeries): 

9419 # this means self is a DataFrame, and we need to broadcast 

9420 # other 

9421 cons = other._constructor_expanddim 

9422 df = cons( 

9423 {c: other for c in self.columns}, **self._construct_axes_dict() 

9424 ) 

9425 return self._align_frame( 

9426 df, 

9427 join=join, 

9428 axis=axis, 

9429 level=level, 

9430 copy=copy, 

9431 fill_value=fill_value, 

9432 method=method, 

9433 limit=limit, 

9434 fill_axis=fill_axis, 

9435 ) 

9436 

9437 if axis is not None: 

9438 axis = self._get_axis_number(axis) 

9439 if isinstance(other, ABCDataFrame): 

9440 return self._align_frame( 

9441 other, 

9442 join=join, 

9443 axis=axis, 

9444 level=level, 

9445 copy=copy, 

9446 fill_value=fill_value, 

9447 method=method, 

9448 limit=limit, 

9449 fill_axis=fill_axis, 

9450 ) 

9451 elif isinstance(other, ABCSeries): 

9452 return self._align_series( 

9453 other, 

9454 join=join, 

9455 axis=axis, 

9456 level=level, 

9457 copy=copy, 

9458 fill_value=fill_value, 

9459 method=method, 

9460 limit=limit, 

9461 fill_axis=fill_axis, 

9462 ) 

9463 else: # pragma: no cover 

9464 raise TypeError(f"unsupported type: {type(other)}") 

9465 

9466 @final 

9467 def _align_frame( 

9468 self, 

9469 other, 

9470 join="outer", 

9471 axis=None, 

9472 level=None, 

9473 copy: bool_t = True, 

9474 fill_value=None, 

9475 method=None, 

9476 limit=None, 

9477 fill_axis=0, 

9478 ): 

9479 # defaults 

9480 join_index, join_columns = None, None 

9481 ilidx, iridx = None, None 

9482 clidx, cridx = None, None 

9483 

9484 is_series = isinstance(self, ABCSeries) 

9485 

9486 if (axis is None or axis == 0) and not self.index.equals(other.index): 

9487 join_index, ilidx, iridx = self.index.join( 

9488 other.index, how=join, level=level, return_indexers=True 

9489 ) 

9490 

9491 if ( 

9492 (axis is None or axis == 1) 

9493 and not is_series 

9494 and not self.columns.equals(other.columns) 

9495 ): 

9496 join_columns, clidx, cridx = self.columns.join( 

9497 other.columns, how=join, level=level, return_indexers=True 

9498 ) 

9499 

9500 if is_series: 

9501 reindexers = {0: [join_index, ilidx]} 

9502 else: 

9503 reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} 

9504 

9505 left = self._reindex_with_indexers( 

9506 reindexers, copy=copy, fill_value=fill_value, allow_dups=True 

9507 ) 

9508 # other must be always DataFrame 

9509 right = other._reindex_with_indexers( 

9510 {0: [join_index, iridx], 1: [join_columns, cridx]}, 

9511 copy=copy, 

9512 fill_value=fill_value, 

9513 allow_dups=True, 

9514 ) 

9515 

9516 if method is not None: 

9517 _left = left.fillna(method=method, axis=fill_axis, limit=limit) 

9518 assert _left is not None # needed for mypy 

9519 left = _left 

9520 right = right.fillna(method=method, axis=fill_axis, limit=limit) 

9521 

9522 # if DatetimeIndex have different tz, convert to UTC 

9523 left, right = _align_as_utc(left, right, join_index) 

9524 

9525 return ( 

9526 left.__finalize__(self), 

9527 right.__finalize__(other), 

9528 ) 

9529 

9530 @final 

9531 def _align_series( 

9532 self, 

9533 other, 

9534 join="outer", 

9535 axis=None, 

9536 level=None, 

9537 copy: bool_t = True, 

9538 fill_value=None, 

9539 method=None, 

9540 limit=None, 

9541 fill_axis=0, 

9542 ): 

9543 

9544 is_series = isinstance(self, ABCSeries) 

9545 

9546 if (not is_series and axis is None) or axis not in [None, 0, 1]: 

9547 raise ValueError("Must specify axis=0 or 1") 

9548 

9549 if is_series and axis == 1: 

9550 raise ValueError("cannot align series to a series other than axis 0") 

9551 

9552 # series/series compat, other must always be a Series 

9553 if not axis: 

9554 

9555 # equal 

9556 if self.index.equals(other.index): 

9557 join_index, lidx, ridx = None, None, None 

9558 else: 

9559 join_index, lidx, ridx = self.index.join( 

9560 other.index, how=join, level=level, return_indexers=True 

9561 ) 

9562 

9563 if is_series: 

9564 left = self._reindex_indexer(join_index, lidx, copy) 

9565 elif lidx is None or join_index is None: 

9566 left = self.copy() if copy else self 

9567 else: 

9568 left = self._constructor( 

9569 self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) 

9570 ) 

9571 

9572 right = other._reindex_indexer(join_index, ridx, copy) 

9573 

9574 else: 

9575 

9576 # one has > 1 ndim 

9577 fdata = self._mgr 

9578 join_index = self.axes[1] 

9579 lidx, ridx = None, None 

9580 if not join_index.equals(other.index): 

9581 join_index, lidx, ridx = join_index.join( 

9582 other.index, how=join, level=level, return_indexers=True 

9583 ) 

9584 

9585 if lidx is not None: 

9586 bm_axis = self._get_block_manager_axis(1) 

9587 fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) 

9588 

9589 if copy and fdata is self._mgr: 

9590 fdata = fdata.copy() 

9591 

9592 left = self._constructor(fdata) 

9593 

9594 if ridx is None: 

9595 right = other 

9596 else: 

9597 right = other.reindex(join_index, level=level) 

9598 

9599 # fill 

9600 fill_na = notna(fill_value) or (method is not None) 

9601 if fill_na: 

9602 left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) 

9603 right = right.fillna(fill_value, method=method, limit=limit) 

9604 

9605 # if DatetimeIndex have different tz, convert to UTC 

9606 if is_series or (not is_series and axis == 0): 

9607 left, right = _align_as_utc(left, right, join_index) 

9608 

9609 return ( 

9610 left.__finalize__(self), 

9611 right.__finalize__(other), 

9612 ) 

9613 

9614 @final 

9615 def _where( 

9616 self, 

9617 cond, 

9618 other=lib.no_default, 

9619 inplace=False, 

9620 axis=None, 

9621 level=None, 

9622 ): 

9623 """ 

9624 Equivalent to public method `where`, except that `other` is not 

9625 applied as a function even if callable. Used in __setitem__. 

9626 """ 

9627 inplace = validate_bool_kwarg(inplace, "inplace") 

9628 

9629 if axis is not None: 

9630 axis = self._get_axis_number(axis) 

9631 

9632 # align the cond to same shape as myself 

9633 cond = com.apply_if_callable(cond, self) 

9634 if isinstance(cond, NDFrame): 

9635 cond, _ = cond.align(self, join="right", broadcast_axis=1, copy=False) 

9636 else: 

9637 if not hasattr(cond, "shape"): 

9638 cond = np.asanyarray(cond) 

9639 if cond.shape != self.shape: 

9640 raise ValueError("Array conditional must be same shape as self") 

9641 cond = self._constructor(cond, **self._construct_axes_dict()) 

9642 

9643 # make sure we are boolean 

9644 fill_value = bool(inplace) 

9645 cond = cond.fillna(fill_value) 

9646 

9647 msg = "Boolean array expected for the condition, not {dtype}" 

9648 

9649 if not cond.empty: 

9650 if not isinstance(cond, ABCDataFrame): 

9651 # This is a single-dimensional object. 

9652 if not is_bool_dtype(cond): 

9653 raise ValueError(msg.format(dtype=cond.dtype)) 

9654 else: 

9655 for dt in cond.dtypes: 

9656 if not is_bool_dtype(dt): 

9657 raise ValueError(msg.format(dtype=dt)) 

9658 else: 

9659 # GH#21947 we have an empty DataFrame/Series, could be object-dtype 

9660 cond = cond.astype(bool) 

9661 

9662 cond = -cond if inplace else cond 

9663 cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False) 

9664 

9665 # try to align with other 

9666 if isinstance(other, NDFrame): 

9667 

9668 # align with me 

9669 if other.ndim <= self.ndim: 

9670 

9671 _, other = self.align( 

9672 other, 

9673 join="left", 

9674 axis=axis, 

9675 level=level, 

9676 fill_value=None, 

9677 copy=False, 

9678 ) 

9679 

9680 # if we are NOT aligned, raise as we cannot where index 

9681 if axis is None and not other._indexed_same(self): 

9682 raise InvalidIndexError 

9683 

9684 elif other.ndim < self.ndim: 

9685 # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 

9686 other = other._values 

9687 if axis == 0: 

9688 other = np.reshape(other, (-1, 1)) 

9689 elif axis == 1: 

9690 other = np.reshape(other, (1, -1)) 

9691 

9692 other = np.broadcast_to(other, self.shape) 

9693 

9694 # slice me out of the other 

9695 else: 

9696 raise NotImplementedError( 

9697 "cannot align with a higher dimensional NDFrame" 

9698 ) 

9699 

9700 elif not isinstance(other, (MultiIndex, NDFrame)): 

9701 # mainly just catching Index here 

9702 other = extract_array(other, extract_numpy=True) 

9703 

9704 if isinstance(other, (np.ndarray, ExtensionArray)): 

9705 

9706 if other.shape != self.shape: 

9707 if self.ndim != 1: 

9708 # In the ndim == 1 case we may have 

9709 # other length 1, which we treat as scalar (GH#2745, GH#4192) 

9710 # or len(other) == icond.sum(), which we treat like 

9711 # __setitem__ (GH#3235) 

9712 raise ValueError( 

9713 "other must be the same shape as self when an ndarray" 

9714 ) 

9715 

9716 # we are the same shape, so create an actual object for alignment 

9717 else: 

9718 other = self._constructor(other, **self._construct_axes_dict()) 

9719 

9720 if axis is None: 

9721 axis = 0 

9722 

9723 if self.ndim == getattr(other, "ndim", 0): 

9724 align = True 

9725 else: 

9726 align = self._get_axis_number(axis) == 1 

9727 

9728 if inplace: 

9729 # we may have different type blocks come out of putmask, so 

9730 # reconstruct the block manager 

9731 

9732 self._check_inplace_setting(other) 

9733 new_data = self._mgr.putmask(mask=cond, new=other, align=align) 

9734 result = self._constructor(new_data) 

9735 return self._update_inplace(result) 

9736 

9737 else: 

9738 new_data = self._mgr.where( 

9739 other=other, 

9740 cond=cond, 

9741 align=align, 

9742 ) 

9743 result = self._constructor(new_data) 

9744 return result.__finalize__(self) 

9745 

9746 @overload 

9747 def where( 

9748 self: NDFrameT, 

9749 cond, 

9750 other=..., 

9751 *, 

9752 inplace: Literal[False] = ..., 

9753 axis: Axis | None = ..., 

9754 level: Level = ..., 

9755 errors: IgnoreRaise | lib.NoDefault = ..., 

9756 try_cast: bool_t | lib.NoDefault = ..., 

9757 ) -> NDFrameT: 

9758 ... 

9759 

9760 @overload 

9761 def where( 

9762 self, 

9763 cond, 

9764 other=..., 

9765 *, 

9766 inplace: Literal[True], 

9767 axis: Axis | None = ..., 

9768 level: Level = ..., 

9769 errors: IgnoreRaise | lib.NoDefault = ..., 

9770 try_cast: bool_t | lib.NoDefault = ..., 

9771 ) -> None: 

9772 ... 

9773 

9774 @overload 

9775 def where( 

9776 self: NDFrameT, 

9777 cond, 

9778 other=..., 

9779 *, 

9780 inplace: bool_t = ..., 

9781 axis: Axis | None = ..., 

9782 level: Level = ..., 

9783 errors: IgnoreRaise | lib.NoDefault = ..., 

9784 try_cast: bool_t | lib.NoDefault = ..., 

9785 ) -> NDFrameT | None: 

9786 ... 

9787 

9788 @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) 

9789 @deprecate_nonkeyword_arguments( 

9790 version=None, allowed_args=["self", "cond", "other"] 

9791 ) 

9792 @doc( 

9793 klass=_shared_doc_kwargs["klass"], 

9794 cond="True", 

9795 cond_rev="False", 

9796 name="where", 

9797 name_other="mask", 

9798 ) 

9799 def where( 

9800 self: NDFrameT, 

9801 cond, 

9802 other=np.nan, 

9803 inplace: bool_t = False, 

9804 axis: Axis | None = None, 

9805 level: Level = None, 

9806 errors: IgnoreRaise | lib.NoDefault = "raise", 

9807 try_cast: bool_t | lib.NoDefault = lib.no_default, 

9808 ) -> NDFrameT | None: 

9809 """ 

9810 Replace values where the condition is {cond_rev}. 

9811 

9812 Parameters 

9813 ---------- 

9814 cond : bool {klass}, array-like, or callable 

9815 Where `cond` is {cond}, keep the original value. Where 

9816 {cond_rev}, replace with corresponding value from `other`. 

9817 If `cond` is callable, it is computed on the {klass} and 

9818 should return boolean {klass} or array. The callable must 

9819 not change input {klass} (though pandas doesn't check it). 

9820 other : scalar, {klass}, or callable 

9821 Entries where `cond` is {cond_rev} are replaced with 

9822 corresponding value from `other`. 

9823 If other is callable, it is computed on the {klass} and 

9824 should return scalar or {klass}. The callable must not 

9825 change input {klass} (though pandas doesn't check it). 

9826 inplace : bool, default False 

9827 Whether to perform the operation in place on the data. 

9828 axis : int, default None 

9829 Alignment axis if needed. For `Series` this parameter is 

9830 unused and defaults to 0. 

9831 level : int, default None 

9832 Alignment level if needed. 

9833 errors : str, {{'raise', 'ignore'}}, default 'raise' 

9834 Note that currently this parameter won't affect 

9835 the results and will always coerce to a suitable dtype. 

9836 

9837 - 'raise' : allow exceptions to be raised. 

9838 - 'ignore' : suppress exceptions. On error return original object. 

9839 

9840 .. deprecated:: 1.5.0 

9841 This argument had no effect. 

9842 

9843 try_cast : bool, default None 

9844 Try to cast the result back to the input type (if possible). 

9845 

9846 .. deprecated:: 1.3.0 

9847 Manually cast back if necessary. 

9848 

9849 Returns 

9850 ------- 

9851 Same type as caller or None if ``inplace=True``. 

9852 

9853 See Also 

9854 -------- 

9855 :func:`DataFrame.{name_other}` : Return an object of same shape as 

9856 self. 

9857 

9858 Notes 

9859 ----- 

9860 The {name} method is an application of the if-then idiom. For each 

9861 element in the calling DataFrame, if ``cond`` is ``{cond}`` the 

9862 element is used; otherwise the corresponding element from the DataFrame 

9863 ``other`` is used. If the axis of ``other`` does not align with axis of 

9864 ``cond`` {klass}, the misaligned index positions will be filled with 

9865 {cond_rev}. 

9866 

9867 The signature for :func:`DataFrame.where` differs from 

9868 :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to 

9869 ``np.where(m, df1, df2)``. 

9870 

9871 For further details and examples see the ``{name}`` documentation in 

9872 :ref:`indexing <indexing.where_mask>`. 

9873 

9874 The dtype of the object takes precedence. The fill value is casted to 

9875 the object's dtype, if this can be done losslessly. 

9876 

9877 Examples 

9878 -------- 

9879 >>> s = pd.Series(range(5)) 

9880 >>> s.where(s > 0) 

9881 0 NaN 

9882 1 1.0 

9883 2 2.0 

9884 3 3.0 

9885 4 4.0 

9886 dtype: float64 

9887 >>> s.mask(s > 0) 

9888 0 0.0 

9889 1 NaN 

9890 2 NaN 

9891 3 NaN 

9892 4 NaN 

9893 dtype: float64 

9894 

9895 >>> s = pd.Series(range(5)) 

9896 >>> t = pd.Series([True, False]) 

9897 >>> s.where(t, 99) 

9898 0 0 

9899 1 99 

9900 2 99 

9901 3 99 

9902 4 99 

9903 dtype: int64 

9904 >>> s.mask(t, 99) 

9905 0 99 

9906 1 1 

9907 2 99 

9908 3 99 

9909 4 99 

9910 dtype: int64 

9911 

9912 >>> s.where(s > 1, 10) 

9913 0 10 

9914 1 10 

9915 2 2 

9916 3 3 

9917 4 4 

9918 dtype: int64 

9919 >>> s.mask(s > 1, 10) 

9920 0 0 

9921 1 1 

9922 2 10 

9923 3 10 

9924 4 10 

9925 dtype: int64 

9926 

9927 >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) 

9928 >>> df 

9929 A B 

9930 0 0 1 

9931 1 2 3 

9932 2 4 5 

9933 3 6 7 

9934 4 8 9 

9935 >>> m = df % 3 == 0 

9936 >>> df.where(m, -df) 

9937 A B 

9938 0 0 -1 

9939 1 -2 3 

9940 2 -4 -5 

9941 3 6 -7 

9942 4 -8 9 

9943 >>> df.where(m, -df) == np.where(m, df, -df) 

9944 A B 

9945 0 True True 

9946 1 True True 

9947 2 True True 

9948 3 True True 

9949 4 True True 

9950 >>> df.where(m, -df) == df.mask(~m, -df) 

9951 A B 

9952 0 True True 

9953 1 True True 

9954 2 True True 

9955 3 True True 

9956 4 True True 

9957 """ 

9958 other = com.apply_if_callable(other, self) 

9959 

9960 if try_cast is not lib.no_default: 

9961 warnings.warn( 

9962 "try_cast keyword is deprecated and will be removed in a " 

9963 "future version.", 

9964 FutureWarning, 

9965 stacklevel=find_stack_level(), 

9966 ) 

9967 

9968 return self._where(cond, other, inplace, axis, level) 

9969 

9970 @overload 

9971 def mask( 

9972 self: NDFrameT, 

9973 cond, 

9974 other=..., 

9975 *, 

9976 inplace: Literal[False] = ..., 

9977 axis: Axis | None = ..., 

9978 level: Level = ..., 

9979 errors: IgnoreRaise | lib.NoDefault = ..., 

9980 try_cast: bool_t | lib.NoDefault = ..., 

9981 ) -> NDFrameT: 

9982 ... 

9983 

9984 @overload 

9985 def mask( 

9986 self, 

9987 cond, 

9988 other=..., 

9989 *, 

9990 inplace: Literal[True], 

9991 axis: Axis | None = ..., 

9992 level: Level = ..., 

9993 errors: IgnoreRaise | lib.NoDefault = ..., 

9994 try_cast: bool_t | lib.NoDefault = ..., 

9995 ) -> None: 

9996 ... 

9997 

9998 @overload 

9999 def mask( 

10000 self: NDFrameT, 

10001 cond, 

10002 other=..., 

10003 *, 

10004 inplace: bool_t = ..., 

10005 axis: Axis | None = ..., 

10006 level: Level = ..., 

10007 errors: IgnoreRaise | lib.NoDefault = ..., 

10008 try_cast: bool_t | lib.NoDefault = ..., 

10009 ) -> NDFrameT | None: 

10010 ... 

10011 

10012 @deprecate_kwarg(old_arg_name="errors", new_arg_name=None) 

10013 @deprecate_nonkeyword_arguments( 

10014 version=None, allowed_args=["self", "cond", "other"] 

10015 ) 

10016 @doc( 

10017 where, 

10018 klass=_shared_doc_kwargs["klass"], 

10019 cond="False", 

10020 cond_rev="True", 

10021 name="mask", 

10022 name_other="where", 

10023 ) 

10024 def mask( 

10025 self: NDFrameT, 

10026 cond, 

10027 other=np.nan, 

10028 inplace: bool_t = False, 

10029 axis: Axis | None = None, 

10030 level: Level = None, 

10031 errors: IgnoreRaise | lib.NoDefault = "raise", 

10032 try_cast: bool_t | lib.NoDefault = lib.no_default, 

10033 ) -> NDFrameT | None: 

10034 

10035 inplace = validate_bool_kwarg(inplace, "inplace") 

10036 cond = com.apply_if_callable(cond, self) 

10037 

10038 if try_cast is not lib.no_default: 

10039 warnings.warn( 

10040 "try_cast keyword is deprecated and will be removed in a " 

10041 "future version.", 

10042 FutureWarning, 

10043 stacklevel=find_stack_level(), 

10044 ) 

10045 

10046 # see gh-21891 

10047 if not hasattr(cond, "__invert__"): 

10048 cond = np.array(cond) 

10049 

10050 return self.where( 

10051 ~cond, 

10052 other=other, 

10053 inplace=inplace, 

10054 axis=axis, 

10055 level=level, 

10056 ) 

10057 

10058 @doc(klass=_shared_doc_kwargs["klass"]) 

10059 def shift( 

10060 self: NDFrameT, 

10061 periods: int = 1, 

10062 freq=None, 

10063 axis: Axis = 0, 

10064 fill_value: Hashable = None, 

10065 ) -> NDFrameT: 

10066 """ 

10067 Shift index by desired number of periods with an optional time `freq`. 

10068 

10069 When `freq` is not passed, shift the index without realigning the data. 

10070 If `freq` is passed (in this case, the index must be date or datetime, 

10071 or it will raise a `NotImplementedError`), the index will be 

10072 increased using the periods and the `freq`. `freq` can be inferred 

10073 when specified as "infer" as long as either freq or inferred_freq 

10074 attribute is set in the index. 

10075 

10076 Parameters 

10077 ---------- 

10078 periods : int 

10079 Number of periods to shift. Can be positive or negative. 

10080 freq : DateOffset, tseries.offsets, timedelta, or str, optional 

10081 Offset to use from the tseries module or time rule (e.g. 'EOM'). 

10082 If `freq` is specified then the index values are shifted but the 

10083 data is not realigned. That is, use `freq` if you would like to 

10084 extend the index when shifting and preserve the original data. 

10085 If `freq` is specified as "infer" then it will be inferred from 

10086 the freq or inferred_freq attributes of the index. If neither of 

10087 those attributes exist, a ValueError is thrown. 

10088 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

10089 Shift direction. For `Series` this parameter is unused and defaults to 0. 

10090 fill_value : object, optional 

10091 The scalar value to use for newly introduced missing values. 

10092 the default depends on the dtype of `self`. 

10093 For numeric data, ``np.nan`` is used. 

10094 For datetime, timedelta, or period data, etc. :attr:`NaT` is used. 

10095 For extension dtypes, ``self.dtype.na_value`` is used. 

10096 

10097 .. versionchanged:: 1.1.0 

10098 

10099 Returns 

10100 ------- 

10101 {klass} 

10102 Copy of input object, shifted. 

10103 

10104 See Also 

10105 -------- 

10106 Index.shift : Shift values of Index. 

10107 DatetimeIndex.shift : Shift values of DatetimeIndex. 

10108 PeriodIndex.shift : Shift values of PeriodIndex. 

10109 tshift : Shift the time index, using the index's frequency if 

10110 available. 

10111 

10112 Examples 

10113 -------- 

10114 >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], 

10115 ... "Col2": [13, 23, 18, 33, 48], 

10116 ... "Col3": [17, 27, 22, 37, 52]}}, 

10117 ... index=pd.date_range("2020-01-01", "2020-01-05")) 

10118 >>> df 

10119 Col1 Col2 Col3 

10120 2020-01-01 10 13 17 

10121 2020-01-02 20 23 27 

10122 2020-01-03 15 18 22 

10123 2020-01-04 30 33 37 

10124 2020-01-05 45 48 52 

10125 

10126 >>> df.shift(periods=3) 

10127 Col1 Col2 Col3 

10128 2020-01-01 NaN NaN NaN 

10129 2020-01-02 NaN NaN NaN 

10130 2020-01-03 NaN NaN NaN 

10131 2020-01-04 10.0 13.0 17.0 

10132 2020-01-05 20.0 23.0 27.0 

10133 

10134 >>> df.shift(periods=1, axis="columns") 

10135 Col1 Col2 Col3 

10136 2020-01-01 NaN 10 13 

10137 2020-01-02 NaN 20 23 

10138 2020-01-03 NaN 15 18 

10139 2020-01-04 NaN 30 33 

10140 2020-01-05 NaN 45 48 

10141 

10142 >>> df.shift(periods=3, fill_value=0) 

10143 Col1 Col2 Col3 

10144 2020-01-01 0 0 0 

10145 2020-01-02 0 0 0 

10146 2020-01-03 0 0 0 

10147 2020-01-04 10 13 17 

10148 2020-01-05 20 23 27 

10149 

10150 >>> df.shift(periods=3, freq="D") 

10151 Col1 Col2 Col3 

10152 2020-01-04 10 13 17 

10153 2020-01-05 20 23 27 

10154 2020-01-06 15 18 22 

10155 2020-01-07 30 33 37 

10156 2020-01-08 45 48 52 

10157 

10158 >>> df.shift(periods=3, freq="infer") 

10159 Col1 Col2 Col3 

10160 2020-01-04 10 13 17 

10161 2020-01-05 20 23 27 

10162 2020-01-06 15 18 22 

10163 2020-01-07 30 33 37 

10164 2020-01-08 45 48 52 

10165 """ 

10166 if periods == 0: 

10167 return self.copy() 

10168 

10169 if freq is None: 

10170 # when freq is None, data is shifted, index is not 

10171 axis = self._get_axis_number(axis) 

10172 new_data = self._mgr.shift( 

10173 periods=periods, axis=axis, fill_value=fill_value 

10174 ) 

10175 return self._constructor(new_data).__finalize__(self, method="shift") 

10176 

10177 # when freq is given, index is shifted, data is not 

10178 index = self._get_axis(axis) 

10179 

10180 if freq == "infer": 

10181 freq = getattr(index, "freq", None) 

10182 

10183 if freq is None: 

10184 freq = getattr(index, "inferred_freq", None) 

10185 

10186 if freq is None: 

10187 msg = "Freq was not set in the index hence cannot be inferred" 

10188 raise ValueError(msg) 

10189 

10190 elif isinstance(freq, str): 

10191 freq = to_offset(freq) 

10192 

10193 if isinstance(index, PeriodIndex): 

10194 orig_freq = to_offset(index.freq) 

10195 if freq != orig_freq: 

10196 assert orig_freq is not None # for mypy 

10197 raise ValueError( 

10198 f"Given freq {freq.rule_code} does not match " 

10199 f"PeriodIndex freq {orig_freq.rule_code}" 

10200 ) 

10201 new_ax = index.shift(periods) 

10202 else: 

10203 new_ax = index.shift(periods, freq) 

10204 

10205 result = self.set_axis(new_ax, axis=axis) 

10206 return result.__finalize__(self, method="shift") 

10207 

10208 @final 

10209 def slice_shift(self: NDFrameT, periods: int = 1, axis=0) -> NDFrameT: 

10210 """ 

10211 Equivalent to `shift` without copying data. 

10212 

10213 .. deprecated:: 1.2.0 

10214 slice_shift is deprecated, 

10215 use DataFrame/Series.shift instead. 

10216 

10217 The shifted data will not include the dropped periods and the 

10218 shifted axis will be smaller than the original. 

10219 

10220 Parameters 

10221 ---------- 

10222 periods : int 

10223 Number of periods to move, can be positive or negative. 

10224 axis : {0 or 'index', 1 or 'columns', None}, default 0 

10225 For `Series` this parameter is unused and defaults to 0. 

10226 

10227 Returns 

10228 ------- 

10229 shifted : same type as caller 

10230 

10231 Notes 

10232 ----- 

10233 While the `slice_shift` is faster than `shift`, you may pay for it 

10234 later during alignment. 

10235 """ 

10236 

10237 msg = ( 

10238 "The 'slice_shift' method is deprecated " 

10239 "and will be removed in a future version. " 

10240 "You can use DataFrame/Series.shift instead." 

10241 ) 

10242 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) 

10243 

10244 if periods == 0: 

10245 return self 

10246 

10247 if periods > 0: 

10248 vslicer = slice(None, -periods) 

10249 islicer = slice(periods, None) 

10250 else: 

10251 vslicer = slice(-periods, None) 

10252 islicer = slice(None, periods) 

10253 

10254 new_obj = self._slice(vslicer, axis=axis) 

10255 shifted_axis = self._get_axis(axis)[islicer] 

10256 new_obj = new_obj.set_axis(shifted_axis, axis=axis, copy=False) 

10257 return new_obj.__finalize__(self, method="slice_shift") 

10258 

10259 @final 

10260 def tshift(self: NDFrameT, periods: int = 1, freq=None, axis: Axis = 0) -> NDFrameT: 

10261 """ 

10262 Shift the time index, using the index's frequency if available. 

10263 

10264 .. deprecated:: 1.1.0 

10265 Use `shift` instead. 

10266 

10267 Parameters 

10268 ---------- 

10269 periods : int 

10270 Number of periods to move, can be positive or negative. 

10271 freq : DateOffset, timedelta, or str, default None 

10272 Increment to use from the tseries module 

10273 or time rule expressed as a string (e.g. 'EOM'). 

10274 axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 

10275 Corresponds to the axis that contains the Index. 

10276 For `Series` this parameter is unused and defaults to 0. 

10277 

10278 Returns 

10279 ------- 

10280 shifted : Series/DataFrame 

10281 

10282 Notes 

10283 ----- 

10284 If freq is not specified then tries to use the freq or inferred_freq 

10285 attributes of the index. If neither of those attributes exist, a 

10286 ValueError is thrown 

10287 """ 

10288 warnings.warn( 

10289 ( 

10290 "tshift is deprecated and will be removed in a future version. " 

10291 "Please use shift instead." 

10292 ), 

10293 FutureWarning, 

10294 stacklevel=find_stack_level(), 

10295 ) 

10296 

10297 if freq is None: 

10298 freq = "infer" 

10299 

10300 return self.shift(periods, freq, axis) 

10301 

10302 def truncate( 

10303 self: NDFrameT, before=None, after=None, axis=None, copy: bool_t = True 

10304 ) -> NDFrameT: 

10305 """ 

10306 Truncate a Series or DataFrame before and after some index value. 

10307 

10308 This is a useful shorthand for boolean indexing based on index 

10309 values above or below certain thresholds. 

10310 

10311 Parameters 

10312 ---------- 

10313 before : date, str, int 

10314 Truncate all rows before this index value. 

10315 after : date, str, int 

10316 Truncate all rows after this index value. 

10317 axis : {0 or 'index', 1 or 'columns'}, optional 

10318 Axis to truncate. Truncates the index (rows) by default. 

10319 For `Series` this parameter is unused and defaults to 0. 

10320 copy : bool, default is True, 

10321 Return a copy of the truncated section. 

10322 

10323 Returns 

10324 ------- 

10325 type of caller 

10326 The truncated Series or DataFrame. 

10327 

10328 See Also 

10329 -------- 

10330 DataFrame.loc : Select a subset of a DataFrame by label. 

10331 DataFrame.iloc : Select a subset of a DataFrame by position. 

10332 

10333 Notes 

10334 ----- 

10335 If the index being truncated contains only datetime values, 

10336 `before` and `after` may be specified as strings instead of 

10337 Timestamps. 

10338 

10339 Examples 

10340 -------- 

10341 >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], 

10342 ... 'B': ['f', 'g', 'h', 'i', 'j'], 

10343 ... 'C': ['k', 'l', 'm', 'n', 'o']}, 

10344 ... index=[1, 2, 3, 4, 5]) 

10345 >>> df 

10346 A B C 

10347 1 a f k 

10348 2 b g l 

10349 3 c h m 

10350 4 d i n 

10351 5 e j o 

10352 

10353 >>> df.truncate(before=2, after=4) 

10354 A B C 

10355 2 b g l 

10356 3 c h m 

10357 4 d i n 

10358 

10359 The columns of a DataFrame can be truncated. 

10360 

10361 >>> df.truncate(before="A", after="B", axis="columns") 

10362 A B 

10363 1 a f 

10364 2 b g 

10365 3 c h 

10366 4 d i 

10367 5 e j 

10368 

10369 For Series, only rows can be truncated. 

10370 

10371 >>> df['A'].truncate(before=2, after=4) 

10372 2 b 

10373 3 c 

10374 4 d 

10375 Name: A, dtype: object 

10376 

10377 The index values in ``truncate`` can be datetimes or string 

10378 dates. 

10379 

10380 >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') 

10381 >>> df = pd.DataFrame(index=dates, data={'A': 1}) 

10382 >>> df.tail() 

10383 A 

10384 2016-01-31 23:59:56 1 

10385 2016-01-31 23:59:57 1 

10386 2016-01-31 23:59:58 1 

10387 2016-01-31 23:59:59 1 

10388 2016-02-01 00:00:00 1 

10389 

10390 >>> df.truncate(before=pd.Timestamp('2016-01-05'), 

10391 ... after=pd.Timestamp('2016-01-10')).tail() 

10392 A 

10393 2016-01-09 23:59:56 1 

10394 2016-01-09 23:59:57 1 

10395 2016-01-09 23:59:58 1 

10396 2016-01-09 23:59:59 1 

10397 2016-01-10 00:00:00 1 

10398 

10399 Because the index is a DatetimeIndex containing only dates, we can 

10400 specify `before` and `after` as strings. They will be coerced to 

10401 Timestamps before truncation. 

10402 

10403 >>> df.truncate('2016-01-05', '2016-01-10').tail() 

10404 A 

10405 2016-01-09 23:59:56 1 

10406 2016-01-09 23:59:57 1 

10407 2016-01-09 23:59:58 1 

10408 2016-01-09 23:59:59 1 

10409 2016-01-10 00:00:00 1 

10410 

10411 Note that ``truncate`` assumes a 0 value for any unspecified time 

10412 component (midnight). This differs from partial string slicing, which 

10413 returns any partially matching dates. 

10414 

10415 >>> df.loc['2016-01-05':'2016-01-10', :].tail() 

10416 A 

10417 2016-01-10 23:59:55 1 

10418 2016-01-10 23:59:56 1 

10419 2016-01-10 23:59:57 1 

10420 2016-01-10 23:59:58 1 

10421 2016-01-10 23:59:59 1 

10422 """ 

10423 if axis is None: 

10424 axis = self._stat_axis_number 

10425 axis = self._get_axis_number(axis) 

10426 ax = self._get_axis(axis) 

10427 

10428 # GH 17935 

10429 # Check that index is sorted 

10430 if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: 

10431 raise ValueError("truncate requires a sorted index") 

10432 

10433 # if we have a date index, convert to dates, otherwise 

10434 # treat like a slice 

10435 if ax._is_all_dates: 

10436 from pandas.core.tools.datetimes import to_datetime 

10437 

10438 before = to_datetime(before) 

10439 after = to_datetime(after) 

10440 

10441 if before is not None and after is not None and before > after: 

10442 raise ValueError(f"Truncate: {after} must be after {before}") 

10443 

10444 if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1: 

10445 before, after = after, before 

10446 

10447 slicer = [slice(None, None)] * self._AXIS_LEN 

10448 slicer[axis] = slice(before, after) 

10449 result = self.loc[tuple(slicer)] 

10450 

10451 if isinstance(ax, MultiIndex): 

10452 setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) 

10453 

10454 if copy: 

10455 result = result.copy() 

10456 

10457 return result 

10458 

10459 @final 

10460 @doc(klass=_shared_doc_kwargs["klass"]) 

10461 def tz_convert( 

10462 self: NDFrameT, tz, axis=0, level=None, copy: bool_t = True 

10463 ) -> NDFrameT: 

10464 """ 

10465 Convert tz-aware axis to target time zone. 

10466 

10467 Parameters 

10468 ---------- 

10469 tz : str or tzinfo object 

10470 axis : the axis to convert 

10471 level : int, str, default None 

10472 If axis is a MultiIndex, convert a specific level. Otherwise 

10473 must be None. 

10474 copy : bool, default True 

10475 Also make a copy of the underlying data. 

10476 

10477 Returns 

10478 ------- 

10479 {klass} 

10480 Object with time zone converted axis. 

10481 

10482 Raises 

10483 ------ 

10484 TypeError 

10485 If the axis is tz-naive. 

10486 """ 

10487 axis = self._get_axis_number(axis) 

10488 ax = self._get_axis(axis) 

10489 

10490 def _tz_convert(ax, tz): 

10491 if not hasattr(ax, "tz_convert"): 

10492 if len(ax) > 0: 

10493 ax_name = self._get_axis_name(axis) 

10494 raise TypeError( 

10495 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

10496 ) 

10497 else: 

10498 ax = DatetimeIndex([], tz=tz) 

10499 else: 

10500 ax = ax.tz_convert(tz) 

10501 return ax 

10502 

10503 # if a level is given it must be a MultiIndex level or 

10504 # equivalent to the axis name 

10505 if isinstance(ax, MultiIndex): 

10506 level = ax._get_level_number(level) 

10507 new_level = _tz_convert(ax.levels[level], tz) 

10508 ax = ax.set_levels(new_level, level=level) 

10509 else: 

10510 if level not in (None, 0, ax.name): 

10511 raise ValueError(f"The level {level} is not valid") 

10512 ax = _tz_convert(ax, tz) 

10513 

10514 result = self.copy(deep=copy) 

10515 result = result.set_axis(ax, axis=axis, copy=False) 

10516 return result.__finalize__(self, method="tz_convert") 

10517 

10518 @final 

10519 @doc(klass=_shared_doc_kwargs["klass"]) 

10520 def tz_localize( 

10521 self: NDFrameT, 

10522 tz, 

10523 axis=0, 

10524 level=None, 

10525 copy: bool_t = True, 

10526 ambiguous="raise", 

10527 nonexistent: str = "raise", 

10528 ) -> NDFrameT: 

10529 """ 

10530 Localize tz-naive index of a Series or DataFrame to target time zone. 

10531 

10532 This operation localizes the Index. To localize the values in a 

10533 timezone-naive Series, use :meth:`Series.dt.tz_localize`. 

10534 

10535 Parameters 

10536 ---------- 

10537 tz : str or tzinfo 

10538 axis : the axis to localize 

10539 level : int, str, default None 

10540 If axis ia a MultiIndex, localize a specific level. Otherwise 

10541 must be None. 

10542 copy : bool, default True 

10543 Also make a copy of the underlying data. 

10544 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' 

10545 When clocks moved backward due to DST, ambiguous times may arise. 

10546 For example in Central European Time (UTC+01), when going from 

10547 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 

10548 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the 

10549 `ambiguous` parameter dictates how ambiguous times should be 

10550 handled. 

10551 

10552 - 'infer' will attempt to infer fall dst-transition hours based on 

10553 order 

10554 - bool-ndarray where True signifies a DST time, False designates 

10555 a non-DST time (note that this flag is only applicable for 

10556 ambiguous times) 

10557 - 'NaT' will return NaT where there are ambiguous times 

10558 - 'raise' will raise an AmbiguousTimeError if there are ambiguous 

10559 times. 

10560 nonexistent : str, default 'raise' 

10561 A nonexistent time does not exist in a particular timezone 

10562 where clocks moved forward due to DST. Valid values are: 

10563 

10564 - 'shift_forward' will shift the nonexistent time forward to the 

10565 closest existing time 

10566 - 'shift_backward' will shift the nonexistent time backward to the 

10567 closest existing time 

10568 - 'NaT' will return NaT where there are nonexistent times 

10569 - timedelta objects will shift nonexistent times by the timedelta 

10570 - 'raise' will raise an NonExistentTimeError if there are 

10571 nonexistent times. 

10572 

10573 Returns 

10574 ------- 

10575 {klass} 

10576 Same type as the input. 

10577 

10578 Raises 

10579 ------ 

10580 TypeError 

10581 If the TimeSeries is tz-aware and tz is not None. 

10582 

10583 Examples 

10584 -------- 

10585 Localize local times: 

10586 

10587 >>> s = pd.Series([1], 

10588 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) 

10589 >>> s.tz_localize('CET') 

10590 2018-09-15 01:30:00+02:00 1 

10591 dtype: int64 

10592 

10593 Be careful with DST changes. When there is sequential data, pandas 

10594 can infer the DST time: 

10595 

10596 >>> s = pd.Series(range(7), 

10597 ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', 

10598 ... '2018-10-28 02:00:00', 

10599 ... '2018-10-28 02:30:00', 

10600 ... '2018-10-28 02:00:00', 

10601 ... '2018-10-28 02:30:00', 

10602 ... '2018-10-28 03:00:00', 

10603 ... '2018-10-28 03:30:00'])) 

10604 >>> s.tz_localize('CET', ambiguous='infer') 

10605 2018-10-28 01:30:00+02:00 0 

10606 2018-10-28 02:00:00+02:00 1 

10607 2018-10-28 02:30:00+02:00 2 

10608 2018-10-28 02:00:00+01:00 3 

10609 2018-10-28 02:30:00+01:00 4 

10610 2018-10-28 03:00:00+01:00 5 

10611 2018-10-28 03:30:00+01:00 6 

10612 dtype: int64 

10613 

10614 In some cases, inferring the DST is impossible. In such cases, you can 

10615 pass an ndarray to the ambiguous parameter to set the DST explicitly 

10616 

10617 >>> s = pd.Series(range(3), 

10618 ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', 

10619 ... '2018-10-28 02:36:00', 

10620 ... '2018-10-28 03:46:00'])) 

10621 >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) 

10622 2018-10-28 01:20:00+02:00 0 

10623 2018-10-28 02:36:00+02:00 1 

10624 2018-10-28 03:46:00+01:00 2 

10625 dtype: int64 

10626 

10627 If the DST transition causes nonexistent times, you can shift these 

10628 dates forward or backward with a timedelta object or `'shift_forward'` 

10629 or `'shift_backward'`. 

10630 

10631 >>> s = pd.Series(range(2), 

10632 ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', 

10633 ... '2015-03-29 03:30:00'])) 

10634 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 

10635 2015-03-29 03:00:00+02:00 0 

10636 2015-03-29 03:30:00+02:00 1 

10637 dtype: int64 

10638 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 

10639 2015-03-29 01:59:59.999999999+01:00 0 

10640 2015-03-29 03:30:00+02:00 1 

10641 dtype: int64 

10642 >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 

10643 2015-03-29 03:30:00+02:00 0 

10644 2015-03-29 03:30:00+02:00 1 

10645 dtype: int64 

10646 """ 

10647 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") 

10648 if nonexistent not in nonexistent_options and not isinstance( 

10649 nonexistent, timedelta 

10650 ): 

10651 raise ValueError( 

10652 "The nonexistent argument must be one of 'raise', " 

10653 "'NaT', 'shift_forward', 'shift_backward' or " 

10654 "a timedelta object" 

10655 ) 

10656 

10657 axis = self._get_axis_number(axis) 

10658 ax = self._get_axis(axis) 

10659 

10660 def _tz_localize(ax, tz, ambiguous, nonexistent): 

10661 if not hasattr(ax, "tz_localize"): 

10662 if len(ax) > 0: 

10663 ax_name = self._get_axis_name(axis) 

10664 raise TypeError( 

10665 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

10666 ) 

10667 else: 

10668 ax = DatetimeIndex([], tz=tz) 

10669 else: 

10670 ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) 

10671 return ax 

10672 

10673 # if a level is given it must be a MultiIndex level or 

10674 # equivalent to the axis name 

10675 if isinstance(ax, MultiIndex): 

10676 level = ax._get_level_number(level) 

10677 new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) 

10678 ax = ax.set_levels(new_level, level=level) 

10679 else: 

10680 if level not in (None, 0, ax.name): 

10681 raise ValueError(f"The level {level} is not valid") 

10682 ax = _tz_localize(ax, tz, ambiguous, nonexistent) 

10683 

10684 result = self.copy(deep=copy) 

10685 result = result.set_axis(ax, axis=axis, copy=False) 

10686 return result.__finalize__(self, method="tz_localize") 

10687 

10688 # ---------------------------------------------------------------------- 

10689 # Numeric Methods 

10690 

10691 @final 

10692 def describe( 

10693 self: NDFrameT, 

10694 percentiles=None, 

10695 include=None, 

10696 exclude=None, 

10697 datetime_is_numeric: bool_t = False, 

10698 ) -> NDFrameT: 

10699 """ 

10700 Generate descriptive statistics. 

10701 

10702 Descriptive statistics include those that summarize the central 

10703 tendency, dispersion and shape of a 

10704 dataset's distribution, excluding ``NaN`` values. 

10705 

10706 Analyzes both numeric and object series, as well 

10707 as ``DataFrame`` column sets of mixed data types. The output 

10708 will vary depending on what is provided. Refer to the notes 

10709 below for more detail. 

10710 

10711 Parameters 

10712 ---------- 

10713 percentiles : list-like of numbers, optional 

10714 The percentiles to include in the output. All should 

10715 fall between 0 and 1. The default is 

10716 ``[.25, .5, .75]``, which returns the 25th, 50th, and 

10717 75th percentiles. 

10718 include : 'all', list-like of dtypes or None (default), optional 

10719 A white list of data types to include in the result. Ignored 

10720 for ``Series``. Here are the options: 

10721 

10722 - 'all' : All columns of the input will be included in the output. 

10723 - A list-like of dtypes : Limits the results to the 

10724 provided data types. 

10725 To limit the result to numeric types submit 

10726 ``numpy.number``. To limit it instead to object columns submit 

10727 the ``numpy.object`` data type. Strings 

10728 can also be used in the style of 

10729 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 

10730 select pandas categorical columns, use ``'category'`` 

10731 - None (default) : The result will include all numeric columns. 

10732 exclude : list-like of dtypes or None (default), optional, 

10733 A black list of data types to omit from the result. Ignored 

10734 for ``Series``. Here are the options: 

10735 

10736 - A list-like of dtypes : Excludes the provided data types 

10737 from the result. To exclude numeric types submit 

10738 ``numpy.number``. To exclude object columns submit the data 

10739 type ``numpy.object``. Strings can also be used in the style of 

10740 ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To 

10741 exclude pandas categorical columns, use ``'category'`` 

10742 - None (default) : The result will exclude nothing. 

10743 datetime_is_numeric : bool, default False 

10744 Whether to treat datetime dtypes as numeric. This affects statistics 

10745 calculated for the column. For DataFrame input, this also 

10746 controls whether datetime columns are included by default. 

10747 

10748 .. versionadded:: 1.1.0 

10749 

10750 Returns 

10751 ------- 

10752 Series or DataFrame 

10753 Summary statistics of the Series or Dataframe provided. 

10754 

10755 See Also 

10756 -------- 

10757 DataFrame.count: Count number of non-NA/null observations. 

10758 DataFrame.max: Maximum of the values in the object. 

10759 DataFrame.min: Minimum of the values in the object. 

10760 DataFrame.mean: Mean of the values. 

10761 DataFrame.std: Standard deviation of the observations. 

10762 DataFrame.select_dtypes: Subset of a DataFrame including/excluding 

10763 columns based on their dtype. 

10764 

10765 Notes 

10766 ----- 

10767 For numeric data, the result's index will include ``count``, 

10768 ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and 

10769 upper percentiles. By default the lower percentile is ``25`` and the 

10770 upper percentile is ``75``. The ``50`` percentile is the 

10771 same as the median. 

10772 

10773 For object data (e.g. strings or timestamps), the result's index 

10774 will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` 

10775 is the most common value. The ``freq`` is the most common value's 

10776 frequency. Timestamps also include the ``first`` and ``last`` items. 

10777 

10778 If multiple object values have the highest count, then the 

10779 ``count`` and ``top`` results will be arbitrarily chosen from 

10780 among those with the highest count. 

10781 

10782 For mixed data types provided via a ``DataFrame``, the default is to 

10783 return only an analysis of numeric columns. If the dataframe consists 

10784 only of object and categorical data without any numeric columns, the 

10785 default is to return an analysis of both the object and categorical 

10786 columns. If ``include='all'`` is provided as an option, the result 

10787 will include a union of attributes of each type. 

10788 

10789 The `include` and `exclude` parameters can be used to limit 

10790 which columns in a ``DataFrame`` are analyzed for the output. 

10791 The parameters are ignored when analyzing a ``Series``. 

10792 

10793 Examples 

10794 -------- 

10795 Describing a numeric ``Series``. 

10796 

10797 >>> s = pd.Series([1, 2, 3]) 

10798 >>> s.describe() 

10799 count 3.0 

10800 mean 2.0 

10801 std 1.0 

10802 min 1.0 

10803 25% 1.5 

10804 50% 2.0 

10805 75% 2.5 

10806 max 3.0 

10807 dtype: float64 

10808 

10809 Describing a categorical ``Series``. 

10810 

10811 >>> s = pd.Series(['a', 'a', 'b', 'c']) 

10812 >>> s.describe() 

10813 count 4 

10814 unique 3 

10815 top a 

10816 freq 2 

10817 dtype: object 

10818 

10819 Describing a timestamp ``Series``. 

10820 

10821 >>> s = pd.Series([ 

10822 ... np.datetime64("2000-01-01"), 

10823 ... np.datetime64("2010-01-01"), 

10824 ... np.datetime64("2010-01-01") 

10825 ... ]) 

10826 >>> s.describe(datetime_is_numeric=True) 

10827 count 3 

10828 mean 2006-09-01 08:00:00 

10829 min 2000-01-01 00:00:00 

10830 25% 2004-12-31 12:00:00 

10831 50% 2010-01-01 00:00:00 

10832 75% 2010-01-01 00:00:00 

10833 max 2010-01-01 00:00:00 

10834 dtype: object 

10835 

10836 Describing a ``DataFrame``. By default only numeric fields 

10837 are returned. 

10838 

10839 >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), 

10840 ... 'numeric': [1, 2, 3], 

10841 ... 'object': ['a', 'b', 'c'] 

10842 ... }) 

10843 >>> df.describe() 

10844 numeric 

10845 count 3.0 

10846 mean 2.0 

10847 std 1.0 

10848 min 1.0 

10849 25% 1.5 

10850 50% 2.0 

10851 75% 2.5 

10852 max 3.0 

10853 

10854 Describing all columns of a ``DataFrame`` regardless of data type. 

10855 

10856 >>> df.describe(include='all') # doctest: +SKIP 

10857 categorical numeric object 

10858 count 3 3.0 3 

10859 unique 3 NaN 3 

10860 top f NaN a 

10861 freq 1 NaN 1 

10862 mean NaN 2.0 NaN 

10863 std NaN 1.0 NaN 

10864 min NaN 1.0 NaN 

10865 25% NaN 1.5 NaN 

10866 50% NaN 2.0 NaN 

10867 75% NaN 2.5 NaN 

10868 max NaN 3.0 NaN 

10869 

10870 Describing a column from a ``DataFrame`` by accessing it as 

10871 an attribute. 

10872 

10873 >>> df.numeric.describe() 

10874 count 3.0 

10875 mean 2.0 

10876 std 1.0 

10877 min 1.0 

10878 25% 1.5 

10879 50% 2.0 

10880 75% 2.5 

10881 max 3.0 

10882 Name: numeric, dtype: float64 

10883 

10884 Including only numeric columns in a ``DataFrame`` description. 

10885 

10886 >>> df.describe(include=[np.number]) 

10887 numeric 

10888 count 3.0 

10889 mean 2.0 

10890 std 1.0 

10891 min 1.0 

10892 25% 1.5 

10893 50% 2.0 

10894 75% 2.5 

10895 max 3.0 

10896 

10897 Including only string columns in a ``DataFrame`` description. 

10898 

10899 >>> df.describe(include=[object]) # doctest: +SKIP 

10900 object 

10901 count 3 

10902 unique 3 

10903 top a 

10904 freq 1 

10905 

10906 Including only categorical columns from a ``DataFrame`` description. 

10907 

10908 >>> df.describe(include=['category']) 

10909 categorical 

10910 count 3 

10911 unique 3 

10912 top d 

10913 freq 1 

10914 

10915 Excluding numeric columns from a ``DataFrame`` description. 

10916 

10917 >>> df.describe(exclude=[np.number]) # doctest: +SKIP 

10918 categorical object 

10919 count 3 3 

10920 unique 3 3 

10921 top f a 

10922 freq 1 1 

10923 

10924 Excluding object columns from a ``DataFrame`` description. 

10925 

10926 >>> df.describe(exclude=[object]) # doctest: +SKIP 

10927 categorical numeric 

10928 count 3 3.0 

10929 unique 3 NaN 

10930 top f NaN 

10931 freq 1 NaN 

10932 mean NaN 2.0 

10933 std NaN 1.0 

10934 min NaN 1.0 

10935 25% NaN 1.5 

10936 50% NaN 2.0 

10937 75% NaN 2.5 

10938 max NaN 3.0 

10939 """ 

10940 return describe_ndframe( 

10941 obj=self, 

10942 include=include, 

10943 exclude=exclude, 

10944 datetime_is_numeric=datetime_is_numeric, 

10945 percentiles=percentiles, 

10946 ) 

10947 

10948 @final 

10949 def pct_change( 

10950 self: NDFrameT, 

10951 periods=1, 

10952 fill_method="pad", 

10953 limit=None, 

10954 freq=None, 

10955 **kwargs, 

10956 ) -> NDFrameT: 

10957 """ 

10958 Percentage change between the current and a prior element. 

10959 

10960 Computes the percentage change from the immediately previous row by 

10961 default. This is useful in comparing the percentage of change in a time 

10962 series of elements. 

10963 

10964 Parameters 

10965 ---------- 

10966 periods : int, default 1 

10967 Periods to shift for forming percent change. 

10968 fill_method : str, default 'pad' 

10969 How to handle NAs **before** computing percent changes. 

10970 limit : int, default None 

10971 The number of consecutive NAs to fill before stopping. 

10972 freq : DateOffset, timedelta, or str, optional 

10973 Increment to use from time series API (e.g. 'M' or BDay()). 

10974 **kwargs 

10975 Additional keyword arguments are passed into 

10976 `DataFrame.shift` or `Series.shift`. 

10977 

10978 Returns 

10979 ------- 

10980 chg : Series or DataFrame 

10981 The same type as the calling object. 

10982 

10983 See Also 

10984 -------- 

10985 Series.diff : Compute the difference of two elements in a Series. 

10986 DataFrame.diff : Compute the difference of two elements in a DataFrame. 

10987 Series.shift : Shift the index by some number of periods. 

10988 DataFrame.shift : Shift the index by some number of periods. 

10989 

10990 Examples 

10991 -------- 

10992 **Series** 

10993 

10994 >>> s = pd.Series([90, 91, 85]) 

10995 >>> s 

10996 0 90 

10997 1 91 

10998 2 85 

10999 dtype: int64 

11000 

11001 >>> s.pct_change() 

11002 0 NaN 

11003 1 0.011111 

11004 2 -0.065934 

11005 dtype: float64 

11006 

11007 >>> s.pct_change(periods=2) 

11008 0 NaN 

11009 1 NaN 

11010 2 -0.055556 

11011 dtype: float64 

11012 

11013 See the percentage change in a Series where filling NAs with last 

11014 valid observation forward to next valid. 

11015 

11016 >>> s = pd.Series([90, 91, None, 85]) 

11017 >>> s 

11018 0 90.0 

11019 1 91.0 

11020 2 NaN 

11021 3 85.0 

11022 dtype: float64 

11023 

11024 >>> s.pct_change(fill_method='ffill') 

11025 0 NaN 

11026 1 0.011111 

11027 2 0.000000 

11028 3 -0.065934 

11029 dtype: float64 

11030 

11031 **DataFrame** 

11032 

11033 Percentage change in French franc, Deutsche Mark, and Italian lira from 

11034 1980-01-01 to 1980-03-01. 

11035 

11036 >>> df = pd.DataFrame({ 

11037 ... 'FR': [4.0405, 4.0963, 4.3149], 

11038 ... 'GR': [1.7246, 1.7482, 1.8519], 

11039 ... 'IT': [804.74, 810.01, 860.13]}, 

11040 ... index=['1980-01-01', '1980-02-01', '1980-03-01']) 

11041 >>> df 

11042 FR GR IT 

11043 1980-01-01 4.0405 1.7246 804.74 

11044 1980-02-01 4.0963 1.7482 810.01 

11045 1980-03-01 4.3149 1.8519 860.13 

11046 

11047 >>> df.pct_change() 

11048 FR GR IT 

11049 1980-01-01 NaN NaN NaN 

11050 1980-02-01 0.013810 0.013684 0.006549 

11051 1980-03-01 0.053365 0.059318 0.061876 

11052 

11053 Percentage of change in GOOG and APPL stock volume. Shows computing 

11054 the percentage change between columns. 

11055 

11056 >>> df = pd.DataFrame({ 

11057 ... '2016': [1769950, 30586265], 

11058 ... '2015': [1500923, 40912316], 

11059 ... '2014': [1371819, 41403351]}, 

11060 ... index=['GOOG', 'APPL']) 

11061 >>> df 

11062 2016 2015 2014 

11063 GOOG 1769950 1500923 1371819 

11064 APPL 30586265 40912316 41403351 

11065 

11066 >>> df.pct_change(axis='columns', periods=-1) 

11067 2016 2015 2014 

11068 GOOG 0.179241 0.094112 NaN 

11069 APPL -0.252395 -0.011860 NaN 

11070 """ 

11071 axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) 

11072 if fill_method is None: 

11073 data = self 

11074 else: 

11075 _data = self.fillna(method=fill_method, axis=axis, limit=limit) 

11076 assert _data is not None # needed for mypy 

11077 data = _data 

11078 

11079 shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) 

11080 # Unsupported left operand type for / ("NDFrameT") 

11081 rs = data / shifted - 1 # type: ignore[operator] 

11082 if freq is not None: 

11083 # Shift method is implemented differently when freq is not None 

11084 # We want to restore the original index 

11085 rs = rs.loc[~rs.index.duplicated()] 

11086 rs = rs.reindex_like(data) 

11087 return rs.__finalize__(self, method="pct_change") 

11088 

11089 @final 

11090 def _agg_by_level( 

11091 self, 

11092 name: str, 

11093 axis: Axis = 0, 

11094 level: Level = 0, 

11095 skipna: bool_t = True, 

11096 **kwargs, 

11097 ): 

11098 if axis is None: 

11099 raise ValueError("Must specify 'axis' when aggregating by level.") 

11100 grouped = self.groupby(level=level, axis=axis, sort=False) 

11101 if hasattr(grouped, name) and skipna: 

11102 return getattr(grouped, name)(**kwargs) 

11103 axis = self._get_axis_number(axis) 

11104 method = getattr(type(self), name) 

11105 applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) 

11106 return grouped.aggregate(applyf) 

11107 

11108 @final 

11109 def _logical_func( 

11110 self, 

11111 name: str, 

11112 func, 

11113 axis: Axis = 0, 

11114 bool_only: bool_t | None = None, 

11115 skipna: bool_t = True, 

11116 level: Level | None = None, 

11117 **kwargs, 

11118 ) -> Series | bool_t: 

11119 nv.validate_logical_func((), kwargs, fname=name) 

11120 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11121 if level is not None: 

11122 warnings.warn( 

11123 "Using the level keyword in DataFrame and Series aggregations is " 

11124 "deprecated and will be removed in a future version. Use groupby " 

11125 "instead. df.any(level=1) should use df.groupby(level=1).any()", 

11126 FutureWarning, 

11127 stacklevel=find_stack_level(), 

11128 ) 

11129 if bool_only is not None: 

11130 raise NotImplementedError( 

11131 "Option bool_only is not implemented with option level." 

11132 ) 

11133 return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) 

11134 

11135 if self.ndim > 1 and axis is None: 

11136 # Reduce along one dimension then the other, to simplify DataFrame._reduce 

11137 res = self._logical_func( 

11138 name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs 

11139 ) 

11140 return res._logical_func(name, func, skipna=skipna, **kwargs) 

11141 

11142 if ( 

11143 self.ndim > 1 

11144 and axis == 1 

11145 and len(self._mgr.arrays) > 1 

11146 # TODO(EA2D): special-case not needed 

11147 and all(x.ndim == 2 for x in self._mgr.arrays) 

11148 and bool_only is not None 

11149 and not kwargs 

11150 ): 

11151 # Fastpath avoiding potentially expensive transpose 

11152 obj = self 

11153 if bool_only: 

11154 obj = self._get_bool_data() 

11155 return obj._reduce_axis1(name, func, skipna=skipna) 

11156 

11157 return self._reduce( 

11158 func, 

11159 name=name, 

11160 axis=axis, 

11161 skipna=skipna, 

11162 numeric_only=bool_only, 

11163 filter_type="bool", 

11164 ) 

11165 

11166 def any( 

11167 self, 

11168 axis: Axis = 0, 

11169 bool_only: bool_t | None = None, 

11170 skipna: bool_t = True, 

11171 level: Level | None = None, 

11172 **kwargs, 

11173 ) -> DataFrame | Series | bool_t: 

11174 return self._logical_func( 

11175 "any", nanops.nanany, axis, bool_only, skipna, level, **kwargs 

11176 ) 

11177 

11178 def all( 

11179 self, 

11180 axis: Axis = 0, 

11181 bool_only: bool_t | None = None, 

11182 skipna: bool_t = True, 

11183 level: Level | None = None, 

11184 **kwargs, 

11185 ) -> Series | bool_t: 

11186 return self._logical_func( 

11187 "all", nanops.nanall, axis, bool_only, skipna, level, **kwargs 

11188 ) 

11189 

11190 @final 

11191 def _accum_func( 

11192 self, 

11193 name: str, 

11194 func, 

11195 axis: Axis | None = None, 

11196 skipna: bool_t = True, 

11197 *args, 

11198 **kwargs, 

11199 ): 

11200 skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) 

11201 if axis is None: 

11202 axis = self._stat_axis_number 

11203 else: 

11204 axis = self._get_axis_number(axis) 

11205 

11206 if axis == 1: 

11207 return self.T._accum_func( 

11208 name, func, axis=0, skipna=skipna, *args, **kwargs 

11209 ).T 

11210 

11211 def block_accum_func(blk_values): 

11212 values = blk_values.T if hasattr(blk_values, "T") else blk_values 

11213 

11214 result = nanops.na_accum_func(values, func, skipna=skipna) 

11215 

11216 result = result.T if hasattr(result, "T") else result 

11217 return result 

11218 

11219 result = self._mgr.apply(block_accum_func) 

11220 

11221 return self._constructor(result).__finalize__(self, method=name) 

11222 

11223 def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11224 return self._accum_func( 

11225 "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs 

11226 ) 

11227 

11228 def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11229 return self._accum_func( 

11230 "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs 

11231 ) 

11232 

11233 def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11234 return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) 

11235 

11236 def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

11237 return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) 

11238 

11239 @final 

11240 def _stat_function_ddof( 

11241 self, 

11242 name: str, 

11243 func, 

11244 axis: Axis | None = None, 

11245 skipna: bool_t = True, 

11246 level: Level | None = None, 

11247 ddof: int = 1, 

11248 numeric_only: bool_t | None = None, 

11249 **kwargs, 

11250 ) -> Series | float: 

11251 nv.validate_stat_ddof_func((), kwargs, fname=name) 

11252 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11253 if axis is None: 

11254 axis = self._stat_axis_number 

11255 if level is not None: 

11256 warnings.warn( 

11257 "Using the level keyword in DataFrame and Series aggregations is " 

11258 "deprecated and will be removed in a future version. Use groupby " 

11259 "instead. df.var(level=1) should use df.groupby(level=1).var().", 

11260 FutureWarning, 

11261 stacklevel=find_stack_level(), 

11262 ) 

11263 return self._agg_by_level( 

11264 name, axis=axis, level=level, skipna=skipna, ddof=ddof 

11265 ) 

11266 return self._reduce( 

11267 func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof 

11268 ) 

11269 

11270 def sem( 

11271 self, 

11272 axis: Axis | None = None, 

11273 skipna: bool_t = True, 

11274 level: Level | None = None, 

11275 ddof: int = 1, 

11276 numeric_only: bool_t | None = None, 

11277 **kwargs, 

11278 ) -> Series | float: 

11279 return self._stat_function_ddof( 

11280 "sem", nanops.nansem, axis, skipna, level, ddof, numeric_only, **kwargs 

11281 ) 

11282 

11283 def var( 

11284 self, 

11285 axis: Axis | None = None, 

11286 skipna: bool_t = True, 

11287 level: Level | None = None, 

11288 ddof: int = 1, 

11289 numeric_only: bool_t | None = None, 

11290 **kwargs, 

11291 ) -> Series | float: 

11292 return self._stat_function_ddof( 

11293 "var", nanops.nanvar, axis, skipna, level, ddof, numeric_only, **kwargs 

11294 ) 

11295 

11296 def std( 

11297 self, 

11298 axis: Axis | None = None, 

11299 skipna: bool_t = True, 

11300 level: Level | None = None, 

11301 ddof: int = 1, 

11302 numeric_only: bool_t | None = None, 

11303 **kwargs, 

11304 ) -> Series | float: 

11305 return self._stat_function_ddof( 

11306 "std", nanops.nanstd, axis, skipna, level, ddof, numeric_only, **kwargs 

11307 ) 

11308 

11309 @final 

11310 def _stat_function( 

11311 self, 

11312 name: str, 

11313 func, 

11314 axis: Axis | None | lib.NoDefault = None, 

11315 skipna: bool_t = True, 

11316 level: Level | None = None, 

11317 numeric_only: bool_t | None = None, 

11318 **kwargs, 

11319 ): 

11320 if name == "median": 

11321 nv.validate_median((), kwargs) 

11322 else: 

11323 nv.validate_stat_func((), kwargs, fname=name) 

11324 

11325 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11326 

11327 if axis is None and level is None and self.ndim > 1: 

11328 # user must have explicitly passed axis=None 

11329 # GH#21597 

11330 warnings.warn( 

11331 f"In a future version, DataFrame.{name}(axis=None) will return a " 

11332 f"scalar {name} over the entire DataFrame. To retain the old " 

11333 f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'", 

11334 FutureWarning, 

11335 stacklevel=find_stack_level(), 

11336 ) 

11337 if axis is lib.no_default: 

11338 axis = None 

11339 

11340 if axis is None: 

11341 axis = self._stat_axis_number 

11342 if level is not None: 

11343 warnings.warn( 

11344 "Using the level keyword in DataFrame and Series aggregations is " 

11345 "deprecated and will be removed in a future version. Use groupby " 

11346 "instead. df.median(level=1) should use df.groupby(level=1).median().", 

11347 FutureWarning, 

11348 stacklevel=find_stack_level(), 

11349 ) 

11350 return self._agg_by_level( 

11351 name, axis=axis, level=level, skipna=skipna, numeric_only=numeric_only 

11352 ) 

11353 return self._reduce( 

11354 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only 

11355 ) 

11356 

11357 def min( 

11358 self, 

11359 axis: Axis | None | lib.NoDefault = lib.no_default, 

11360 skipna: bool_t = True, 

11361 level: Level | None = None, 

11362 numeric_only: bool_t | None = None, 

11363 **kwargs, 

11364 ): 

11365 return self._stat_function( 

11366 "min", 

11367 nanops.nanmin, 

11368 axis, 

11369 skipna, 

11370 level, 

11371 numeric_only, 

11372 **kwargs, 

11373 ) 

11374 

11375 def max( 

11376 self, 

11377 axis: Axis | None | lib.NoDefault = lib.no_default, 

11378 skipna: bool_t = True, 

11379 level: Level | None = None, 

11380 numeric_only: bool_t | None = None, 

11381 **kwargs, 

11382 ): 

11383 return self._stat_function( 

11384 "max", 

11385 nanops.nanmax, 

11386 axis, 

11387 skipna, 

11388 level, 

11389 numeric_only, 

11390 **kwargs, 

11391 ) 

11392 

11393 def mean( 

11394 self, 

11395 axis: Axis | None | lib.NoDefault = lib.no_default, 

11396 skipna: bool_t = True, 

11397 level: Level | None = None, 

11398 numeric_only: bool_t | None = None, 

11399 **kwargs, 

11400 ) -> Series | float: 

11401 return self._stat_function( 

11402 "mean", nanops.nanmean, axis, skipna, level, numeric_only, **kwargs 

11403 ) 

11404 

11405 def median( 

11406 self, 

11407 axis: Axis | None | lib.NoDefault = lib.no_default, 

11408 skipna: bool_t = True, 

11409 level: Level | None = None, 

11410 numeric_only: bool_t | None = None, 

11411 **kwargs, 

11412 ) -> Series | float: 

11413 return self._stat_function( 

11414 "median", nanops.nanmedian, axis, skipna, level, numeric_only, **kwargs 

11415 ) 

11416 

11417 def skew( 

11418 self, 

11419 axis: Axis | None | lib.NoDefault = lib.no_default, 

11420 skipna: bool_t = True, 

11421 level: Level | None = None, 

11422 numeric_only: bool_t | None = None, 

11423 **kwargs, 

11424 ) -> Series | float: 

11425 return self._stat_function( 

11426 "skew", nanops.nanskew, axis, skipna, level, numeric_only, **kwargs 

11427 ) 

11428 

11429 def kurt( 

11430 self, 

11431 axis: Axis | None | lib.NoDefault = lib.no_default, 

11432 skipna: bool_t = True, 

11433 level: Level | None = None, 

11434 numeric_only: bool_t | None = None, 

11435 **kwargs, 

11436 ) -> Series | float: 

11437 return self._stat_function( 

11438 "kurt", nanops.nankurt, axis, skipna, level, numeric_only, **kwargs 

11439 ) 

11440 

11441 kurtosis = kurt 

11442 

11443 @final 

11444 def _min_count_stat_function( 

11445 self, 

11446 name: str, 

11447 func, 

11448 axis: Axis | None = None, 

11449 skipna: bool_t = True, 

11450 level: Level | None = None, 

11451 numeric_only: bool_t | None = None, 

11452 min_count: int = 0, 

11453 **kwargs, 

11454 ): 

11455 if name == "sum": 

11456 nv.validate_sum((), kwargs) 

11457 elif name == "prod": 

11458 nv.validate_prod((), kwargs) 

11459 else: 

11460 nv.validate_stat_func((), kwargs, fname=name) 

11461 

11462 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

11463 

11464 if axis is None: 

11465 axis = self._stat_axis_number 

11466 if level is not None: 

11467 warnings.warn( 

11468 "Using the level keyword in DataFrame and Series aggregations is " 

11469 "deprecated and will be removed in a future version. Use groupby " 

11470 "instead. df.sum(level=1) should use df.groupby(level=1).sum().", 

11471 FutureWarning, 

11472 stacklevel=find_stack_level(), 

11473 ) 

11474 return self._agg_by_level( 

11475 name, 

11476 axis=axis, 

11477 level=level, 

11478 skipna=skipna, 

11479 min_count=min_count, 

11480 numeric_only=numeric_only, 

11481 ) 

11482 

11483 return self._reduce( 

11484 func, 

11485 name=name, 

11486 axis=axis, 

11487 skipna=skipna, 

11488 numeric_only=numeric_only, 

11489 min_count=min_count, 

11490 ) 

11491 

11492 def sum( 

11493 self, 

11494 axis: Axis | None = None, 

11495 skipna: bool_t = True, 

11496 level: Level | None = None, 

11497 numeric_only: bool_t | None = None, 

11498 min_count=0, 

11499 **kwargs, 

11500 ): 

11501 return self._min_count_stat_function( 

11502 "sum", nanops.nansum, axis, skipna, level, numeric_only, min_count, **kwargs 

11503 ) 

11504 

11505 def prod( 

11506 self, 

11507 axis: Axis | None = None, 

11508 skipna: bool_t = True, 

11509 level: Level | None = None, 

11510 numeric_only: bool_t | None = None, 

11511 min_count: int = 0, 

11512 **kwargs, 

11513 ): 

11514 return self._min_count_stat_function( 

11515 "prod", 

11516 nanops.nanprod, 

11517 axis, 

11518 skipna, 

11519 level, 

11520 numeric_only, 

11521 min_count, 

11522 **kwargs, 

11523 ) 

11524 

11525 product = prod 

11526 

11527 def mad( 

11528 self, 

11529 axis: Axis | None = None, 

11530 skipna: bool_t = True, 

11531 level: Level | None = None, 

11532 ) -> Series | float: 

11533 """ 

11534 {desc} 

11535 

11536 .. deprecated:: 1.5.0 

11537 mad is deprecated. 

11538 

11539 Parameters 

11540 ---------- 

11541 axis : {axis_descr} 

11542 Axis for the function to be applied on. 

11543 For `Series` this parameter is unused and defaults to 0. 

11544 skipna : bool, default True 

11545 Exclude NA/null values when computing the result. 

11546 level : int or level name, default None 

11547 If the axis is a MultiIndex (hierarchical), count along a 

11548 particular level, collapsing into a {name1}. 

11549 

11550 Returns 

11551 ------- 

11552 {name1} or {name2} (if level specified)\ 

11553 {see_also}\ 

11554 {examples} 

11555 """ 

11556 msg = ( 

11557 "The 'mad' method is deprecated and will be removed in a future version. " 

11558 "To compute the same result, you may do `(df - df.mean()).abs().mean()`." 

11559 ) 

11560 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) 

11561 

11562 if not is_bool(skipna): 

11563 warnings.warn( 

11564 "Passing None for skipna is deprecated and will raise in a future" 

11565 "version. Pass True instead. Only boolean values will be allowed " 

11566 "in the future.", 

11567 FutureWarning, 

11568 stacklevel=find_stack_level(), 

11569 ) 

11570 skipna = True 

11571 if axis is None: 

11572 axis = self._stat_axis_number 

11573 if level is not None: 

11574 warnings.warn( 

11575 "Using the level keyword in DataFrame and Series aggregations is " 

11576 "deprecated and will be removed in a future version. Use groupby " 

11577 "instead. df.mad(level=1) should use df.groupby(level=1).mad()", 

11578 FutureWarning, 

11579 stacklevel=find_stack_level(), 

11580 ) 

11581 return self._agg_by_level("mad", axis=axis, level=level, skipna=skipna) 

11582 

11583 data = self._get_numeric_data() 

11584 if axis == 0: 

11585 # error: Unsupported operand types for - ("NDFrame" and "float") 

11586 demeaned = data - data.mean(axis=0) # type: ignore[operator] 

11587 else: 

11588 demeaned = data.sub(data.mean(axis=1), axis=0) 

11589 return np.abs(demeaned).mean(axis=axis, skipna=skipna) 

11590 

11591 @classmethod 

11592 def _add_numeric_operations(cls): 

11593 """ 

11594 Add the operations to the cls; evaluate the doc strings again 

11595 """ 

11596 axis_descr, name1, name2 = _doc_params(cls) 

11597 

11598 @deprecate_nonkeyword_arguments( 

11599 version=None, 

11600 allowed_args=["self"], 

11601 name="DataFrame.any and Series.any", 

11602 ) 

11603 @doc( 

11604 _bool_doc, 

11605 desc=_any_desc, 

11606 name1=name1, 

11607 name2=name2, 

11608 axis_descr=axis_descr, 

11609 see_also=_any_see_also, 

11610 examples=_any_examples, 

11611 empty_value=False, 

11612 ) 

11613 def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): 

11614 return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) 

11615 

11616 setattr(cls, "any", any) 

11617 

11618 @doc( 

11619 _bool_doc, 

11620 desc=_all_desc, 

11621 name1=name1, 

11622 name2=name2, 

11623 axis_descr=axis_descr, 

11624 see_also=_all_see_also, 

11625 examples=_all_examples, 

11626 empty_value=True, 

11627 ) 

11628 def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): 

11629 return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) 

11630 

11631 setattr(cls, "all", all) 

11632 

11633 # error: Argument 1 to "doc" has incompatible type "Optional[str]"; expected 

11634 # "Union[str, Callable[..., Any]]" 

11635 @doc( 

11636 NDFrame.mad.__doc__, # type: ignore[arg-type] 

11637 desc="Return the mean absolute deviation of the values " 

11638 "over the requested axis.", 

11639 name1=name1, 

11640 name2=name2, 

11641 axis_descr=axis_descr, 

11642 see_also="", 

11643 examples="", 

11644 ) 

11645 def mad(self, axis=None, skipna=True, level=None): 

11646 return NDFrame.mad(self, axis, skipna, level) 

11647 

11648 setattr(cls, "mad", mad) 

11649 

11650 @doc( 

11651 _num_ddof_doc, 

11652 desc="Return unbiased standard error of the mean over requested " 

11653 "axis.\n\nNormalized by N-1 by default. This can be changed " 

11654 "using the ddof argument", 

11655 name1=name1, 

11656 name2=name2, 

11657 axis_descr=axis_descr, 

11658 notes="", 

11659 examples="", 

11660 ) 

11661 def sem( 

11662 self, 

11663 axis=None, 

11664 skipna=True, 

11665 level=None, 

11666 ddof=1, 

11667 numeric_only=None, 

11668 **kwargs, 

11669 ): 

11670 return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) 

11671 

11672 setattr(cls, "sem", sem) 

11673 

11674 @doc( 

11675 _num_ddof_doc, 

11676 desc="Return unbiased variance over requested axis.\n\nNormalized by " 

11677 "N-1 by default. This can be changed using the ddof argument.", 

11678 name1=name1, 

11679 name2=name2, 

11680 axis_descr=axis_descr, 

11681 notes="", 

11682 examples=_var_examples, 

11683 ) 

11684 def var( 

11685 self, 

11686 axis=None, 

11687 skipna=True, 

11688 level=None, 

11689 ddof=1, 

11690 numeric_only=None, 

11691 **kwargs, 

11692 ): 

11693 return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) 

11694 

11695 setattr(cls, "var", var) 

11696 

11697 @doc( 

11698 _num_ddof_doc, 

11699 desc="Return sample standard deviation over requested axis." 

11700 "\n\nNormalized by N-1 by default. This can be changed using the " 

11701 "ddof argument.", 

11702 name1=name1, 

11703 name2=name2, 

11704 axis_descr=axis_descr, 

11705 notes=_std_notes, 

11706 examples=_std_examples, 

11707 ) 

11708 def std( 

11709 self, 

11710 axis=None, 

11711 skipna=True, 

11712 level=None, 

11713 ddof=1, 

11714 numeric_only=None, 

11715 **kwargs, 

11716 ): 

11717 return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) 

11718 

11719 setattr(cls, "std", std) 

11720 

11721 @doc( 

11722 _cnum_doc, 

11723 desc="minimum", 

11724 name1=name1, 

11725 name2=name2, 

11726 axis_descr=axis_descr, 

11727 accum_func_name="min", 

11728 examples=_cummin_examples, 

11729 ) 

11730 def cummin(self, axis=None, skipna=True, *args, **kwargs): 

11731 return NDFrame.cummin(self, axis, skipna, *args, **kwargs) 

11732 

11733 setattr(cls, "cummin", cummin) 

11734 

11735 @doc( 

11736 _cnum_doc, 

11737 desc="maximum", 

11738 name1=name1, 

11739 name2=name2, 

11740 axis_descr=axis_descr, 

11741 accum_func_name="max", 

11742 examples=_cummax_examples, 

11743 ) 

11744 def cummax(self, axis=None, skipna=True, *args, **kwargs): 

11745 return NDFrame.cummax(self, axis, skipna, *args, **kwargs) 

11746 

11747 setattr(cls, "cummax", cummax) 

11748 

11749 @doc( 

11750 _cnum_doc, 

11751 desc="sum", 

11752 name1=name1, 

11753 name2=name2, 

11754 axis_descr=axis_descr, 

11755 accum_func_name="sum", 

11756 examples=_cumsum_examples, 

11757 ) 

11758 def cumsum(self, axis=None, skipna=True, *args, **kwargs): 

11759 return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) 

11760 

11761 setattr(cls, "cumsum", cumsum) 

11762 

11763 @doc( 

11764 _cnum_doc, 

11765 desc="product", 

11766 name1=name1, 

11767 name2=name2, 

11768 axis_descr=axis_descr, 

11769 accum_func_name="prod", 

11770 examples=_cumprod_examples, 

11771 ) 

11772 def cumprod(self, axis=None, skipna=True, *args, **kwargs): 

11773 return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) 

11774 

11775 setattr(cls, "cumprod", cumprod) 

11776 

11777 @doc( 

11778 _num_doc, 

11779 desc="Return the sum of the values over the requested axis.\n\n" 

11780 "This is equivalent to the method ``numpy.sum``.", 

11781 name1=name1, 

11782 name2=name2, 

11783 axis_descr=axis_descr, 

11784 min_count=_min_count_stub, 

11785 see_also=_stat_func_see_also, 

11786 examples=_sum_examples, 

11787 ) 

11788 def sum( 

11789 self, 

11790 axis=None, 

11791 skipna=True, 

11792 level=None, 

11793 numeric_only=None, 

11794 min_count=0, 

11795 **kwargs, 

11796 ): 

11797 return NDFrame.sum( 

11798 self, axis, skipna, level, numeric_only, min_count, **kwargs 

11799 ) 

11800 

11801 setattr(cls, "sum", sum) 

11802 

11803 @doc( 

11804 _num_doc, 

11805 desc="Return the product of the values over the requested axis.", 

11806 name1=name1, 

11807 name2=name2, 

11808 axis_descr=axis_descr, 

11809 min_count=_min_count_stub, 

11810 see_also=_stat_func_see_also, 

11811 examples=_prod_examples, 

11812 ) 

11813 def prod( 

11814 self, 

11815 axis=None, 

11816 skipna=True, 

11817 level=None, 

11818 numeric_only=None, 

11819 min_count=0, 

11820 **kwargs, 

11821 ): 

11822 return NDFrame.prod( 

11823 self, axis, skipna, level, numeric_only, min_count, **kwargs 

11824 ) 

11825 

11826 setattr(cls, "prod", prod) 

11827 cls.product = prod 

11828 

11829 @doc( 

11830 _num_doc, 

11831 desc="Return the mean of the values over the requested axis.", 

11832 name1=name1, 

11833 name2=name2, 

11834 axis_descr=axis_descr, 

11835 min_count="", 

11836 see_also="", 

11837 examples="", 

11838 ) 

11839 def mean( 

11840 self, 

11841 axis: int | None | lib.NoDefault = lib.no_default, 

11842 skipna=True, 

11843 level=None, 

11844 numeric_only=None, 

11845 **kwargs, 

11846 ): 

11847 return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) 

11848 

11849 setattr(cls, "mean", mean) 

11850 

11851 @doc( 

11852 _num_doc, 

11853 desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.", 

11854 name1=name1, 

11855 name2=name2, 

11856 axis_descr=axis_descr, 

11857 min_count="", 

11858 see_also="", 

11859 examples="", 

11860 ) 

11861 def skew( 

11862 self, 

11863 axis: int | None | lib.NoDefault = lib.no_default, 

11864 skipna=True, 

11865 level=None, 

11866 numeric_only=None, 

11867 **kwargs, 

11868 ): 

11869 return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) 

11870 

11871 setattr(cls, "skew", skew) 

11872 

11873 @doc( 

11874 _num_doc, 

11875 desc="Return unbiased kurtosis over requested axis.\n\n" 

11876 "Kurtosis obtained using Fisher's definition of\n" 

11877 "kurtosis (kurtosis of normal == 0.0). Normalized " 

11878 "by N-1.", 

11879 name1=name1, 

11880 name2=name2, 

11881 axis_descr=axis_descr, 

11882 min_count="", 

11883 see_also="", 

11884 examples="", 

11885 ) 

11886 def kurt( 

11887 self, 

11888 axis: Axis | None | lib.NoDefault = lib.no_default, 

11889 skipna=True, 

11890 level=None, 

11891 numeric_only=None, 

11892 **kwargs, 

11893 ): 

11894 return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) 

11895 

11896 setattr(cls, "kurt", kurt) 

11897 cls.kurtosis = kurt 

11898 

11899 @doc( 

11900 _num_doc, 

11901 desc="Return the median of the values over the requested axis.", 

11902 name1=name1, 

11903 name2=name2, 

11904 axis_descr=axis_descr, 

11905 min_count="", 

11906 see_also="", 

11907 examples="", 

11908 ) 

11909 def median( 

11910 self, 

11911 axis: int | None | lib.NoDefault = lib.no_default, 

11912 skipna=True, 

11913 level=None, 

11914 numeric_only=None, 

11915 **kwargs, 

11916 ): 

11917 return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) 

11918 

11919 setattr(cls, "median", median) 

11920 

11921 @doc( 

11922 _num_doc, 

11923 desc="Return the maximum of the values over the requested axis.\n\n" 

11924 "If you want the *index* of the maximum, use ``idxmax``. This is " 

11925 "the equivalent of the ``numpy.ndarray`` method ``argmax``.", 

11926 name1=name1, 

11927 name2=name2, 

11928 axis_descr=axis_descr, 

11929 min_count="", 

11930 see_also=_stat_func_see_also, 

11931 examples=_max_examples, 

11932 ) 

11933 def max( 

11934 self, 

11935 axis: int | None | lib.NoDefault = lib.no_default, 

11936 skipna=True, 

11937 level=None, 

11938 numeric_only=None, 

11939 **kwargs, 

11940 ): 

11941 return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) 

11942 

11943 setattr(cls, "max", max) 

11944 

11945 @doc( 

11946 _num_doc, 

11947 desc="Return the minimum of the values over the requested axis.\n\n" 

11948 "If you want the *index* of the minimum, use ``idxmin``. This is " 

11949 "the equivalent of the ``numpy.ndarray`` method ``argmin``.", 

11950 name1=name1, 

11951 name2=name2, 

11952 axis_descr=axis_descr, 

11953 min_count="", 

11954 see_also=_stat_func_see_also, 

11955 examples=_min_examples, 

11956 ) 

11957 def min( 

11958 self, 

11959 axis: int | None | lib.NoDefault = lib.no_default, 

11960 skipna=True, 

11961 level=None, 

11962 numeric_only=None, 

11963 **kwargs, 

11964 ): 

11965 return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) 

11966 

11967 setattr(cls, "min", min) 

11968 

11969 @final 

11970 @doc(Rolling) 

11971 def rolling( 

11972 self, 

11973 window: int | timedelta | BaseOffset | BaseIndexer, 

11974 min_periods: int | None = None, 

11975 center: bool_t = False, 

11976 win_type: str | None = None, 

11977 on: str | None = None, 

11978 axis: Axis = 0, 

11979 closed: str | None = None, 

11980 step: int | None = None, 

11981 method: str = "single", 

11982 ) -> Window | Rolling: 

11983 axis = self._get_axis_number(axis) 

11984 

11985 if win_type is not None: 

11986 return Window( 

11987 self, 

11988 window=window, 

11989 min_periods=min_periods, 

11990 center=center, 

11991 win_type=win_type, 

11992 on=on, 

11993 axis=axis, 

11994 closed=closed, 

11995 step=step, 

11996 method=method, 

11997 ) 

11998 

11999 return Rolling( 

12000 self, 

12001 window=window, 

12002 min_periods=min_periods, 

12003 center=center, 

12004 win_type=win_type, 

12005 on=on, 

12006 axis=axis, 

12007 closed=closed, 

12008 step=step, 

12009 method=method, 

12010 ) 

12011 

12012 @final 

12013 @doc(Expanding) 

12014 def expanding( 

12015 self, 

12016 min_periods: int = 1, 

12017 center: bool_t | None = None, 

12018 axis: Axis = 0, 

12019 method: str = "single", 

12020 ) -> Expanding: 

12021 axis = self._get_axis_number(axis) 

12022 if center is not None: 

12023 warnings.warn( 

12024 "The `center` argument on `expanding` will be removed in the future.", 

12025 FutureWarning, 

12026 stacklevel=find_stack_level(), 

12027 ) 

12028 else: 

12029 center = False 

12030 

12031 return Expanding( 

12032 self, min_periods=min_periods, center=center, axis=axis, method=method 

12033 ) 

12034 

12035 @final 

12036 @doc(ExponentialMovingWindow) 

12037 def ewm( 

12038 self, 

12039 com: float | None = None, 

12040 span: float | None = None, 

12041 halflife: float | TimedeltaConvertibleTypes | None = None, 

12042 alpha: float | None = None, 

12043 min_periods: int | None = 0, 

12044 adjust: bool_t = True, 

12045 ignore_na: bool_t = False, 

12046 axis: Axis = 0, 

12047 times: str | np.ndarray | DataFrame | Series | None = None, 

12048 method: str = "single", 

12049 ) -> ExponentialMovingWindow: 

12050 axis = self._get_axis_number(axis) 

12051 return ExponentialMovingWindow( 

12052 self, 

12053 com=com, 

12054 span=span, 

12055 halflife=halflife, 

12056 alpha=alpha, 

12057 min_periods=min_periods, 

12058 adjust=adjust, 

12059 ignore_na=ignore_na, 

12060 axis=axis, 

12061 times=times, 

12062 method=method, 

12063 ) 

12064 

12065 # ---------------------------------------------------------------------- 

12066 # Arithmetic Methods 

12067 

12068 @final 

12069 def _inplace_method(self, other, op): 

12070 """ 

12071 Wrap arithmetic method to operate inplace. 

12072 """ 

12073 result = op(self, other) 

12074 

12075 if ( 

12076 self.ndim == 1 

12077 and result._indexed_same(self) 

12078 and is_dtype_equal(result.dtype, self.dtype) 

12079 ): 

12080 # GH#36498 this inplace op can _actually_ be inplace. 

12081 self._values[:] = result._values 

12082 return self 

12083 

12084 # Delete cacher 

12085 self._reset_cacher() 

12086 

12087 # this makes sure that we are aligned like the input 

12088 # we are updating inplace so we want to ignore is_copy 

12089 self._update_inplace( 

12090 result.reindex_like(self, copy=False), verify_is_copy=False 

12091 ) 

12092 return self 

12093 

12094 def __iadd__(self: NDFrameT, other) -> NDFrameT: 

12095 # error: Unsupported left operand type for + ("Type[NDFrame]") 

12096 return self._inplace_method(other, type(self).__add__) # type: ignore[operator] 

12097 

12098 def __isub__(self: NDFrameT, other) -> NDFrameT: 

12099 # error: Unsupported left operand type for - ("Type[NDFrame]") 

12100 return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] 

12101 

12102 def __imul__(self: NDFrameT, other) -> NDFrameT: 

12103 # error: Unsupported left operand type for * ("Type[NDFrame]") 

12104 return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] 

12105 

12106 def __itruediv__(self: NDFrameT, other) -> NDFrameT: 

12107 # error: Unsupported left operand type for / ("Type[NDFrame]") 

12108 return self._inplace_method( 

12109 other, type(self).__truediv__ # type: ignore[operator] 

12110 ) 

12111 

12112 def __ifloordiv__(self: NDFrameT, other) -> NDFrameT: 

12113 # error: Unsupported left operand type for // ("Type[NDFrame]") 

12114 return self._inplace_method( 

12115 other, type(self).__floordiv__ # type: ignore[operator] 

12116 ) 

12117 

12118 def __imod__(self: NDFrameT, other) -> NDFrameT: 

12119 # error: Unsupported left operand type for % ("Type[NDFrame]") 

12120 return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] 

12121 

12122 def __ipow__(self: NDFrameT, other) -> NDFrameT: 

12123 # error: Unsupported left operand type for ** ("Type[NDFrame]") 

12124 return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] 

12125 

12126 def __iand__(self: NDFrameT, other) -> NDFrameT: 

12127 # error: Unsupported left operand type for & ("Type[NDFrame]") 

12128 return self._inplace_method(other, type(self).__and__) # type: ignore[operator] 

12129 

12130 def __ior__(self: NDFrameT, other) -> NDFrameT: 

12131 # error: Unsupported left operand type for | ("Type[NDFrame]") 

12132 return self._inplace_method(other, type(self).__or__) # type: ignore[operator] 

12133 

12134 def __ixor__(self: NDFrameT, other) -> NDFrameT: 

12135 # error: Unsupported left operand type for ^ ("Type[NDFrame]") 

12136 return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] 

12137 

12138 # ---------------------------------------------------------------------- 

12139 # Misc methods 

12140 

12141 @final 

12142 def _find_valid_index(self, *, how: str) -> Hashable | None: 

12143 """ 

12144 Retrieves the index of the first valid value. 

12145 

12146 Parameters 

12147 ---------- 

12148 how : {'first', 'last'} 

12149 Use this parameter to change between the first or last valid index. 

12150 

12151 Returns 

12152 ------- 

12153 idx_first_valid : type of index 

12154 """ 

12155 idxpos = find_valid_index(self._values, how=how) 

12156 if idxpos is None: 

12157 return None 

12158 return self.index[idxpos] 

12159 

12160 @final 

12161 @doc(position="first", klass=_shared_doc_kwargs["klass"]) 

12162 def first_valid_index(self) -> Hashable | None: 

12163 """ 

12164 Return index for {position} non-NA value or None, if no non-NA value is found. 

12165 

12166 Returns 

12167 ------- 

12168 scalar : type of index 

12169 

12170 Notes 

12171 ----- 

12172 If all elements are non-NA/null, returns None. 

12173 Also returns None for empty {klass}. 

12174 """ 

12175 return self._find_valid_index(how="first") 

12176 

12177 @final 

12178 @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) 

12179 def last_valid_index(self) -> Hashable | None: 

12180 return self._find_valid_index(how="last") 

12181 

12182 

12183def _doc_params(cls): 

12184 """Return a tuple of the doc params.""" 

12185 axis_descr = ( 

12186 f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}" 

12187 ) 

12188 name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" 

12189 name2 = cls.__name__ 

12190 return axis_descr, name, name2 

12191 

12192 

12193_num_doc = """ 

12194{desc} 

12195 

12196Parameters 

12197---------- 

12198axis : {axis_descr} 

12199 Axis for the function to be applied on. 

12200 For `Series` this parameter is unused and defaults to 0. 

12201skipna : bool, default True 

12202 Exclude NA/null values when computing the result. 

12203level : int or level name, default None 

12204 If the axis is a MultiIndex (hierarchical), count along a 

12205 particular level, collapsing into a {name1}. 

12206 

12207 .. deprecated:: 1.3.0 

12208 The level keyword is deprecated. Use groupby instead. 

12209numeric_only : bool, default None 

12210 Include only float, int, boolean columns. If None, will attempt to use 

12211 everything, then use only numeric data. Not implemented for Series. 

12212 

12213 .. deprecated:: 1.5.0 

12214 Specifying ``numeric_only=None`` is deprecated. The default value will be 

12215 ``False`` in a future version of pandas. 

12216 

12217{min_count}\ 

12218**kwargs 

12219 Additional keyword arguments to be passed to the function. 

12220 

12221Returns 

12222------- 

12223{name1} or {name2} (if level specified)\ 

12224{see_also}\ 

12225{examples} 

12226""" 

12227 

12228_num_ddof_doc = """ 

12229{desc} 

12230 

12231Parameters 

12232---------- 

12233axis : {axis_descr} 

12234 For `Series` this parameter is unused and defaults to 0. 

12235skipna : bool, default True 

12236 Exclude NA/null values. If an entire row/column is NA, the result 

12237 will be NA. 

12238level : int or level name, default None 

12239 If the axis is a MultiIndex (hierarchical), count along a 

12240 particular level, collapsing into a {name1}. 

12241 

12242 .. deprecated:: 1.3.0 

12243 The level keyword is deprecated. Use groupby instead. 

12244ddof : int, default 1 

12245 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

12246 where N represents the number of elements. 

12247numeric_only : bool, default None 

12248 Include only float, int, boolean columns. If None, will attempt to use 

12249 everything, then use only numeric data. Not implemented for Series. 

12250 

12251 .. deprecated:: 1.5.0 

12252 Specifying ``numeric_only=None`` is deprecated. The default value will be 

12253 ``False`` in a future version of pandas. 

12254 

12255Returns 

12256------- 

12257{name1} or {name2} (if level specified) \ 

12258{notes}\ 

12259{examples} 

12260""" 

12261 

12262_std_notes = """ 

12263 

12264Notes 

12265----- 

12266To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the 

12267default `ddof=1`)""" 

12268 

12269_std_examples = """ 

12270 

12271Examples 

12272-------- 

12273>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

12274... 'age': [21, 25, 62, 43], 

12275... 'height': [1.61, 1.87, 1.49, 2.01]} 

12276... ).set_index('person_id') 

12277>>> df 

12278 age height 

12279person_id 

122800 21 1.61 

122811 25 1.87 

122822 62 1.49 

122833 43 2.01 

12284 

12285The standard deviation of the columns can be found as follows: 

12286 

12287>>> df.std() 

12288age 18.786076 

12289height 0.237417 

12290 

12291Alternatively, `ddof=0` can be set to normalize by N instead of N-1: 

12292 

12293>>> df.std(ddof=0) 

12294age 16.269219 

12295height 0.205609""" 

12296 

12297_var_examples = """ 

12298 

12299Examples 

12300-------- 

12301>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

12302... 'age': [21, 25, 62, 43], 

12303... 'height': [1.61, 1.87, 1.49, 2.01]} 

12304... ).set_index('person_id') 

12305>>> df 

12306 age height 

12307person_id 

123080 21 1.61 

123091 25 1.87 

123102 62 1.49 

123113 43 2.01 

12312 

12313>>> df.var() 

12314age 352.916667 

12315height 0.056367 

12316 

12317Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: 

12318 

12319>>> df.var(ddof=0) 

12320age 264.687500 

12321height 0.042275""" 

12322 

12323_bool_doc = """ 

12324{desc} 

12325 

12326Parameters 

12327---------- 

12328axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

12329 Indicate which axis or axes should be reduced. For `Series` this parameter 

12330 is unused and defaults to 0. 

12331 

12332 * 0 / 'index' : reduce the index, return a Series whose index is the 

12333 original column labels. 

12334 * 1 / 'columns' : reduce the columns, return a Series whose index is the 

12335 original index. 

12336 * None : reduce all axes, return a scalar. 

12337 

12338bool_only : bool, default None 

12339 Include only boolean columns. If None, will attempt to use everything, 

12340 then use only boolean data. Not implemented for Series. 

12341skipna : bool, default True 

12342 Exclude NA/null values. If the entire row/column is NA and skipna is 

12343 True, then the result will be {empty_value}, as for an empty row/column. 

12344 If skipna is False, then NA are treated as True, because these are not 

12345 equal to zero. 

12346level : int or level name, default None 

12347 If the axis is a MultiIndex (hierarchical), count along a 

12348 particular level, collapsing into a {name1}. 

12349 

12350 .. deprecated:: 1.3.0 

12351 The level keyword is deprecated. Use groupby instead. 

12352**kwargs : any, default None 

12353 Additional keywords have no effect but might be accepted for 

12354 compatibility with NumPy. 

12355 

12356Returns 

12357------- 

12358{name1} or {name2} 

12359 If level is specified, then, {name2} is returned; otherwise, {name1} 

12360 is returned. 

12361 

12362{see_also} 

12363{examples}""" 

12364 

12365_all_desc = """\ 

12366Return whether all elements are True, potentially over an axis. 

12367 

12368Returns True unless there at least one element within a series or 

12369along a Dataframe axis that is False or equivalent (e.g. zero or 

12370empty).""" 

12371 

12372_all_examples = """\ 

12373Examples 

12374-------- 

12375**Series** 

12376 

12377>>> pd.Series([True, True]).all() 

12378True 

12379>>> pd.Series([True, False]).all() 

12380False 

12381>>> pd.Series([], dtype="float64").all() 

12382True 

12383>>> pd.Series([np.nan]).all() 

12384True 

12385>>> pd.Series([np.nan]).all(skipna=False) 

12386True 

12387 

12388**DataFrames** 

12389 

12390Create a dataframe from a dictionary. 

12391 

12392>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) 

12393>>> df 

12394 col1 col2 

123950 True True 

123961 True False 

12397 

12398Default behaviour checks if values in each column all return True. 

12399 

12400>>> df.all() 

12401col1 True 

12402col2 False 

12403dtype: bool 

12404 

12405Specify ``axis='columns'`` to check if values in each row all return True. 

12406 

12407>>> df.all(axis='columns') 

124080 True 

124091 False 

12410dtype: bool 

12411 

12412Or ``axis=None`` for whether every value is True. 

12413 

12414>>> df.all(axis=None) 

12415False 

12416""" 

12417 

12418_all_see_also = """\ 

12419See Also 

12420-------- 

12421Series.all : Return True if all elements are True. 

12422DataFrame.any : Return True if one (or more) elements are True. 

12423""" 

12424 

12425_cnum_doc = """ 

12426Return cumulative {desc} over a DataFrame or Series axis. 

12427 

12428Returns a DataFrame or Series of the same size containing the cumulative 

12429{desc}. 

12430 

12431Parameters 

12432---------- 

12433axis : {{0 or 'index', 1 or 'columns'}}, default 0 

12434 The index or the name of the axis. 0 is equivalent to None or 'index'. 

12435 For `Series` this parameter is unused and defaults to 0. 

12436skipna : bool, default True 

12437 Exclude NA/null values. If an entire row/column is NA, the result 

12438 will be NA. 

12439*args, **kwargs 

12440 Additional keywords have no effect but might be accepted for 

12441 compatibility with NumPy. 

12442 

12443Returns 

12444------- 

12445{name1} or {name2} 

12446 Return cumulative {desc} of {name1} or {name2}. 

12447 

12448See Also 

12449-------- 

12450core.window.expanding.Expanding.{accum_func_name} : Similar functionality 

12451 but ignores ``NaN`` values. 

12452{name2}.{accum_func_name} : Return the {desc} over 

12453 {name2} axis. 

12454{name2}.cummax : Return cumulative maximum over {name2} axis. 

12455{name2}.cummin : Return cumulative minimum over {name2} axis. 

12456{name2}.cumsum : Return cumulative sum over {name2} axis. 

12457{name2}.cumprod : Return cumulative product over {name2} axis. 

12458 

12459{examples}""" 

12460 

12461_cummin_examples = """\ 

12462Examples 

12463-------- 

12464**Series** 

12465 

12466>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12467>>> s 

124680 2.0 

124691 NaN 

124702 5.0 

124713 -1.0 

124724 0.0 

12473dtype: float64 

12474 

12475By default, NA values are ignored. 

12476 

12477>>> s.cummin() 

124780 2.0 

124791 NaN 

124802 2.0 

124813 -1.0 

124824 -1.0 

12483dtype: float64 

12484 

12485To include NA values in the operation, use ``skipna=False`` 

12486 

12487>>> s.cummin(skipna=False) 

124880 2.0 

124891 NaN 

124902 NaN 

124913 NaN 

124924 NaN 

12493dtype: float64 

12494 

12495**DataFrame** 

12496 

12497>>> df = pd.DataFrame([[2.0, 1.0], 

12498... [3.0, np.nan], 

12499... [1.0, 0.0]], 

12500... columns=list('AB')) 

12501>>> df 

12502 A B 

125030 2.0 1.0 

125041 3.0 NaN 

125052 1.0 0.0 

12506 

12507By default, iterates over rows and finds the minimum 

12508in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12509 

12510>>> df.cummin() 

12511 A B 

125120 2.0 1.0 

125131 2.0 NaN 

125142 1.0 0.0 

12515 

12516To iterate over columns and find the minimum in each row, 

12517use ``axis=1`` 

12518 

12519>>> df.cummin(axis=1) 

12520 A B 

125210 2.0 1.0 

125221 3.0 NaN 

125232 1.0 0.0 

12524""" 

12525 

12526_cumsum_examples = """\ 

12527Examples 

12528-------- 

12529**Series** 

12530 

12531>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12532>>> s 

125330 2.0 

125341 NaN 

125352 5.0 

125363 -1.0 

125374 0.0 

12538dtype: float64 

12539 

12540By default, NA values are ignored. 

12541 

12542>>> s.cumsum() 

125430 2.0 

125441 NaN 

125452 7.0 

125463 6.0 

125474 6.0 

12548dtype: float64 

12549 

12550To include NA values in the operation, use ``skipna=False`` 

12551 

12552>>> s.cumsum(skipna=False) 

125530 2.0 

125541 NaN 

125552 NaN 

125563 NaN 

125574 NaN 

12558dtype: float64 

12559 

12560**DataFrame** 

12561 

12562>>> df = pd.DataFrame([[2.0, 1.0], 

12563... [3.0, np.nan], 

12564... [1.0, 0.0]], 

12565... columns=list('AB')) 

12566>>> df 

12567 A B 

125680 2.0 1.0 

125691 3.0 NaN 

125702 1.0 0.0 

12571 

12572By default, iterates over rows and finds the sum 

12573in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12574 

12575>>> df.cumsum() 

12576 A B 

125770 2.0 1.0 

125781 5.0 NaN 

125792 6.0 1.0 

12580 

12581To iterate over columns and find the sum in each row, 

12582use ``axis=1`` 

12583 

12584>>> df.cumsum(axis=1) 

12585 A B 

125860 2.0 3.0 

125871 3.0 NaN 

125882 1.0 1.0 

12589""" 

12590 

12591_cumprod_examples = """\ 

12592Examples 

12593-------- 

12594**Series** 

12595 

12596>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12597>>> s 

125980 2.0 

125991 NaN 

126002 5.0 

126013 -1.0 

126024 0.0 

12603dtype: float64 

12604 

12605By default, NA values are ignored. 

12606 

12607>>> s.cumprod() 

126080 2.0 

126091 NaN 

126102 10.0 

126113 -10.0 

126124 -0.0 

12613dtype: float64 

12614 

12615To include NA values in the operation, use ``skipna=False`` 

12616 

12617>>> s.cumprod(skipna=False) 

126180 2.0 

126191 NaN 

126202 NaN 

126213 NaN 

126224 NaN 

12623dtype: float64 

12624 

12625**DataFrame** 

12626 

12627>>> df = pd.DataFrame([[2.0, 1.0], 

12628... [3.0, np.nan], 

12629... [1.0, 0.0]], 

12630... columns=list('AB')) 

12631>>> df 

12632 A B 

126330 2.0 1.0 

126341 3.0 NaN 

126352 1.0 0.0 

12636 

12637By default, iterates over rows and finds the product 

12638in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12639 

12640>>> df.cumprod() 

12641 A B 

126420 2.0 1.0 

126431 6.0 NaN 

126442 6.0 0.0 

12645 

12646To iterate over columns and find the product in each row, 

12647use ``axis=1`` 

12648 

12649>>> df.cumprod(axis=1) 

12650 A B 

126510 2.0 2.0 

126521 3.0 NaN 

126532 1.0 0.0 

12654""" 

12655 

12656_cummax_examples = """\ 

12657Examples 

12658-------- 

12659**Series** 

12660 

12661>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

12662>>> s 

126630 2.0 

126641 NaN 

126652 5.0 

126663 -1.0 

126674 0.0 

12668dtype: float64 

12669 

12670By default, NA values are ignored. 

12671 

12672>>> s.cummax() 

126730 2.0 

126741 NaN 

126752 5.0 

126763 5.0 

126774 5.0 

12678dtype: float64 

12679 

12680To include NA values in the operation, use ``skipna=False`` 

12681 

12682>>> s.cummax(skipna=False) 

126830 2.0 

126841 NaN 

126852 NaN 

126863 NaN 

126874 NaN 

12688dtype: float64 

12689 

12690**DataFrame** 

12691 

12692>>> df = pd.DataFrame([[2.0, 1.0], 

12693... [3.0, np.nan], 

12694... [1.0, 0.0]], 

12695... columns=list('AB')) 

12696>>> df 

12697 A B 

126980 2.0 1.0 

126991 3.0 NaN 

127002 1.0 0.0 

12701 

12702By default, iterates over rows and finds the maximum 

12703in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

12704 

12705>>> df.cummax() 

12706 A B 

127070 2.0 1.0 

127081 3.0 NaN 

127092 3.0 1.0 

12710 

12711To iterate over columns and find the maximum in each row, 

12712use ``axis=1`` 

12713 

12714>>> df.cummax(axis=1) 

12715 A B 

127160 2.0 2.0 

127171 3.0 NaN 

127182 1.0 1.0 

12719""" 

12720 

12721_any_see_also = """\ 

12722See Also 

12723-------- 

12724numpy.any : Numpy version of this method. 

12725Series.any : Return whether any element is True. 

12726Series.all : Return whether all elements are True. 

12727DataFrame.any : Return whether any element is True over requested axis. 

12728DataFrame.all : Return whether all elements are True over requested axis. 

12729""" 

12730 

12731_any_desc = """\ 

12732Return whether any element is True, potentially over an axis. 

12733 

12734Returns False unless there is at least one element within a series or 

12735along a Dataframe axis that is True or equivalent (e.g. non-zero or 

12736non-empty).""" 

12737 

12738_any_examples = """\ 

12739Examples 

12740-------- 

12741**Series** 

12742 

12743For Series input, the output is a scalar indicating whether any element 

12744is True. 

12745 

12746>>> pd.Series([False, False]).any() 

12747False 

12748>>> pd.Series([True, False]).any() 

12749True 

12750>>> pd.Series([], dtype="float64").any() 

12751False 

12752>>> pd.Series([np.nan]).any() 

12753False 

12754>>> pd.Series([np.nan]).any(skipna=False) 

12755True 

12756 

12757**DataFrame** 

12758 

12759Whether each column contains at least one True element (the default). 

12760 

12761>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) 

12762>>> df 

12763 A B C 

127640 1 0 0 

127651 2 2 0 

12766 

12767>>> df.any() 

12768A True 

12769B True 

12770C False 

12771dtype: bool 

12772 

12773Aggregating over the columns. 

12774 

12775>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) 

12776>>> df 

12777 A B 

127780 True 1 

127791 False 2 

12780 

12781>>> df.any(axis='columns') 

127820 True 

127831 True 

12784dtype: bool 

12785 

12786>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) 

12787>>> df 

12788 A B 

127890 True 1 

127901 False 0 

12791 

12792>>> df.any(axis='columns') 

127930 True 

127941 False 

12795dtype: bool 

12796 

12797Aggregating over the entire DataFrame with ``axis=None``. 

12798 

12799>>> df.any(axis=None) 

12800True 

12801 

12802`any` for an empty DataFrame is an empty Series. 

12803 

12804>>> pd.DataFrame([]).any() 

12805Series([], dtype: bool) 

12806""" 

12807 

12808_shared_docs[ 

12809 "stat_func_example" 

12810] = """ 

12811 

12812Examples 

12813-------- 

12814>>> idx = pd.MultiIndex.from_arrays([ 

12815... ['warm', 'warm', 'cold', 'cold'], 

12816... ['dog', 'falcon', 'fish', 'spider']], 

12817... names=['blooded', 'animal']) 

12818>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) 

12819>>> s 

12820blooded animal 

12821warm dog 4 

12822 falcon 2 

12823cold fish 0 

12824 spider 8 

12825Name: legs, dtype: int64 

12826 

12827>>> s.{stat_func}() 

12828{default_output}""" 

12829 

12830_sum_examples = _shared_docs["stat_func_example"].format( 

12831 stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 

12832) 

12833 

12834_sum_examples += """ 

12835 

12836By default, the sum of an empty or all-NA Series is ``0``. 

12837 

12838>>> pd.Series([], dtype="float64").sum() # min_count=0 is the default 

128390.0 

12840 

12841This can be controlled with the ``min_count`` parameter. For example, if 

12842you'd like the sum of an empty series to be NaN, pass ``min_count=1``. 

12843 

12844>>> pd.Series([], dtype="float64").sum(min_count=1) 

12845nan 

12846 

12847Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

12848empty series identically. 

12849 

12850>>> pd.Series([np.nan]).sum() 

128510.0 

12852 

12853>>> pd.Series([np.nan]).sum(min_count=1) 

12854nan""" 

12855 

12856_max_examples: str = _shared_docs["stat_func_example"].format( 

12857 stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 

12858) 

12859 

12860_min_examples: str = _shared_docs["stat_func_example"].format( 

12861 stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 

12862) 

12863 

12864_stat_func_see_also = """ 

12865 

12866See Also 

12867-------- 

12868Series.sum : Return the sum. 

12869Series.min : Return the minimum. 

12870Series.max : Return the maximum. 

12871Series.idxmin : Return the index of the minimum. 

12872Series.idxmax : Return the index of the maximum. 

12873DataFrame.sum : Return the sum over the requested axis. 

12874DataFrame.min : Return the minimum over the requested axis. 

12875DataFrame.max : Return the maximum over the requested axis. 

12876DataFrame.idxmin : Return the index of the minimum over the requested axis. 

12877DataFrame.idxmax : Return the index of the maximum over the requested axis.""" 

12878 

12879_prod_examples = """ 

12880 

12881Examples 

12882-------- 

12883By default, the product of an empty or all-NA Series is ``1`` 

12884 

12885>>> pd.Series([], dtype="float64").prod() 

128861.0 

12887 

12888This can be controlled with the ``min_count`` parameter 

12889 

12890>>> pd.Series([], dtype="float64").prod(min_count=1) 

12891nan 

12892 

12893Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

12894empty series identically. 

12895 

12896>>> pd.Series([np.nan]).prod() 

128971.0 

12898 

12899>>> pd.Series([np.nan]).prod(min_count=1) 

12900nan""" 

12901 

12902_min_count_stub = """\ 

12903min_count : int, default 0 

12904 The required number of valid values to perform the operation. If fewer than 

12905 ``min_count`` non-NA values are present the result will be NA. 

12906""" 

12907 

12908 

12909def _align_as_utc( 

12910 left: NDFrameT, right: NDFrameT, join_index: Index | None 

12911) -> tuple[NDFrameT, NDFrameT]: 

12912 """ 

12913 If we are aligning timezone-aware DatetimeIndexes and the timezones 

12914 do not match, convert both to UTC. 

12915 """ 

12916 if is_datetime64tz_dtype(left.index.dtype): 

12917 if left.index.tz != right.index.tz: 

12918 if join_index is not None: 

12919 # GH#33671 ensure we don't change the index on 

12920 # our original Series (NB: by default deep=False) 

12921 left = left.copy() 

12922 right = right.copy() 

12923 left.index = join_index 

12924 right.index = join_index 

12925 

12926 return left, right