Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/base.py: 32%

303 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Base and utility classes for pandas objects. 

3""" 

4 

5from __future__ import annotations 

6 

7import textwrap 

8from typing import ( 

9 TYPE_CHECKING, 

10 Any, 

11 Generic, 

12 Hashable, 

13 Literal, 

14 TypeVar, 

15 cast, 

16 final, 

17 overload, 

18) 

19import warnings 

20 

21import numpy as np 

22 

23import pandas._libs.lib as lib 

24from pandas._typing import ( 

25 ArrayLike, 

26 DtypeObj, 

27 IndexLabel, 

28 NDFrameT, 

29 Shape, 

30 npt, 

31) 

32from pandas.compat import PYPY 

33from pandas.compat.numpy import function as nv 

34from pandas.errors import AbstractMethodError 

35from pandas.util._decorators import ( 

36 cache_readonly, 

37 doc, 

38) 

39from pandas.util._exceptions import find_stack_level 

40 

41from pandas.core.dtypes.common import ( 

42 is_categorical_dtype, 

43 is_dict_like, 

44 is_extension_array_dtype, 

45 is_object_dtype, 

46 is_scalar, 

47) 

48from pandas.core.dtypes.generic import ( 

49 ABCDataFrame, 

50 ABCIndex, 

51 ABCSeries, 

52) 

53from pandas.core.dtypes.missing import ( 

54 isna, 

55 remove_na_arraylike, 

56) 

57 

58from pandas.core import ( 

59 algorithms, 

60 nanops, 

61 ops, 

62) 

63from pandas.core.accessor import DirNamesMixin 

64from pandas.core.algorithms import ( 

65 duplicated, 

66 unique1d, 

67 value_counts, 

68) 

69from pandas.core.arraylike import OpsMixin 

70from pandas.core.arrays import ExtensionArray 

71from pandas.core.construction import ( 

72 create_series_with_explicit_dtype, 

73 ensure_wrapped_if_datetimelike, 

74 extract_array, 

75) 

76 

77if TYPE_CHECKING:    77 ↛ 79 (line 77 didn't jump to line 79, because the condition on line 77 was never true)

78 

79 from pandas._typing import ( 

80 NumpySorter, 

81 NumpyValueArrayLike, 

82 ScalarLike_co, 

83 ) 

84 

85 from pandas import ( 

86 Categorical, 

87 Series, 

88 ) 

89 

90 

91_shared_docs: dict[str, str] = {} 

92_indexops_doc_kwargs = { 

93 "klass": "IndexOpsMixin", 

94 "inplace": "", 

95 "unique": "IndexOpsMixin", 

96 "duplicated": "IndexOpsMixin", 

97} 

98 

99_T = TypeVar("_T", bound="IndexOpsMixin") 

100 

101 

102class PandasObject(DirNamesMixin): 

103 """ 

104 Baseclass for various pandas objects. 

105 """ 

106 

107 # results from calls to methods decorated with cache_readonly get added to _cache 

108 _cache: dict[str, Any] 

109 

110 @property 

111 def _constructor(self): 

112 """ 

113 Class constructor (for this class it's just `__class__`).

114 """ 

115 return type(self) 

116 

117 def __repr__(self) -> str: 

118 """ 

119 Return a string representation for a particular object. 

120 """ 

121 # Should be overwritten by base classes 

122 return object.__repr__(self) 

123 

124 def _reset_cache(self, key: str | None = None) -> None: 

125 """ 

126 Reset cached properties. If ``key`` is passed, only clears that key. 

127 """ 

128 if not hasattr(self, "_cache"): 

129 return 

130 if key is None: 

131 self._cache.clear() 

132 else: 

133 self._cache.pop(key, None) 

134 

135 def __sizeof__(self) -> int: 

136 """ 

137 Generates the total memory usage for an object that returns 

138 either a value or Series of values 

139 """ 

140 memory_usage = getattr(self, "memory_usage", None) 

141 if memory_usage: 

142 mem = memory_usage(deep=True) 

143 return int(mem if is_scalar(mem) else mem.sum()) 

144 

145 # no memory_usage attribute, so fall back to object's 'sizeof' 

146 return super().__sizeof__() 

147 

148 
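# --- Editor's illustrative sketch (not part of pandas/core/base.py). It shows the
# effect of PandasObject.__sizeof__ above: sys.getsizeof() on a Series defers to
# memory_usage(deep=True), plus any garbage-collector overhead CPython adds.
import sys
import pandas as pd

s = pd.Series(["a", "bb", "ccc"])
assert sys.getsizeof(s) >= int(s.memory_usage(deep=True))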

149class NoNewAttributesMixin: 

150 """ 

151 Mixin which prevents adding new attributes. 

152 

153 Prevents additional attributes via xxx.attribute = "something" after a 

154 call to `self.__freeze()`. Mainly used to prevent the user from using 

155 wrong attributes on an accessor (`Series.cat/.str/.dt`). 

156 

157 If you really want to add a new attribute at a later time, you need to use 

158 `object.__setattr__(self, key, value)`. 

159 """ 

160 

161 def _freeze(self): 

162 """ 

163 Prevents setting additional attributes. 

164 """ 

165 object.__setattr__(self, "__frozen", True) 

166 

167 # prevent adding any attribute via s.xxx.new_attribute = ... 

168 def __setattr__(self, key: str, value) -> None: 

169 # _cache is used by a decorator 

170 # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) 

171 # because 

172 # 1.) getattr is false for attributes that raise errors 

173 # 2.) cls.__dict__ doesn't traverse into base classes 

174 if getattr(self, "__frozen", False) and not ( 

175 key == "_cache" 

176 or key in type(self).__dict__ 

177 or getattr(self, key, None) is not None 

178 ): 

179 raise AttributeError(f"You cannot add any new attribute '{key}'") 

180 object.__setattr__(self, key, value) 

181 

182 
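# --- Editor's illustrative sketch (not part of pandas/core/base.py). It shows the
# NoNewAttributesMixin guard as seen through a frozen accessor such as Series.str:
# assigning a brand-new attribute is rejected, which catches typos on accessors.
import pandas as pd

s = pd.Series(["a", "b"])
try:
    s.str.brand_new_attribute = 1  # hypothetical attribute name, not a real API
except AttributeError as err:
    print(err)  # You cannot add any new attribute 'brand_new_attribute'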

183class SelectionMixin(Generic[NDFrameT]): 

184 """ 

185 Mixin implementing the selection & aggregation interface on a group-like

186 object; sub-classes need to define: obj, exclusions.

187 """ 

188 

189 obj: NDFrameT 

190 _selection: IndexLabel | None = None 

191 exclusions: frozenset[Hashable] 

192 _internal_names = ["_cache", "__setstate__"] 

193 _internal_names_set = set(_internal_names) 

194 

195 @final 

196 @property 

197 def _selection_list(self): 

198 if not isinstance( 

199 self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray) 

200 ): 

201 return [self._selection] 

202 return self._selection 

203 

204 @cache_readonly 

205 def _selected_obj(self): 

206 if self._selection is None or isinstance(self.obj, ABCSeries): 

207 return self.obj 

208 else: 

209 return self.obj[self._selection] 

210 

211 @final 

212 @cache_readonly 

213 def ndim(self) -> int: 

214 return self._selected_obj.ndim 

215 

216 @final 

217 @cache_readonly 

218 def _obj_with_exclusions(self): 

219 if self._selection is not None and isinstance(self.obj, ABCDataFrame): 

220 return self.obj[self._selection_list] 

221 

222 if len(self.exclusions) > 0: 

223 # equivalent to `self.obj.drop(self.exclusions, axis=1)`

224 # but this avoids consolidating and making a copy 

225 # TODO: following GH#45287 can we now use .drop directly without 

226 # making a copy? 

227 return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True) 

228 else: 

229 return self.obj 

230 

231 def __getitem__(self, key): 

232 if self._selection is not None: 

233 raise IndexError(f"Column(s) {self._selection} already selected") 

234 

235 if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): 

236 if len(self.obj.columns.intersection(key)) != len(set(key)): 

237 bad_keys = list(set(key).difference(self.obj.columns)) 

238 raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") 

239 return self._gotitem(list(key), ndim=2) 

240 

241 elif not getattr(self, "as_index", False): 

242 if key not in self.obj.columns: 

243 raise KeyError(f"Column not found: {key}") 

244 return self._gotitem(key, ndim=2) 

245 

246 else: 

247 if key not in self.obj: 

248 raise KeyError(f"Column not found: {key}") 

249 subset = self.obj[key] 

250 ndim = subset.ndim 

251 return self._gotitem(key, ndim=ndim, subset=subset) 

252 

253 def _gotitem(self, key, ndim: int, subset=None): 

254 """ 

255 sub-classes to define 

256 return a sliced object 

257 

258 Parameters 

259 ---------- 

260 key : str / list of selections 

261 ndim : {1, 2} 

262 requested ndim of result 

263 subset : object, default None 

264 subset to act on 

265 """ 

266 raise AbstractMethodError(self) 

267 

268 def aggregate(self, func, *args, **kwargs): 

269 raise AbstractMethodError(self) 

270 

271 agg = aggregate 

272 

273 
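# --- Editor's illustrative sketch (not part of pandas/core/base.py). GroupBy is a
# concrete SelectionMixin subclass, so column selection on a groupby object goes
# through the __getitem__/_gotitem machinery defined above.
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "x": [1, 2, 3], "y": [4.0, 5.0, 6.0]})
gb = df.groupby("key")
print(gb["x"].sum())          # scalar column selection -> Series result
print(gb[["x", "y"]].sum())   # list selection (ndim=2) -> DataFrame result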

274class IndexOpsMixin(OpsMixin): 

275 """ 

276 Common ops mixin to support a unified interface / docs for Series / Index 

277 """ 

278 

279 # ndarray compatibility 

280 __array_priority__ = 1000 

281 _hidden_attrs: frozenset[str] = frozenset( 

282 ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ 

283 ) 

284 

285 @property 

286 def dtype(self) -> DtypeObj: 

287 # must be defined here as a property for mypy 

288 raise AbstractMethodError(self) 

289 

290 @property 

291 def _values(self) -> ExtensionArray | np.ndarray: 

292 # must be defined here as a property for mypy 

293 raise AbstractMethodError(self) 

294 

295 def transpose(self: _T, *args, **kwargs) -> _T: 

296 """ 

297 Return the transpose, which is by definition self. 

298 

299 Returns 

300 ------- 

301 %(klass)s 

302 """ 

303 nv.validate_transpose(args, kwargs) 

304 return self 

305 

306 T = property( 

307 transpose, 

308 doc=""" 

309 Return the transpose, which is by definition self. 

310 """, 

311 ) 

312 

313 @property 

314 def shape(self) -> Shape: 

315 """ 

316 Return a tuple of the shape of the underlying data. 

317 """ 

318 return self._values.shape 

319 

320 def __len__(self) -> int: 

321 # We need this defined here for mypy 

322 raise AbstractMethodError(self) 

323 

324 @property 

325 def ndim(self) -> Literal[1]: 

326 """ 

327 Number of dimensions of the underlying data, by definition 1. 

328 """ 

329 return 1 

330 

331 def item(self): 

332 """ 

333 Return the first element of the underlying data as a Python scalar. 

334 

335 Returns 

336 ------- 

337 scalar 

338 The first element of %(klass)s. 

339 

340 Raises 

341 ------ 

342 ValueError 

343 If the data is not length-1. 

344 """ 

345 if len(self) == 1: 

346 return next(iter(self)) 

347 raise ValueError("can only convert an array of size 1 to a Python scalar") 

348 
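# --- Editor's illustrative sketch (not part of pandas/core/base.py): item() on a
# length-1 Series/Index returns the lone scalar; any other length raises ValueError.
import pandas as pd

assert pd.Series([42]).item() == 42
assert pd.Index(["only"]).item() == "only"
try:
    pd.Series([1, 2]).item()
except ValueError as err:
    print(err)  # can only convert an array of size 1 to a Python scalar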

349 @property 

350 def nbytes(self) -> int: 

351 """ 

352 Return the number of bytes in the underlying data. 

353 """ 

354 return self._values.nbytes 

355 

356 @property 

357 def size(self) -> int: 

358 """ 

359 Return the number of elements in the underlying data. 

360 """ 

361 return len(self._values) 

362 

363 @property 

364 def array(self) -> ExtensionArray: 

365 """ 

366 The ExtensionArray of the data backing this Series or Index. 

367 

368 Returns 

369 ------- 

370 ExtensionArray 

371 An ExtensionArray of the values stored within. For extension 

372 types, this is the actual array. For NumPy native types, this 

373 is a thin (no copy) wrapper around :class:`numpy.ndarray`. 

374 

375 ``.array`` differs from ``.values``, which may require converting the

376 data to a different form. 

377 

378 See Also 

379 -------- 

380 Index.to_numpy : Similar method that always returns a NumPy array. 

381 Series.to_numpy : Similar method that always returns a NumPy array. 

382 

383 Notes 

384 ----- 

385 This table lays out the different array types for each extension 

386 dtype within pandas. 

387 

388 ================== ============================= 

389 dtype array type 

390 ================== ============================= 

391 category Categorical 

392 period PeriodArray 

393 interval IntervalArray 

394 IntegerNA IntegerArray 

395 string StringArray 

396 boolean BooleanArray 

397 datetime64[ns, tz] DatetimeArray 

398 ================== ============================= 

399 

400 For any 3rd-party extension types, the array type will be an 

401 ExtensionArray. 

402 

403 For all remaining dtypes ``.array`` will be a 

404 :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray 

405 stored within. If you absolutely need a NumPy array (possibly with 

406 copying / coercing data), then use :meth:`Series.to_numpy` instead. 

407 

408 Examples 

409 -------- 

410 For regular NumPy types like int and float, a PandasArray

411 is returned. 

412 

413 >>> pd.Series([1, 2, 3]).array 

414 <PandasArray> 

415 [1, 2, 3] 

416 Length: 3, dtype: int64 

417 

418 For extension types, like Categorical, the actual ExtensionArray 

419 is returned 

420 

421 >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) 

422 >>> ser.array 

423 ['a', 'b', 'a'] 

424 Categories (2, object): ['a', 'b'] 

425 """ 

426 raise AbstractMethodError(self) 

427 

428 def to_numpy( 

429 self, 

430 dtype: npt.DTypeLike | None = None, 

431 copy: bool = False, 

432 na_value: object = lib.no_default, 

433 **kwargs, 

434 ) -> np.ndarray: 

435 """ 

436 A NumPy ndarray representing the values in this Series or Index. 

437 

438 Parameters 

439 ---------- 

440 dtype : str or numpy.dtype, optional 

441 The dtype to pass to :meth:`numpy.asarray`. 

442 copy : bool, default False 

443 Whether to ensure that the returned value is not a view on 

444 another array. Note that ``copy=False`` does not *ensure* that 

445 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that

446 a copy is made, even if not strictly necessary. 

447 na_value : Any, optional 

448 The value to use for missing values. The default value depends 

449 on `dtype` and the type of the array. 

450 

451 .. versionadded:: 1.0.0 

452 

453 **kwargs 

454 Additional keywords passed through to the ``to_numpy`` method 

455 of the underlying array (for extension arrays). 

456 

457 .. versionadded:: 1.0.0 

458 

459 Returns 

460 ------- 

461 numpy.ndarray 

462 

463 See Also 

464 -------- 

465 Series.array : Get the actual data stored within. 

466 Index.array : Get the actual data stored within. 

467 DataFrame.to_numpy : Similar method for DataFrame. 

468 

469 Notes 

470 ----- 

471 The returned array will be the same up to equality (values equal 

472 in `self` will be equal in the returned array; likewise for values 

473 that are not equal). When `self` contains an ExtensionArray, the 

474 dtype may be different. For example, for a category-dtype Series, 

475 ``to_numpy()`` will return a NumPy array and the categorical dtype 

476 will be lost. 

477 

478 For NumPy dtypes, this will be a reference to the actual data stored 

479 in this Series or Index (assuming ``copy=False``). Modifying the result 

480 in place will modify the data stored in the Series or Index (not that 

481 we recommend doing that). 

482 

483 For extension types, ``to_numpy()`` *may* require copying data and 

484 coercing the result to a NumPy type (possibly object), which may be 

485 expensive. When you need a no-copy reference to the underlying data, 

486 :attr:`Series.array` should be used instead. 

487 

488 This table lays out the different dtypes and default return types of 

489 ``to_numpy()`` for various dtypes within pandas. 

490 

491 ================== ================================ 

492 dtype array type 

493 ================== ================================ 

494 category[T] ndarray[T] (same dtype as input) 

495 period ndarray[object] (Periods) 

496 interval ndarray[object] (Intervals) 

497 IntegerNA ndarray[object] 

498 datetime64[ns] datetime64[ns] 

499 datetime64[ns, tz] ndarray[object] (Timestamps) 

500 ================== ================================ 

501 

502 Examples 

503 -------- 

504 >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) 

505 >>> ser.to_numpy() 

506 array(['a', 'b', 'a'], dtype=object) 

507 

508 Specify the `dtype` to control how datetime-aware data is represented. 

509 Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp` 

510 objects, each with the correct ``tz``. 

511 

512 >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) 

513 >>> ser.to_numpy(dtype=object) 

514 array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), 

515 Timestamp('2000-01-02 00:00:00+0100', tz='CET')], 

516 dtype=object) 

517 

518 Or ``dtype='datetime64[ns]'`` to return an ndarray of native 

519 datetime64 values. The values are converted to UTC and the timezone 

520 info is dropped. 

521 

522 >>> ser.to_numpy(dtype="datetime64[ns]") 

523 ... # doctest: +ELLIPSIS 

524 array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'], 

525 dtype='datetime64[ns]') 

526 """ 

527 if is_extension_array_dtype(self.dtype): 

528 return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) 

529 elif kwargs: 

530 bad_keys = list(kwargs.keys())[0] 

531 raise TypeError( 

532 f"to_numpy() got an unexpected keyword argument '{bad_keys}'" 

533 ) 

534 

535 result = np.asarray(self._values, dtype=dtype) 

536 # TODO(GH-24345): Avoid potential double copy 

537 if copy or na_value is not lib.no_default: 

538 result = result.copy() 

539 if na_value is not lib.no_default: 

540 result[np.asanyarray(self.isna())] = na_value 

541 return result 

542 

543 @property 

544 def empty(self) -> bool: 

545 return not self.size 

546 

547 def max(self, axis=None, skipna: bool = True, *args, **kwargs): 

548 """ 

549 Return the maximum value of the Index. 

550 

551 Parameters 

552 ---------- 

553 axis : int, optional 

554 For compatibility with NumPy. Only 0 or None are allowed. 

555 skipna : bool, default True 

556 Exclude NA/null values when showing the result. 

557 *args, **kwargs 

558 Additional arguments and keywords for compatibility with NumPy. 

559 

560 Returns 

561 ------- 

562 scalar 

563 Maximum value. 

564 

565 See Also 

566 -------- 

567 Index.min : Return the minimum value in an Index. 

568 Series.max : Return the maximum value in a Series. 

569 DataFrame.max : Return the maximum values in a DataFrame. 

570 

571 Examples 

572 -------- 

573 >>> idx = pd.Index([3, 2, 1]) 

574 >>> idx.max() 

575 3 

576 

577 >>> idx = pd.Index(['c', 'b', 'a']) 

578 >>> idx.max() 

579 'c' 

580 

581 For a MultiIndex, the maximum is determined lexicographically. 

582 

583 >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) 

584 >>> idx.max() 

585 ('b', 2) 

586 """ 

587 nv.validate_minmax_axis(axis) 

588 nv.validate_max(args, kwargs) 

589 return nanops.nanmax(self._values, skipna=skipna) 

590 

591 @doc(op="max", oppose="min", value="largest") 

592 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: 

593 """ 

594 Return int position of the {value} value in the Series. 

595 

596 If the {op}imum is achieved in multiple locations, 

597 the first row position is returned. 

598 

599 Parameters 

600 ---------- 

601 axis : {{None}} 

602 Unused. Parameter needed for compatibility with DataFrame. 

603 skipna : bool, default True 

604 Exclude NA/null values when showing the result. 

605 *args, **kwargs 

606 Additional arguments and keywords for compatibility with NumPy. 

607 

608 Returns 

609 ------- 

610 int 

611 Row position of the {op}imum value. 

612 

613 See Also 

614 -------- 

615 Series.arg{op} : Return position of the {op}imum value. 

616 Series.arg{oppose} : Return position of the {oppose}imum value. 

617 numpy.ndarray.arg{op} : Equivalent method for numpy arrays. 

618 Series.idxmax : Return index label of the maximum values. 

619 Series.idxmin : Return index label of the minimum values. 

620 

621 Examples 

622 -------- 

623 Consider dataset containing cereal calories 

624 

625 >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, 

626 ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) 

627 >>> s 

628 Corn Flakes 100.0 

629 Almond Delight 110.0 

630 Cinnamon Toast Crunch 120.0 

631 Cocoa Puff 110.0 

632 dtype: float64 

633 

634 >>> s.argmax() 

635 2 

636 >>> s.argmin() 

637 0 

638 

639 The maximum cereal calories is the third element and 

640 the minimum cereal calories is the first element, 

641 since the series is zero-indexed.

642 """ 

643 delegate = self._values 

644 nv.validate_minmax_axis(axis) 

645 skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) 

646 

647 if isinstance(delegate, ExtensionArray): 

648 if not skipna and delegate.isna().any(): 

649 return -1 

650 else: 

651 return delegate.argmax() 

652 else: 

653 # error: Incompatible return value type (got "Union[int, ndarray]", expected 

654 # "int") 

655 return nanops.nanargmax( # type: ignore[return-value] 

656 delegate, skipna=skipna 

657 ) 

658 

659 def min(self, axis=None, skipna: bool = True, *args, **kwargs): 

660 """ 

661 Return the minimum value of the Index. 

662 

663 Parameters 

664 ---------- 

665 axis : {None} 

666 Dummy argument for consistency with Series. 

667 skipna : bool, default True 

668 Exclude NA/null values when showing the result. 

669 *args, **kwargs 

670 Additional arguments and keywords for compatibility with NumPy. 

671 

672 Returns 

673 ------- 

674 scalar 

675 Minimum value. 

676 

677 See Also 

678 -------- 

679 Index.max : Return the maximum value of the object. 

680 Series.min : Return the minimum value in a Series. 

681 DataFrame.min : Return the minimum values in a DataFrame. 

682 

683 Examples 

684 -------- 

685 >>> idx = pd.Index([3, 2, 1]) 

686 >>> idx.min() 

687 1 

688 

689 >>> idx = pd.Index(['c', 'b', 'a']) 

690 >>> idx.min() 

691 'a' 

692 

693 For a MultiIndex, the minimum is determined lexicographically. 

694 

695 >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) 

696 >>> idx.min() 

697 ('a', 1) 

698 """ 

699 nv.validate_minmax_axis(axis) 

700 nv.validate_min(args, kwargs) 

701 return nanops.nanmin(self._values, skipna=skipna) 

702 

703 @doc(argmax, op="min", oppose="max", value="smallest") 

704 def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: 

705 delegate = self._values 

706 nv.validate_minmax_axis(axis) 

707 skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) 

708 

709 if isinstance(delegate, ExtensionArray): 

710 if not skipna and delegate.isna().any(): 

711 return -1 

712 else: 

713 return delegate.argmin() 

714 else: 

715 # error: Incompatible return value type (got "Union[int, ndarray]", expected 

716 # "int") 

717 return nanops.nanargmin( # type: ignore[return-value] 

718 delegate, skipna=skipna 

719 ) 

720 

721 def tolist(self): 

722 """ 

723 Return a list of the values. 

724 

725 These are each a scalar type, which is a Python scalar 

726 (for str, int, float) or a pandas scalar 

727 (for Timestamp/Timedelta/Interval/Period) 

728 

729 Returns 

730 ------- 

731 list 

732 

733 See Also 

734 -------- 

735 numpy.ndarray.tolist : Return the array as an a.ndim-levels deep 

736 nested list of Python scalars. 

737 """ 

738 return self._values.tolist() 

739 

740 to_list = tolist 

741 
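# --- Editor's illustrative sketch (not part of pandas/core/base.py): tolist()
# yields Python scalars for NumPy dtypes and pandas scalars (e.g. Timestamp) for
# datetime-like values, as the docstring above describes.
import pandas as pd

ints = pd.Series([1, 2, 3]).tolist()
assert ints == [1, 2, 3] and all(isinstance(v, int) for v in ints)
stamps = pd.Series(pd.to_datetime(["2000-01-01"])).tolist()
assert isinstance(stamps[0], pd.Timestamp)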

742 def __iter__(self): 

743 """ 

744 Return an iterator of the values. 

745 

746 These are each a scalar type, which is a Python scalar 

747 (for str, int, float) or a pandas scalar 

748 (for Timestamp/Timedelta/Interval/Period) 

749 

750 Returns 

751 ------- 

752 iterator 

753 """ 

754 # We are explicitly making element iterators. 

755 if not isinstance(self._values, np.ndarray): 

756 # Check type instead of dtype to catch DTA/TDA 

757 return iter(self._values) 

758 else: 

759 return map(self._values.item, range(self._values.size)) 

760 

761 @cache_readonly 

762 def hasnans(self) -> bool: 

763 """ 

764 Return True if there are any NaNs. 

765 

766 Enables various performance speedups. 

767 """ 

768 # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]" 

769 # has no attribute "any" 

770 return bool(isna(self).any()) # type: ignore[union-attr] 

771 

772 def isna(self) -> npt.NDArray[np.bool_]: 

773 return isna(self._values) 

774 

775 def _reduce( 

776 self, 

777 op, 

778 name: str, 

779 *, 

780 axis=0, 

781 skipna=True, 

782 numeric_only=None, 

783 filter_type=None, 

784 **kwds, 

785 ): 

786 """ 

787 Perform the reduction type operation if we can. 

788 """ 

789 func = getattr(self, name, None) 

790 if func is None: 

791 raise TypeError( 

792 f"{type(self).__name__} cannot perform the operation {name}" 

793 ) 

794 return func(skipna=skipna, **kwds) 

795 

796 @final 

797 def _map_values(self, mapper, na_action=None): 

798 """ 

799 An internal function that maps values using the input 

800 correspondence (which can be a dict, Series, or function). 

801 

802 Parameters 

803 ---------- 

804 mapper : function, dict, or Series 

805 The input correspondence object 

806 na_action : {None, 'ignore'} 

807 If 'ignore', propagate NA values, without passing them to the 

808 mapping function 

809 

810 Returns 

811 ------- 

812 Union[Index, MultiIndex], inferred 

813 The output of the mapping function applied to the index. 

814 If the function returns a tuple with more than one element,

815 a MultiIndex will be returned. 

816 """ 

817 # we can fastpath dict/Series to an efficient map 

818 # as we know that we are not going to have to yield 

819 # python types 

820 if is_dict_like(mapper): 

821 if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): 

822 # If a dictionary subclass defines a default value method, 

823 # convert mapper to a lookup function (GH #15999). 

824 dict_with_default = mapper 

825 mapper = lambda x: dict_with_default[x] 

826 else: 

827 # Dictionary does not have a default. Thus it's safe to 

828 # convert to a Series for efficiency.

829 # we specify the keys here to handle the 

830 # possibility that they are tuples 

831 

832 # The return value of mapping with an empty mapper is 

833 # expected to be pd.Series(np.nan, ...). As np.nan is 

834 # of dtype float64 the return value of this method should 

835 # be float64 as well 

836 mapper = create_series_with_explicit_dtype( 

837 mapper, dtype_if_empty=np.float64 

838 ) 

839 

840 if isinstance(mapper, ABCSeries): 

841 if na_action not in (None, "ignore"): 

842 msg = ( 

843 "na_action must either be 'ignore' or None, " 

844 f"{na_action} was passed" 

845 ) 

846 raise ValueError(msg) 

847 

848 if na_action == "ignore": 

849 mapper = mapper[mapper.index.notna()] 

850 

851 # Since values were input this means we came from either 

852 # a dict or a series and mapper should be an index 

853 if is_categorical_dtype(self.dtype): 

854 # use the built in categorical series mapper which saves 

855 # time by mapping the categories instead of all values 

856 

857 cat = cast("Categorical", self._values) 

858 return cat.map(mapper) 

859 

860 values = self._values 

861 

862 indexer = mapper.index.get_indexer(values) 

863 new_values = algorithms.take_nd(mapper._values, indexer) 

864 

865 return new_values 

866 

867 # we must convert to python types 

868 if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"): 

869 # GH#23179 some EAs do not have `map` 

870 values = self._values 

871 if na_action is not None: 

872 raise NotImplementedError 

873 map_f = lambda values, f: values.map(f) 

874 else: 

875 values = self._values.astype(object) 

876 if na_action == "ignore": 

877 map_f = lambda values, f: lib.map_infer_mask( 

878 values, f, isna(values).view(np.uint8) 

879 ) 

880 elif na_action is None: 

881 map_f = lib.map_infer 

882 else: 

883 msg = ( 

884 "na_action must either be 'ignore' or None, " 

885 f"{na_action} was passed" 

886 ) 

887 raise ValueError(msg) 

888 

889 # mapper is a function 

890 new_values = map_f(values, mapper) 

891 

892 return new_values 

893 
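# --- Editor's illustrative sketch (not part of pandas/core/base.py). Observed via
# Series.map, it contrasts the two dict-like branches of _map_values: a plain dict
# becomes a Series lookup (missing keys map to NaN), while a dict subclass that
# defines __missing__ (here collections.defaultdict) is wrapped as a lookup
# function so its default value is honoured (GH #15999).
import collections
import pandas as pd

s = pd.Series(["cat", "dog"])
print(s.map({"cat": "kitten"}))                                        # dog -> NaN
print(s.map(collections.defaultdict(lambda: "?", {"cat": "kitten"})))  # dog -> "?"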

894 def value_counts( 

895 self, 

896 normalize: bool = False, 

897 sort: bool = True, 

898 ascending: bool = False, 

899 bins=None, 

900 dropna: bool = True, 

901 ) -> Series: 

902 """ 

903 Return a Series containing counts of unique values. 

904 

905 The resulting object will be in descending order so that the 

906 first element is the most frequently-occurring element. 

907 Excludes NA values by default. 

908 

909 Parameters 

910 ---------- 

911 normalize : bool, default False 

912 If True then the object returned will contain the relative 

913 frequencies of the unique values. 

914 sort : bool, default True 

915 Sort by frequencies. 

916 ascending : bool, default False 

917 Sort in ascending order. 

918 bins : int, optional 

919 Rather than count values, group them into half-open bins, 

920 a convenience for ``pd.cut``, only works with numeric data. 

921 dropna : bool, default True 

922 Don't include counts of NaN. 

923 

924 Returns 

925 ------- 

926 Series 

927 

928 See Also 

929 -------- 

930 Series.count: Number of non-NA elements in a Series. 

931 DataFrame.count: Number of non-NA elements in a DataFrame. 

932 DataFrame.value_counts: Equivalent method on DataFrames. 

933 

934 Examples 

935 -------- 

936 >>> index = pd.Index([3, 1, 2, 3, 4, np.nan]) 

937 >>> index.value_counts() 

938 3.0 2 

939 1.0 1 

940 2.0 1 

941 4.0 1 

942 dtype: int64 

943 

944 With `normalize` set to `True`, returns the relative frequency by 

945 dividing all values by the sum of values. 

946 

947 >>> s = pd.Series([3, 1, 2, 3, 4, np.nan]) 

948 >>> s.value_counts(normalize=True) 

949 3.0 0.4 

950 1.0 0.2 

951 2.0 0.2 

952 4.0 0.2 

953 dtype: float64 

954 

955 **bins** 

956 

957 Bins can be useful for going from a continuous variable to a 

958 categorical variable; instead of counting unique 

959 occurrences of values, divide the index into the specified

960 number of half-open bins. 

961 

962 >>> s.value_counts(bins=3) 

963 (0.996, 2.0] 2 

964 (2.0, 3.0] 2 

965 (3.0, 4.0] 1 

966 dtype: int64 

967 

968 **dropna** 

969 

970 With `dropna` set to `False` we can also see NaN index values. 

971 

972 >>> s.value_counts(dropna=False) 

973 3.0 2 

974 1.0 1 

975 2.0 1 

976 4.0 1 

977 NaN 1 

978 dtype: int64 

979 """ 

980 return value_counts( 

981 self, 

982 sort=sort, 

983 ascending=ascending, 

984 normalize=normalize, 

985 bins=bins, 

986 dropna=dropna, 

987 ) 

988 

989 def unique(self): 

990 values = self._values 

991 

992 if not isinstance(values, np.ndarray): 

993 result: ArrayLike = values.unique() 

994 if ( 

995 isinstance(self.dtype, np.dtype) and self.dtype.kind in ["m", "M"] 

996 ) and isinstance(self, ABCSeries): 

997 # GH#31182 Series._values returns EA 

998 # unpack numpy datetime for backward-compat 

999 result = np.asarray(result) 

1000 else: 

1001 result = unique1d(values) 

1002 

1003 return result 

1004 
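# --- Editor's illustrative sketch (not part of pandas/core/base.py): unique()
# keeps extension dtypes (e.g. Categorical) but, per the GH#31182 branch above,
# unwraps datetime-like Series values back to a plain numpy.ndarray.
import pandas as pd

print(pd.Series(pd.Categorical(["a", "b", "a"])).unique())        # Categorical
print(type(pd.Series(pd.to_datetime(["2000-01-01"])).unique()))   # numpy.ndarray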

1005 def nunique(self, dropna: bool = True) -> int: 

1006 """ 

1007 Return number of unique elements in the object. 

1008 

1009 Excludes NA values by default. 

1010 

1011 Parameters 

1012 ---------- 

1013 dropna : bool, default True 

1014 Don't include NaN in the count. 

1015 

1016 Returns 

1017 ------- 

1018 int 

1019 

1020 See Also 

1021 -------- 

1022 DataFrame.nunique: Method nunique for DataFrame. 

1023 Series.count: Count non-NA/null observations in the Series. 

1024 

1025 Examples 

1026 -------- 

1027 >>> s = pd.Series([1, 3, 5, 7, 7]) 

1028 >>> s 

1029 0 1 

1030 1 3 

1031 2 5 

1032 3 7 

1033 4 7 

1034 dtype: int64 

1035 

1036 >>> s.nunique() 

1037 4 

1038 """ 

1039 uniqs = self.unique() 

1040 if dropna: 

1041 uniqs = remove_na_arraylike(uniqs) 

1042 return len(uniqs) 

1043 

1044 @property 

1045 def is_unique(self) -> bool: 

1046 """ 

1047 Return boolean if values in the object are unique. 

1048 

1049 Returns 

1050 ------- 

1051 bool 

1052 """ 

1053 return self.nunique(dropna=False) == len(self) 

1054 

1055 @property 

1056 def is_monotonic(self) -> bool: 

1057 """ 

1058 Return boolean if values in the object are monotonically increasing. 

1059 

1060 .. deprecated:: 1.5.0 

1061 is_monotonic is deprecated and will be removed in a future version. 

1062 Use is_monotonic_increasing instead. 

1063 

1064 Returns 

1065 ------- 

1066 bool 

1067 """ 

1068 warnings.warn( 

1069 "is_monotonic is deprecated and will be removed in a future version. " 

1070 "Use is_monotonic_increasing instead.", 

1071 FutureWarning, 

1072 stacklevel=find_stack_level(), 

1073 ) 

1074 return self.is_monotonic_increasing 

1075 

1076 @property 

1077 def is_monotonic_increasing(self) -> bool: 

1078 """ 

1079 Return boolean if values in the object are monotonically increasing. 

1080 

1081 Returns 

1082 ------- 

1083 bool 

1084 """ 

1085 from pandas import Index 

1086 

1087 return Index(self).is_monotonic_increasing 

1088 

1089 @property 

1090 def is_monotonic_decreasing(self) -> bool: 

1091 """ 

1092 Return boolean if values in the object are monotonically decreasing. 

1093 

1094 Returns 

1095 ------- 

1096 bool 

1097 """ 

1098 from pandas import Index 

1099 

1100 return Index(self).is_monotonic_decreasing 

1101 
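# --- Editor's illustrative sketch (not part of pandas/core/base.py): the
# monotonicity properties delegate to Index, so Series and Index behave alike and
# the checks are non-strict (repeated values still count as monotonic).
import pandas as pd

assert pd.Index([1, 2, 2, 3]).is_monotonic_increasing
assert pd.Series([3, 2, 1]).is_monotonic_decreasing
assert not pd.Index([1, 3, 2]).is_monotonic_increasing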

1102 def _memory_usage(self, deep: bool = False) -> int: 

1103 """ 

1104 Memory usage of the values. 

1105 

1106 Parameters 

1107 ---------- 

1108 deep : bool, default False 

1109 Introspect the data deeply, interrogate 

1110 `object` dtypes for system-level memory consumption. 

1111 

1112 Returns 

1113 ------- 

1114 int : bytes used

1115 

1116 See Also 

1117 -------- 

1118 numpy.ndarray.nbytes : Total bytes consumed by the elements of the 

1119 array. 

1120 

1121 Notes 

1122 ----- 

1123 Memory usage does not include memory consumed by elements that 

1124 are not components of the array if deep=False or if used on PyPy.

1125 """ 

1126 if hasattr(self.array, "memory_usage"): 

1127 # https://github.com/python/mypy/issues/1424 

1128 # error: "ExtensionArray" has no attribute "memory_usage" 

1129 return self.array.memory_usage(deep=deep) # type: ignore[attr-defined] 

1130 

1131 v = self.array.nbytes 

1132 if deep and is_object_dtype(self) and not PYPY: 

1133 values = cast(np.ndarray, self._values) 

1134 v += lib.memory_usage_of_objects(values) 

1135 return v 

1136 
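# --- Editor's illustrative sketch (not part of pandas/core/base.py): with
# deep=True, _memory_usage (reached via the public memory_usage) also counts the
# Python string objects behind an object-dtype array, so it is never smaller than
# the shallow figure (the raw ndarray buffer, self.array.nbytes).
import pandas as pd

idx = pd.Index(["a", "bb", "ccc"])
assert idx.memory_usage(deep=True) >= idx.memory_usage(deep=False)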

1137 @doc( 

1138 algorithms.factorize, 

1139 values="", 

1140 order="", 

1141 size_hint="", 

1142 sort=textwrap.dedent( 

1143 """\ 

1144 sort : bool, default False 

1145 Sort `uniques` and shuffle `codes` to maintain the 

1146 relationship. 

1147 """ 

1148 ), 

1149 ) 

1150 def factorize( 

1151 self, 

1152 sort: bool = False, 

1153 na_sentinel: int | lib.NoDefault = lib.no_default, 

1154 use_na_sentinel: bool | lib.NoDefault = lib.no_default, 

1155 ): 

1156 return algorithms.factorize( 

1157 self, sort=sort, na_sentinel=na_sentinel, use_na_sentinel=use_na_sentinel 

1158 ) 

1159 

1160 _shared_docs[ 

1161 "searchsorted" 

1162 ] = """ 

1163 Find indices where elements should be inserted to maintain order. 

1164 

1165 Find the indices into a sorted {klass} `self` such that, if the 

1166 corresponding elements in `value` were inserted before the indices, 

1167 the order of `self` would be preserved. 

1168 

1169 .. note:: 

1170 

1171 The {klass} *must* be monotonically sorted, otherwise 

1172 wrong locations will likely be returned. Pandas does *not* 

1173 check this for you. 

1174 

1175 Parameters 

1176 ---------- 

1177 value : array-like or scalar 

1178 Values to insert into `self`. 

1179 side : {{'left', 'right'}}, optional 

1180 If 'left', the index of the first suitable location found is given. 

1181 If 'right', return the last such index. If there is no suitable 

1182 index, return either 0 or N (where N is the length of `self`). 

1183 sorter : 1-D array-like, optional 

1184 Optional array of integer indices that sort `self` into ascending 

1185 order. They are typically the result of ``np.argsort``. 

1186 

1187 Returns 

1188 ------- 

1189 int or array of int 

1190 A scalar or array of insertion points with the 

1191 same shape as `value`. 

1192 

1193 See Also 

1194 -------- 

1195 sort_values : Sort by the values along either axis. 

1196 numpy.searchsorted : Similar method from NumPy. 

1197 

1198 Notes 

1199 ----- 

1200 Binary search is used to find the required insertion points. 

1201 

1202 Examples 

1203 -------- 

1204 >>> ser = pd.Series([1, 2, 3]) 

1205 >>> ser 

1206 0 1 

1207 1 2 

1208 2 3 

1209 dtype: int64 

1210 

1211 >>> ser.searchsorted(4) 

1212 3 

1213 

1214 >>> ser.searchsorted([0, 4]) 

1215 array([0, 3]) 

1216 

1217 >>> ser.searchsorted([1, 3], side='left') 

1218 array([0, 2]) 

1219 

1220 >>> ser.searchsorted([1, 3], side='right') 

1221 array([1, 3]) 

1222 

1223 >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000'])) 

1224 >>> ser 

1225 0 2000-03-11 

1226 1 2000-03-12 

1227 2 2000-03-13 

1228 dtype: datetime64[ns] 

1229 

1230 >>> ser.searchsorted('3/14/2000') 

1231 3 

1232 

1233 >>> ser = pd.Categorical( 

1234 ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True 

1235 ... ) 

1236 >>> ser 

1237 ['apple', 'bread', 'bread', 'cheese', 'milk'] 

1238 Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk'] 

1239 

1240 >>> ser.searchsorted('bread') 

1241 1 

1242 

1243 >>> ser.searchsorted(['bread'], side='right') 

1244 array([3]) 

1245 

1246 If the values are not monotonically sorted, wrong locations 

1247 may be returned: 

1248 

1249 >>> ser = pd.Series([2, 1, 3]) 

1250 >>> ser 

1251 0 2 

1252 1 1 

1253 2 3 

1254 dtype: int64 

1255 

1256 >>> ser.searchsorted(1) # doctest: +SKIP 

1257 0 # wrong result, correct would be 1 

1258 """ 

1259 

1260 # This overload is needed so that the call to searchsorted in 

1261 # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result 

1262 

1263 @overload 

1264 # The following ignore is also present in numpy/__init__.pyi 

1265 # Possibly a mypy bug?? 

1266 # error: Overloaded function signatures 1 and 2 overlap with incompatible 

1267 # return types [misc] 

1268 def searchsorted( # type: ignore[misc] 

1269 self, 

1270 value: ScalarLike_co, 

1271 side: Literal["left", "right"] = ..., 

1272 sorter: NumpySorter = ..., 

1273 ) -> np.intp: 

1274 ... 

1275 

1276 @overload 

1277 def searchsorted( 

1278 self, 

1279 value: npt.ArrayLike | ExtensionArray, 

1280 side: Literal["left", "right"] = ..., 

1281 sorter: NumpySorter = ..., 

1282 ) -> npt.NDArray[np.intp]: 

1283 ... 

1284 

1285 @doc(_shared_docs["searchsorted"], klass="Index") 

1286 def searchsorted( 

1287 self, 

1288 value: NumpyValueArrayLike | ExtensionArray, 

1289 side: Literal["left", "right"] = "left", 

1290 sorter: NumpySorter = None, 

1291 ) -> npt.NDArray[np.intp] | np.intp: 

1292 

1293 values = self._values 

1294 if not isinstance(values, np.ndarray): 

1295 # Going through EA.searchsorted directly improves performance GH#38083 

1296 return values.searchsorted(value, side=side, sorter=sorter) 

1297 

1298 return algorithms.searchsorted( 

1299 values, 

1300 value, 

1301 side=side, 

1302 sorter=sorter, 

1303 ) 

1304 

1305 def drop_duplicates(self, keep="first"): 

1306 duplicated = self._duplicated(keep=keep) 

1307 # error: Value of type "IndexOpsMixin" is not indexable 

1308 return self[~duplicated] # type: ignore[index] 

1309 

1310 @final 

1311 def _duplicated( 

1312 self, keep: Literal["first", "last", False] = "first" 

1313 ) -> npt.NDArray[np.bool_]: 

1314 return duplicated(self._values, keep=keep) 

1315 
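# --- Editor's illustrative sketch (not part of pandas/core/base.py):
# drop_duplicates is just `self[~self._duplicated(keep)]`, so `keep` selects which
# occurrence of each repeated value survives.
import pandas as pd

idx = pd.Index([3, 1, 3])
print(idx.drop_duplicates(keep="first").tolist())  # [3, 1]
print(idx.drop_duplicates(keep="last").tolist())   # [1, 3]
print(idx.drop_duplicates(keep=False).tolist())    # [1]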

1316 def _arith_method(self, other, op): 

1317 res_name = ops.get_op_result_name(self, other) 

1318 

1319 lvalues = self._values 

1320 rvalues = extract_array(other, extract_numpy=True, extract_range=True) 

1321 rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape) 

1322 rvalues = ensure_wrapped_if_datetimelike(rvalues) 

1323 

1324 with np.errstate(all="ignore"): 

1325 result = ops.arithmetic_op(lvalues, rvalues, op) 

1326 

1327 return self._construct_result(result, name=res_name) 

1328 

1329 def _construct_result(self, result, name): 

1330 """ 

1331 Construct an appropriately-wrapped result from the ArrayLike result 

1332 of an arithmetic-like operation. 

1333 """ 

1334 raise AbstractMethodError(self)