1"""
2Base and utility classes for pandas objects.
3"""
5from __future__ import annotations
7import textwrap
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 Generic,
12 Hashable,
13 Literal,
14 TypeVar,
15 cast,
16 final,
17 overload,
18)
19import warnings
21import numpy as np
23import pandas._libs.lib as lib
24from pandas._typing import (
25 ArrayLike,
26 DtypeObj,
27 IndexLabel,
28 NDFrameT,
29 Shape,
30 npt,
31)
32from pandas.compat import PYPY
33from pandas.compat.numpy import function as nv
34from pandas.errors import AbstractMethodError
35from pandas.util._decorators import (
36 cache_readonly,
37 doc,
38)
39from pandas.util._exceptions import find_stack_level
41from pandas.core.dtypes.common import (
42 is_categorical_dtype,
43 is_dict_like,
44 is_extension_array_dtype,
45 is_object_dtype,
46 is_scalar,
47)
48from pandas.core.dtypes.generic import (
49 ABCDataFrame,
50 ABCIndex,
51 ABCSeries,
52)
53from pandas.core.dtypes.missing import (
54 isna,
55 remove_na_arraylike,
56)
58from pandas.core import (
59 algorithms,
60 nanops,
61 ops,
62)
63from pandas.core.accessor import DirNamesMixin
64from pandas.core.algorithms import (
65 duplicated,
66 unique1d,
67 value_counts,
68)
69from pandas.core.arraylike import OpsMixin
70from pandas.core.arrays import ExtensionArray
71from pandas.core.construction import (
72 create_series_with_explicit_dtype,
73 ensure_wrapped_if_datetimelike,
74 extract_array,
75)
77if TYPE_CHECKING: 77 ↛ 79line 77 didn't jump to line 79, because the condition on line 77 was never true
79 from pandas._typing import (
80 NumpySorter,
81 NumpyValueArrayLike,
82 ScalarLike_co,
83 )
85 from pandas import (
86 Categorical,
87 Series,
88 )
91_shared_docs: dict[str, str] = {}
92_indexops_doc_kwargs = {
93 "klass": "IndexOpsMixin",
94 "inplace": "",
95 "unique": "IndexOpsMixin",
96 "duplicated": "IndexOpsMixin",
97}
99_T = TypeVar("_T", bound="IndexOpsMixin")
102class PandasObject(DirNamesMixin):
103 """
104 Baseclass for various pandas objects.
105 """
107 # results from calls to methods decorated with cache_readonly get added to _cache
108 _cache: dict[str, Any]

    @property
    def _constructor(self):
        """
        Class constructor (for this class it's just `__class__`).
        """
        return type(self)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular object.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key: str | None = None) -> None:
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if not hasattr(self, "_cache"):
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self) -> int:
        """
        Generates the total memory usage for an object that returns
        either a value or a Series of values.
        """
        memory_usage = getattr(self, "memory_usage", None)
        if memory_usage:
            mem = memory_usage(deep=True)
            return int(mem if is_scalar(mem) else mem.sum())

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super().__sizeof__()
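

# A minimal sketch of the delegation above (illustrative only, not part of
# pandas): ``sys.getsizeof`` invokes ``__sizeof__``, which reports
# ``memory_usage(deep=True)`` when the object defines ``memory_usage`` and
# otherwise falls back to ``object.__sizeof__``. Exact byte counts are
# platform-dependent, hence the skip directive.
#
#     >>> import sys
#     >>> import pandas as pd
#     >>> s = pd.Series(["a", "bb", "ccc"])
#     >>> sys.getsizeof(s) >= int(s.memory_usage(deep=True))  # doctest: +SKIP
#     True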


class NoNewAttributesMixin:
    """
    Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self._freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self):
        """
        Prevents setting additional attributes.
        """
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key: str, value) -> None:
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if getattr(self, "__frozen", False) and not (
            key == "_cache"
            or key in type(self).__dict__
            or getattr(self, key, None) is not None
        ):
            raise AttributeError(f"You cannot add any new attribute '{key}'")
        object.__setattr__(self, key, value)
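

# A minimal usage sketch (illustrative only, not part of pandas): accessors
# such as ``Series.str`` call ``_freeze()`` after construction, so assigning
# an unknown attribute raises instead of silently creating it, which catches
# typos on the accessor.
#
#     >>> import pandas as pd
#     >>> s = pd.Series(["a", "b"])
#     >>> s.str.not_a_real_attribute = 1  # doctest: +SKIP
#     AttributeError: You cannot add any new attribute 'not_a_real_attribute'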


class SelectionMixin(Generic[NDFrameT]):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions.
    """

    obj: NDFrameT
    _selection: IndexLabel | None = None
    exclusions: frozenset[Hashable]
    _internal_names = ["_cache", "__setstate__"]
    _internal_names_set = set(_internal_names)

    @final
    @property
    def _selection_list(self):
        if not isinstance(
            self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray)
        ):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    @cache_readonly
    def ndim(self) -> int:
        return self._selected_obj.ndim

    @final
    @cache_readonly
    def _obj_with_exclusions(self):
        if self._selection is not None and isinstance(self.obj, ABCDataFrame):
            return self.obj[self._selection_list]

        if len(self.exclusions) > 0:
            # equivalent to `self.obj.drop(self.exclusions, axis=1)`
            # but this avoids consolidating and making a copy
            # TODO: following GH#45287 can we now use .drop directly without
            #  making a copy?
            return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError(f"Column(s) {self._selection} already selected")

        if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(set(key)):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
            return self._gotitem(list(key), ndim=2)

        elif not getattr(self, "as_index", False):
            if key not in self.obj.columns:
                raise KeyError(f"Column not found: {key}")
            return self._gotitem(key, ndim=2)

        else:
            if key not in self.obj:
                raise KeyError(f"Column not found: {key}")
            subset = self.obj[key]
            ndim = subset.ndim
            return self._gotitem(key, ndim=ndim, subset=subset)
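
    # A minimal sketch of the selection interface implemented above
    # (illustrative only, not part of pandas): indexing a GroupBy object
    # routes through ``SelectionMixin.__getitem__``, so a scalar key yields a
    # 1-dim selection and a list key a 2-dim one.
    #
    #     >>> import pandas as pd
    #     >>> df = pd.DataFrame({"g": ["a", "a", "b"], "x": [1, 2, 3], "y": [4, 5, 6]})
    #     >>> df.groupby("g")["x"].sum()  # scalar key -> ndim=1 selection
    #     g
    #     a    3
    #     b    3
    #     Name: x, dtype: int64
    #     >>> df.groupby("g")[["x", "y"]].sum().columns.tolist()  # list key -> ndim=2
    #     ['x', 'y']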

    def _gotitem(self, key, ndim: int, subset=None):
        """
        sub-classes to define
        return a sliced object

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate


class IndexOpsMixin(OpsMixin):
    """
    Common ops mixin to support a unified interface / docs for Series / Index
    """

    # ndarray compatibility
    __array_priority__ = 1000
    _hidden_attrs: frozenset[str] = frozenset(
        ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
    )

    @property
    def dtype(self) -> DtypeObj:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    def transpose(self: _T, *args, **kwargs) -> _T:
        """
        Return the transpose, which is by definition self.

        Returns
        -------
        %(klass)s
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(
        transpose,
        doc="""
        Return the transpose, which is by definition self.
        """,
    )

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        return self._values.shape

    def __len__(self) -> int:
        # We need this defined here for mypy
        raise AbstractMethodError(self)

    @property
    def ndim(self) -> Literal[1]:
        """
        Number of dimensions of the underlying data, by definition 1.
        """
        return 1

    def item(self):
        """
        Return the first element of the underlying data as a Python scalar.

        Returns
        -------
        scalar
            The first element of %(klass)s.

        Raises
        ------
        ValueError
            If the data is not length-1.
        """
        if len(self) == 1:
            return next(iter(self))
        raise ValueError("can only convert an array of size 1 to a Python scalar")
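
    # A small doctest-style sketch of ``item`` (illustrative only; the
    # docstring above has no Examples section):
    #
    #     >>> import pandas as pd
    #     >>> pd.Series([42]).item()
    #     42
    #     >>> pd.Index([42]).item()
    #     42
    #     >>> pd.Series([1, 2]).item()  # doctest: +SKIP
    #     ValueError: can only convert an array of size 1 to a Python scalar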

    @property
    def nbytes(self) -> int:
        """
        Return the number of bytes in the underlying data.
        """
        return self._values.nbytes

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.
        """
        return len(self._values)

    @property
    def array(self) -> ExtensionArray:
        """
        The ExtensionArray of the data backing this Series or Index.

        Returns
        -------
        ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.
            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        string             StringArray
        boolean            BooleanArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int and float, a PandasArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <PandasArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        ['a', 'b', 'a']
        Categories (2, object): ['a', 'b']
        """
        raise AbstractMethodError(self)

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
        **kwargs,
    ) -> np.ndarray:
        """
        A NumPy ndarray representing the values in this Series or Index.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.

            .. versionadded:: 1.0.0

        **kwargs
            Additional keywords passed through to the ``to_numpy`` method
            of the underlying array (for extension arrays).

            .. versionadded:: 1.0.0

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the result
        in place will modify the data stored in the Series or Index (not that
        we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is represented.
        Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp`
        objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if is_extension_array_dtype(self.dtype):
            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
        elif kwargs:
            bad_keys = list(kwargs.keys())[0]
            raise TypeError(
                f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
            )

        result = np.asarray(self._values, dtype=dtype)
        # TODO(GH-24345): Avoid potential double copy
        if copy or na_value is not lib.no_default:
            result = result.copy()
            if na_value is not lib.no_default:
                result[np.asanyarray(self.isna())] = na_value
        return result

    @property
    def empty(self) -> bool:
        return not self.size

    def max(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """
        nv.validate_minmax_axis(axis)
        nv.validate_max(args, kwargs)
        return nanops.nanmax(self._values, skipna=skipna)

    @doc(op="max", oppose="min", value="largest")
    def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
        """
        Return int position of the {value} value in the Series.

        If the {op}imum is achieved in multiple locations,
        the first row position is returned.

        Parameters
        ----------
        axis : {{None}}
            Unused. Parameter needed for compatibility with DataFrame.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        int
            Row position of the {op}imum value.

        See Also
        --------
        Series.arg{op} : Return position of the {op}imum value.
        Series.arg{oppose} : Return position of the {oppose}imum value.
        numpy.ndarray.arg{op} : Equivalent method for numpy arrays.
        Series.idxmax : Return index label of the maximum values.
        Series.idxmin : Return index label of the minimum values.

        Examples
        --------
        Consider dataset containing cereal calories

        >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0,
        ...                'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}})
        >>> s
        Corn Flakes              100.0
        Almond Delight           110.0
        Cinnamon Toast Crunch    120.0
        Cocoa Puff               110.0
        dtype: float64

        >>> s.argmax()
        2
        >>> s.argmin()
        0

        The maximum cereal calories is the third element and
        the minimum cereal calories is the first element,
        since series is zero-indexed.
        """
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                return -1
            else:
                return delegate.argmax()
        else:
            # error: Incompatible return value type (got "Union[int, ndarray]", expected
            # "int")
            return nanops.nanargmax(  # type: ignore[return-value]
                delegate, skipna=skipna
            )

    def min(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the minimum value of the Index.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_minmax_axis(axis)
        nv.validate_min(args, kwargs)
        return nanops.nanmin(self._values, skipna=skipna)

    @doc(argmax, op="min", oppose="max", value="smallest")
    def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int:
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                return -1
            else:
                return delegate.argmin()
        else:
            # error: Incompatible return value type (got "Union[int, ndarray]", expected
            # "int")
            return nanops.nanargmin(  # type: ignore[return-value]
                delegate, skipna=skipna
            )

    def tolist(self):
        """
        Return a list of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period)

        Returns
        -------
        list

        See Also
        --------
        numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
            nested list of Python scalars.
        """
        return self._values.tolist()

    to_list = tolist

    def __iter__(self):
        """
        Return an iterator of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period)

        Returns
        -------
        iterator
        """
        # We are explicitly making element iterators.
        if not isinstance(self._values, np.ndarray):
            # Check type instead of dtype to catch DTA/TDA
            return iter(self._values)
        else:
            return map(self._values.item, range(self._values.size))

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs.

        Enables various performance speedups.
        """
        # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]"
        # has no attribute "any"
        return bool(isna(self).any())  # type: ignore[union-attr]

    def isna(self) -> npt.NDArray[np.bool_]:
        return isna(self._values)

    def _reduce(
        self,
        op,
        name: str,
        *,
        axis=0,
        skipna=True,
        numeric_only=None,
        filter_type=None,
        **kwds,
    ):
        """
        Perform the reduction type operation if we can.
        """
        func = getattr(self, name, None)
        if func is None:
            raise TypeError(
                f"{type(self).__name__} cannot perform the operation {name}"
            )
        return func(skipna=skipna, **kwds)

    @final
    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if is_dict_like(mapper):
            if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[x]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to a Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples

                # The return value of mapping with an empty mapper is
                # expected to be pd.Series(np.nan, ...). As np.nan is
                # of dtype float64 the return value of this method should
                # be float64 as well
                mapper = create_series_with_explicit_dtype(
                    mapper, dtype_if_empty=np.float64
                )

        if isinstance(mapper, ABCSeries):
            if na_action not in (None, "ignore"):
                msg = (
                    "na_action must either be 'ignore' or None, "
                    f"{na_action} was passed"
                )
                raise ValueError(msg)

            if na_action == "ignore":
                mapper = mapper[mapper.index.notna()]

            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_categorical_dtype(self.dtype):
                # use the built in categorical series mapper which saves
                # time by mapping the categories instead of all values

                cat = cast("Categorical", self._values)
                return cat.map(mapper)

            values = self._values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_nd(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
            # GH#23179 some EAs do not have `map`
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self._values.astype(object)
            if na_action == "ignore":
                map_f = lambda values, f: lib.map_infer_mask(
                    values, f, isna(values).view(np.uint8)
                )
            elif na_action is None:
                map_f = lib.map_infer
            else:
                msg = (
                    "na_action must either be 'ignore' or None, "
                    f"{na_action} was passed"
                )
                raise ValueError(msg)

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
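
    # A minimal sketch of the ``__missing__`` fastpath above (illustrative
    # only, not part of pandas): a ``defaultdict`` defines ``__missing__``,
    # so the mapper is kept as a lookup function instead of being converted
    # to a Series, and unmapped keys get the default rather than NaN.
    #
    #     >>> from collections import defaultdict
    #     >>> import pandas as pd
    #     >>> mapping = defaultdict(lambda: "other", {"a": "A"})
    #     >>> pd.Series(["a", "b"]).map(mapping).tolist()
    #     ['A', 'other']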

    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ) -> Series:
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : bool, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : bool, default True
            Sort by frequencies.
        ascending : bool, default False
            Sort in ascending order.
        bins : int, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : bool, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.
        DataFrame.value_counts: Equivalent method on DataFrames.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        1.0    0.2
        2.0    0.2
        4.0    0.2
        dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        apparitions of values, divide the index in the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (0.996, 2.0]    2
        (2.0, 3.0]      2
        (3.0, 4.0]      1
        dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        NaN    1
        dtype: int64
        """
        return value_counts(
            self,
            sort=sort,
            ascending=ascending,
            normalize=normalize,
            bins=bins,
            dropna=dropna,
        )

    def unique(self):
        values = self._values

        if not isinstance(values, np.ndarray):
            result: ArrayLike = values.unique()
            if (
                isinstance(self.dtype, np.dtype) and self.dtype.kind in ["m", "M"]
            ) and isinstance(self, ABCSeries):
                # GH#31182 Series._values returns EA
                # unpack numpy datetime for backward-compat
                result = np.asarray(result)
        else:
            result = unique1d(values)

        return result
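
    # A small doctest-style sketch of ``unique`` (illustrative only; the
    # method has no docstring here): values come back in order of first
    # appearance, and extension dtypes are returned as their ExtensionArray.
    #
    #     >>> import pandas as pd
    #     >>> pd.Series([2, 1, 2]).unique()
    #     array([2, 1])
    #     >>> pd.Series(pd.Categorical(['b', 'a', 'b'])).unique()
    #     ['b', 'a']
    #     Categories (2, object): ['a', 'b']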

    def nunique(self, dropna: bool = True) -> int:
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the count.

        Returns
        -------
        int

        See Also
        --------
        DataFrame.nunique: Method nunique for DataFrame.
        Series.count: Count non-NA/null observations in the Series.

        Examples
        --------
        >>> s = pd.Series([1, 3, 5, 7, 7])
        >>> s
        0    1
        1    3
        2    5
        3    7
        4    7
        dtype: int64

        >>> s.nunique()
        4
        """
        uniqs = self.unique()
        if dropna:
            uniqs = remove_na_arraylike(uniqs)
        return len(uniqs)

    @property
    def is_unique(self) -> bool:
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        bool
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        .. deprecated:: 1.5.0
            is_monotonic is deprecated and will be removed in a future version.
            Use is_monotonic_increasing instead.

        Returns
        -------
        bool
        """
        warnings.warn(
            "is_monotonic is deprecated and will be removed in a future version. "
            "Use is_monotonic_increasing instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self.is_monotonic_increasing

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        Returns
        -------
        bool
        """
        from pandas import Index

        return Index(self).is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically decreasing.

        Returns
        -------
        bool
        """
        from pandas import Index

        return Index(self).is_monotonic_decreasing

    def _memory_usage(self, deep: bool = False) -> int:
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool, default False
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes : Total bytes consumed by the elements of the
            array.

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy
        """
        if hasattr(self.array, "memory_usage"):
            # https://github.com/python/mypy/issues/1424
            # error: "ExtensionArray" has no attribute "memory_usage"
            return self.array.memory_usage(deep=deep)  # type: ignore[attr-defined]

        v = self.array.nbytes
        if deep and is_object_dtype(self) and not PYPY:
            values = cast(np.ndarray, self._values)
            v += lib.memory_usage_of_objects(values)
        return v

    @doc(
        algorithms.factorize,
        values="",
        order="",
        size_hint="",
        sort=textwrap.dedent(
            """\
            sort : bool, default False
                Sort `uniques` and shuffle `codes` to maintain the
                relationship.
            """
        ),
    )
    def factorize(
        self,
        sort: bool = False,
        na_sentinel: int | lib.NoDefault = lib.no_default,
        use_na_sentinel: bool | lib.NoDefault = lib.no_default,
    ):
        return algorithms.factorize(
            self, sort=sort, na_sentinel=na_sentinel, use_na_sentinel=use_na_sentinel
        )
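
    # A small sketch of what ``factorize`` returns (illustrative only, not
    # part of pandas): integer codes in order of first appearance plus the
    # unique values, so the original data can be reconstructed as
    # ``uniques.take(codes)``.
    #
    #     >>> import pandas as pd
    #     >>> codes, uniques = pd.Series(['b', 'a', 'b']).factorize()
    #     >>> codes
    #     array([0, 1, 0])
    #     >>> uniques
    #     Index(['b', 'a'], dtype='object')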

    _shared_docs[
        "searchsorted"
    ] = """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted {klass} `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        .. note::

            The {klass} *must* be monotonically sorted, otherwise
            wrong locations will likely be returned. Pandas does *not*
            check this for you.

        Parameters
        ----------
        value : array-like or scalar
            Values to insert into `self`.
        side : {{'left', 'right'}}, optional
            If 'left', the index of the first suitable location found is given.
            If 'right', return the last such index. If there is no suitable
            index, return either 0 or N (where N is the length of `self`).
        sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

        See Also
        --------
        sort_values : Sort by the values along either axis.
        numpy.searchsorted : Similar method from NumPy.

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> ser
        0    1
        1    2
        2    3
        dtype: int64

        >>> ser.searchsorted(4)
        3

        >>> ser.searchsorted([0, 4])
        array([0, 3])

        >>> ser.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> ser.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
        >>> ser
        0   2000-03-11
        1   2000-03-12
        2   2000-03-13
        dtype: datetime64[ns]

        >>> ser.searchsorted('3/14/2000')
        3

        >>> ser = pd.Categorical(
        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
        ... )
        >>> ser
        ['apple', 'bread', 'bread', 'cheese', 'milk']
        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']

        >>> ser.searchsorted('bread')
        1

        >>> ser.searchsorted(['bread'], side='right')
        array([3])

        If the values are not monotonically sorted, wrong locations
        may be returned:

        >>> ser = pd.Series([2, 1, 3])
        >>> ser
        0    2
        1    1
        2    3
        dtype: int64

        >>> ser.searchsorted(1)  # doctest: +SKIP
        0  # wrong result, correct would be 1
        """

    # This overload is needed so that the call to searchsorted in
    # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result

    @overload
    # The following ignore is also present in numpy/__init__.pyi
    # Possibly a mypy bug??
    # error: Overloaded function signatures 1 and 2 overlap with incompatible
    # return types [misc]
    def searchsorted(  # type: ignore[misc]
        self,
        value: ScalarLike_co,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> np.intp:
        ...

    @overload
    def searchsorted(
        self,
        value: npt.ArrayLike | ExtensionArray,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> npt.NDArray[np.intp]:
        ...

    @doc(_shared_docs["searchsorted"], klass="Index")
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter = None,
    ) -> npt.NDArray[np.intp] | np.intp:

        values = self._values
        if not isinstance(values, np.ndarray):
            # Going through EA.searchsorted directly improves performance GH#38083
            return values.searchsorted(value, side=side, sorter=sorter)

        return algorithms.searchsorted(
            values,
            value,
            side=side,
            sorter=sorter,
        )

    def drop_duplicates(self, keep="first"):
        duplicated = self._duplicated(keep=keep)
        # error: Value of type "IndexOpsMixin" is not indexable
        return self[~duplicated]  # type: ignore[index]

    @final
    def _duplicated(
        self, keep: Literal["first", "last", False] = "first"
    ) -> npt.NDArray[np.bool_]:
        return duplicated(self._values, keep=keep)
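
    # A small sketch of the ``keep`` semantics (illustrative only, not part
    # of pandas): ``_duplicated`` marks the rows to drop, and
    # ``drop_duplicates`` keeps the complement.
    #
    #     >>> import pandas as pd
    #     >>> s = pd.Series([1, 2, 1])
    #     >>> s.drop_duplicates(keep="first").tolist()
    #     [1, 2]
    #     >>> s.drop_duplicates(keep="last").tolist()
    #     [2, 1]
    #     >>> s.drop_duplicates(keep=False).tolist()
    #     [2]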

    def _arith_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)
        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
        rvalues = ensure_wrapped_if_datetimelike(rvalues)

        with np.errstate(all="ignore"):
            result = ops.arithmetic_op(lvalues, rvalues, op)

        return self._construct_result(result, name=res_name)

    def _construct_result(self, result, name):
        """
        Construct an appropriately-wrapped result from the ArrayLike result
        of an arithmetic-like operation.
        """
        raise AbstractMethodError(self)