Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/dtypes/base.py: 51%
136 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Extend pandas with custom array types.
3"""
4from __future__ import annotations
6from typing import (
7 TYPE_CHECKING,
8 Any,
9 TypeVar,
10 cast,
11 overload,
12)
14import numpy as np
16from pandas._libs import missing as libmissing
17from pandas._libs.hashtable import object_hash
18from pandas._typing import (
19 DtypeObj,
20 Shape,
21 npt,
22 type_t,
23)
24from pandas.errors import AbstractMethodError
26from pandas.core.dtypes.generic import (
27 ABCDataFrame,
28 ABCIndex,
29 ABCSeries,
30)
32if TYPE_CHECKING: 32 ↛ 33line 32 didn't jump to line 33, because the condition on line 32 was never true
33 from pandas.core.arrays import ExtensionArray
35 # To parameterize on same ExtensionDtype
36 ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype")
class ExtensionDtype:
    """
    A custom data type, to be paired with an ExtensionArray.

    See Also
    --------
    extensions.register_extension_dtype: Register an ExtensionType
        with pandas as class decorator.
    extensions.ExtensionArray: Abstract base class for custom 1-D array types.

    Notes
    -----
    The interface includes the following abstract methods that must
    be implemented by subclasses:

    * type
    * name
    * construct_array_type

    The following attributes and methods influence the behavior of the dtype in
    pandas operations

    * _is_numeric
    * _is_boolean
    * _get_common_dtype

    The `na_value` class attribute can be used to set the default NA value
    for this type. :attr:`numpy.nan` is used by default.

    ExtensionDtypes are required to be hashable. The base class provides
    a default implementation, which relies on the ``_metadata`` class
    attribute. ``_metadata`` should be a tuple containing the strings
    that define your data type. For example, with ``PeriodDtype`` that's
    the ``freq`` attribute.

    **If you have a parametrized dtype you should set the ``_metadata``
    class property**.

    Ideally, the attributes in ``_metadata`` will match the
    parameters to your ``ExtensionDtype.__init__`` (if any). If any of
    the attributes in ``_metadata`` don't implement the standard
    ``__eq__`` or ``__hash__``, the default implementations here will not
    work.

    For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method
    can be implemented: this method receives a pyarrow Array or ChunkedArray
    as only argument and is expected to return the appropriate pandas
    ExtensionArray for this dtype and the passed values::

        class ExtensionDtype:

            def __from_arrow__(
                self, array: Union[pyarrow.Array, pyarrow.ChunkedArray]
            ) -> ExtensionArray:
                ...

    This class does not inherit from 'abc.ABCMeta' for performance reasons.
    Methods and properties required by the interface raise
    ``pandas.errors.AbstractMethodError`` and no ``register`` method is
    provided for registering virtual subclasses.
    """

    # Names of the attributes that parametrize the dtype; they drive the
    # default ``__eq__`` and ``__hash__`` implementations below.
    _metadata: tuple[str, ...] = ()

    def __str__(self) -> str:
        """Return ``self.name``."""
        return self.name

    def __eq__(self, other: Any) -> bool:
        """
        Check whether 'other' is equal to self.

        By default, 'other' is considered equal if either

        * it's a string matching 'self.name'.
        * it's an instance of this type and all of the attributes
          in ``self._metadata`` are equal between `self` and `other`.

        Parameters
        ----------
        other : Any

        Returns
        -------
        bool
        """
        if isinstance(other, str):
            # A string is first turned into a dtype; a string that cannot
            # be parsed as this dtype simply compares unequal.
            try:
                other = self.construct_from_string(other)
            except TypeError:
                return False
        if isinstance(other, type(self)):
            return all(
                getattr(self, attr) == getattr(other, attr) for attr in self._metadata
            )
        return False

    def __hash__(self) -> int:
        """Hash the tuple of attribute values named in ``_metadata``."""
        # for python>=3.10, different nan objects have different hashes
        # we need to avoid that and thus use hash function with old behavior
        return object_hash(tuple(getattr(self, attr) for attr in self._metadata))

    def __ne__(self, other: Any) -> bool:
        """Return the negation of ``__eq__``."""
        return not self.__eq__(other)

    @property
    def na_value(self) -> object:
        """
        Default NA value to use for this type.

        This is used in e.g. ExtensionArray.take. This should be the
        user-facing "boxed" version of the NA value, not the physical NA value
        for storage.  e.g. for JSONArray, this is an empty dictionary.
        """
        return np.nan

    @property
    def type(self) -> type_t[Any]:
        """
        The scalar type for the array, e.g. ``int``

        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``, assuming
        that value is valid (not NA). NA values do not need to be
        instances of `type`.
        """
        raise AbstractMethodError(self)

    @property
    def kind(self) -> str:
        """
        A character code (one of 'biufcmMOSUV'), default 'O'

        This should match the NumPy dtype used when the array is
        converted to an ndarray, which is probably 'O' for object if
        the extension type cannot be represented as a built-in NumPy
        type.

        See Also
        --------
        numpy.dtype.kind
        """
        return "O"

    @property
    def name(self) -> str:
        """
        A string identifying the data type.

        Will be used for display in, e.g. ``Series.dtype``
        """
        raise AbstractMethodError(self)

    @property
    def names(self) -> list[str] | None:
        """
        Ordered list of field names, or None if there are no fields.

        This is for compatibility with NumPy arrays, and may be removed in the
        future.
        """
        return None

    @classmethod
    def construct_array_type(cls) -> type_t[ExtensionArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        raise AbstractMethodError(cls)

    def empty(self, shape: Shape) -> ExtensionArray:
        """
        Construct an ExtensionArray of this dtype with the given shape.

        Analogous to numpy.empty.

        Parameters
        ----------
        shape : int or tuple[int]

        Returns
        -------
        ExtensionArray
        """
        # The result is an array *instance* built by the associated array
        # class, hence the ``ExtensionArray`` return annotation.
        cls = self.construct_array_type()
        return cls._empty(shape, dtype=self)

    @classmethod
    def construct_from_string(
        cls: type_t[ExtensionDtypeT], string: str
    ) -> ExtensionDtypeT:
        r"""
        Construct this type from a string.

        This is useful mainly for data types that accept parameters.
        For example, a period dtype accepts a frequency parameter that
        can be set as ``period[H]`` (where H means hourly frequency).

        By default, in the abstract class, just the name of the type is
        expected. But subclasses can overwrite this method to accept
        parameters.

        Parameters
        ----------
        string : str
            The name of the type, for example ``category``.

        Returns
        -------
        ExtensionDtype
            Instance of the dtype.

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string'.

        Examples
        --------
        For extension dtypes with arguments the following may be an
        adequate implementation.

        >>> @classmethod
        ... def construct_from_string(cls, string):
        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
        ...     match = pattern.match(string)
        ...     if match:
        ...         return cls(**match.groupdict())
        ...     else:
        ...         raise TypeError(
        ...             f"Cannot construct a '{cls.__name__}' from '{string}'"
        ...         )
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        # error: Non-overlapping equality check (left operand type: "str", right
        #  operand type: "Callable[[ExtensionDtype], str]")  [comparison-overlap]
        # The default implementation reads ``name`` from the class, so it only
        # works when a subclass defines ``name`` as a plain string attribute.
        assert isinstance(cls.name, str), (cls, type(cls.name))
        if string != cls.name:
            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
        return cls()

    @classmethod
    def is_dtype(cls, dtype: object) -> bool:
        """
        Check if we match 'dtype'.

        Parameters
        ----------
        dtype : object
            The object to check.

        Returns
        -------
        bool

        Notes
        -----
        The default implementation is True if

        1. ``cls.construct_from_string(dtype)`` is an instance
           of ``cls``.
        2. ``dtype`` is an object and is an instance of ``cls``
        3. ``dtype`` has a ``dtype`` attribute, and any of the above
           conditions is true for ``dtype.dtype``.
        """
        dtype = getattr(dtype, "dtype", dtype)

        if isinstance(dtype, (ABCSeries, ABCIndex, ABCDataFrame, np.dtype)):
            # https://github.com/pandas-dev/pandas/issues/22960
            # avoid passing data to `construct_from_string`. This could
            # cause a FutureWarning from numpy about failing elementwise
            # comparison from, e.g., comparing DataFrame == 'category'.
            return False
        elif dtype is None:
            return False
        elif isinstance(dtype, cls):
            return True
        if isinstance(dtype, str):
            try:
                return cls.construct_from_string(dtype) is not None
            except TypeError:
                return False
        return False

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.

        By default ExtensionDtypes are assumed to be non-numeric.
        They'll be excluded from operations that exclude non-numeric
        columns, like (groupby) reductions, plotting, etc.
        """
        return False

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.

        By default, ExtensionDtypes are assumed to be non-boolean.
        Setting this to True will affect the behavior of several places,
        e.g.

        * is_bool
        * boolean indexing

        Returns
        -------
        bool
        """
        return False

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return the common dtype, if one exists.

        Used in `find_common_type` implementation. This is for example used
        to determine the resulting dtype in a concat operation.

        If no common dtype exists, return None (which gives the other dtypes
        the chance to determine a common dtype). If all dtypes in the list
        return None, then the common dtype will be "object" dtype (this means
        it is never needed to return "object" dtype from this method itself).

        Parameters
        ----------
        dtypes : list of dtypes
            The dtypes for which to determine a common dtype. This is a list
            of np.dtype or ExtensionDtype instances.

        Returns
        -------
        Common dtype (np.dtype or ExtensionDtype) or None
        """
        if len(set(dtypes)) == 1:
            # only itself
            return self
        else:
            return None

    @property
    def _can_hold_na(self) -> bool:
        """
        Can arrays of this dtype hold NA values?
        """
        return True
class StorageExtensionDtype(ExtensionDtype):
    """
    ExtensionDtype that may be backed by more than one implementation.

    The selected backend is kept on the instance as ``storage``; because
    ``"storage"`` is listed in ``_metadata`` it is one of the attributes the
    inherited equality and hashing look at.
    """

    name: str
    _metadata = ("storage",)

    def __init__(self, storage=None) -> None:
        self.storage = storage

    def __repr__(self) -> str:
        # Rendered as ``<name>[<storage>]``.
        label, backend = self.name, self.storage
        return f"{label}[{backend}]"

    def __str__(self):
        return self.name

    def __eq__(self, other: Any) -> bool:
        # A string equal to the dtype name matches regardless of storage;
        # anything else is decided by the parent implementation.
        matches_name = isinstance(other, str) and other == self.name
        if not matches_name:
            return super().__eq__(other)
        return True

    def __hash__(self) -> int:
        # Defining __eq__ on a class resets __hash__, so the inherited
        # implementation has to be re-exposed explicitly.
        return super().__hash__()

    @property
    def na_value(self) -> libmissing.NAType:
        # Missing values are represented by the pandas NA singleton.
        return libmissing.NA
def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
    """
    Class decorator that registers an ExtensionDtype with pandas.

    Once registered, the name of the ExtensionDtype can be used in
    operations like ``.astype(name)``.

    Parameters
    ----------
    cls : ExtensionDtype class
        The dtype class to add to the module-level registry.

    Returns
    -------
    ExtensionDtype class
        The same class that was passed in, unchanged.

    Examples
    --------
    >>> from pandas.api.extensions import register_extension_dtype, ExtensionDtype
    >>> @register_extension_dtype
    ... class MyExtensionDtype(ExtensionDtype):
    ...     name = "myextension"
    """
    # Record the class in the shared registry, then hand it back so the
    # decorated name still refers to the class.
    _registry.register(cls)
    return cls
class Registry:
    """
    Registry for dtype inference.

    The registry allows one to map a string repr of a extension
    dtype to an extension dtype. The string alias can be used in several
    places, including

    * Series and Index constructors
    * :meth:`pandas.array`
    * :meth:`pandas.Series.astype`

    Multiple extension types can be registered.
    These are tried in order.
    """

    def __init__(self) -> None:
        # Registered dtype classes, in registration order.
        self.dtypes: list[type_t[ExtensionDtype]] = []

    def register(self, dtype: type_t[ExtensionDtype]) -> None:
        """
        Add an ExtensionDtype class to the registry.

        Parameters
        ----------
        dtype : ExtensionDtype class

        Raises
        ------
        ValueError
            If ``dtype`` is not a class, or is a class that does not
            subclass ExtensionDtype.
        """
        # ``issubclass`` raises a bare TypeError when its first argument is
        # not a class (e.g. a dtype *instance*), so check that first and
        # report every invalid input with the same ValueError.
        if not (isinstance(dtype, type) and issubclass(dtype, ExtensionDtype)):
            raise ValueError("can only register pandas extension dtypes")

        self.dtypes.append(dtype)

    @overload
    def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
        ...

    @overload
    def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT:
        ...

    @overload
    def find(self, dtype: str) -> ExtensionDtype | None:
        ...

    @overload
    def find(
        self, dtype: npt.DTypeLike
    ) -> type_t[ExtensionDtype] | ExtensionDtype | None:
        ...

    def find(
        self, dtype: type_t[ExtensionDtype] | ExtensionDtype | npt.DTypeLike
    ) -> type_t[ExtensionDtype] | ExtensionDtype | None:
        """
        Look up an extension dtype.

        Parameters
        ----------
        dtype : ExtensionDtype class or instance or str or numpy dtype or python type

        Returns
        -------
        return the first matching dtype, otherwise return None

        Notes
        -----
        A non-string argument is returned unchanged when it is an
        ExtensionDtype class or instance, and yields None otherwise. A string
        is offered to each registered class's ``construct_from_string`` in
        registration order; the first one that does not raise TypeError wins.
        """
        if not isinstance(dtype, str):
            # Normalise to a class so a single issubclass check covers both
            # dtype classes and dtype instances.
            dtype_type = dtype if isinstance(dtype, type) else type(dtype)
            if issubclass(dtype_type, ExtensionDtype):
                # cast needed here as mypy doesn't know we have figured
                # out it is an ExtensionDtype or type_t[ExtensionDtype]
                return cast("ExtensionDtype | type_t[ExtensionDtype]", dtype)

            return None

        for registered in self.dtypes:
            try:
                return registered.construct_from_string(dtype)
            except TypeError:
                # This class does not recognise the string; try the next one.
                pass

        return None


# Module-level registry instance shared by the registration decorator.
_registry = Registry()