Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/arrow/dtype.py: 41%
79 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1from __future__ import annotations
3import re
5import numpy as np
7from pandas._typing import DtypeObj
8from pandas.compat import pa_version_under1p01
9from pandas.util._decorators import cache_readonly
11from pandas.core.dtypes.base import (
12 StorageExtensionDtype,
13 register_extension_dtype,
14)
16if not pa_version_under1p01: 16 ↛ 17line 16 didn't jump to line 17, because the condition on line 16 was never true
17 import pyarrow as pa
@register_extension_dtype
class ArrowDtype(StorageExtensionDtype):
    """
    An ExtensionDtype for PyArrow data types.

    .. warning::

       ArrowDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    While most ``dtype`` arguments can accept the "string"
    constructor, e.g. ``"int64[pyarrow]"``, ArrowDtype is useful
    if the data type contains parameters like ``pyarrow.timestamp``.

    Parameters
    ----------
    pyarrow_dtype : pa.DataType
        An instance of a `pyarrow.DataType <https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions>`__.

    Attributes
    ----------
    pyarrow_dtype

    Methods
    -------
    None

    Returns
    -------
    ArrowDtype

    Examples
    --------
    >>> import pyarrow as pa
    >>> pd.ArrowDtype(pa.int64())
    int64[pyarrow]

    Types with parameters must be constructed with ArrowDtype.

    >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York"))
    timestamp[s, tz=America/New_York][pyarrow]
    >>> pd.ArrowDtype(pa.list_(pa.int64()))
    list<item: int64>[pyarrow]
    """  # noqa: E501

    _metadata = ("storage", "pyarrow_dtype")  # type: ignore[assignment]

    def __init__(self, pyarrow_dtype: pa.DataType) -> None:
        """
        Wrap a pyarrow data type.

        Parameters
        ----------
        pyarrow_dtype : pa.DataType
            The pyarrow type this dtype represents.

        Raises
        ------
        ImportError
            If pyarrow>=1.0.1 is not available.
        ValueError
            If ``pyarrow_dtype`` is not a ``pyarrow.DataType`` instance.
        """
        super().__init__("pyarrow")
        # The version check must come first: ``pa`` is only bound at module
        # level when a supported pyarrow is installed.
        if pa_version_under1p01:
            raise ImportError("pyarrow>=1.0.1 is required for ArrowDtype")
        if not isinstance(pyarrow_dtype, pa.DataType):
            raise ValueError(
                f"pyarrow_dtype ({pyarrow_dtype}) must be an instance "
                f"of a pyarrow.DataType. Got {type(pyarrow_dtype)} instead."
            )
        self.pyarrow_dtype = pyarrow_dtype

    def __repr__(self) -> str:
        """Return the dtype name, e.g. ``int64[pyarrow]``."""
        return self.name

    @property
    def type(self):
        """
        Returns pyarrow.DataType.
        """
        return type(self.pyarrow_dtype)

    @property
    def name(self) -> str:  # type: ignore[override]
        """
        A string identifying the data type.
        """
        return f"{str(self.pyarrow_dtype)}[{self.storage}]"

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """Return an instance of the related numpy dtype"""
        try:
            return np.dtype(self.pyarrow_dtype.to_pandas_dtype())
        except (NotImplementedError, TypeError):
            # No direct numpy equivalent for this pyarrow type.
            return np.dtype(object)

    @cache_readonly
    def kind(self) -> str:
        """Return the ``kind`` character code of the related numpy dtype."""
        return self.numpy_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype"""
        return self.numpy_dtype.itemsize

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        # Imported lazily inside the method rather than at module level.
        from pandas.core.arrays.arrow import ArrowExtensionArray

        return ArrowExtensionArray

    @classmethod
    def construct_from_string(cls, string: str) -> ArrowDtype:
        """
        Construct this type from a string.

        Parameters
        ----------
        string : str
            string should follow the format f"{pyarrow_type}[pyarrow]"
            e.g. int64[pyarrow]

        Returns
        -------
        ArrowDtype

        Raises
        ------
        TypeError
            If ``string`` is not a str, does not end with ``[pyarrow]``,
            is ``"string[pyarrow]"``, or does not name a pyarrow type.
        ImportError
            If pyarrow>=1.0.1 is not available.
        NotImplementedError
            If the type portion of ``string`` carries bracketed parameters.
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        suffix = "[pyarrow]"
        if not string.endswith(suffix):
            raise TypeError(f"'{string}' must end with '[pyarrow]'")
        if string == "string[pyarrow]":
            # Ensure Registry.find skips ArrowDtype to use StringDtype instead
            raise TypeError("string[pyarrow] should be constructed by StringDtype")
        # Checked only after the TypeError guards above so that strings which
        # are not pyarrow aliases are still rejected with TypeError; without
        # this check a missing pyarrow surfaces as a NameError on ``pa``.
        if pa_version_under1p01:
            raise ImportError("pyarrow>=1.0.1 is required for ArrowDtype")
        # Remove only the trailing marker; splitting on "[pyarrow]" would
        # truncate at the first occurrence if the marker appeared twice.
        base_type = string[: -len(suffix)]
        try:
            pa_dtype = pa.type_for_alias(base_type)
        except ValueError as err:
            has_parameters = re.search(r"\[.*\]", base_type)
            if has_parameters:
                raise NotImplementedError(
                    "Passing pyarrow type specific parameters "
                    f"({has_parameters.group()}) in the string is not supported. "
                    "Please construct an ArrowDtype object with a pyarrow_dtype "
                    "instance with specific parameters."
                ) from err
            raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
        return cls(pa_dtype)

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.
        """
        # TODO: pa.types.is_boolean?
        return (
            pa.types.is_integer(self.pyarrow_dtype)
            or pa.types.is_floating(self.pyarrow_dtype)
            or pa.types.is_decimal(self.pyarrow_dtype)
        )

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.
        """
        return pa.types.is_boolean(self.pyarrow_dtype)

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Return a common ArrowDtype for ``dtypes``, or None if there is none.

        Each ArrowDtype in ``dtypes`` is replaced by its ``numpy_dtype``
        before the common type is computed; the result is wrapped back into
        this class when it is a numpy dtype that pyarrow can convert.
        """
        # We unwrap any masked dtypes, find the common dtype we would use
        # for that, then re-mask the result.
        # Mirrors BaseMaskedDtype
        from pandas.core.dtypes.cast import find_common_type

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
                for dtype in dtypes
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            return None
        try:
            pa_dtype = pa.from_numpy_dtype(new_dtype)
            return type(self)(pa_dtype)
        except NotImplementedError:
            return None

    def __from_arrow__(self, array: pa.Array | pa.ChunkedArray):
        """
        Construct the associated extension array from a pyarrow
        Array/ChunkedArray.

        The result is an instance of the class returned by
        ``construct_array_type`` wrapping ``array``.
        """
        array_class = self.construct_array_type()
        return array_class(array)