Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/arrow/dtype.py: 41%

79 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import re 

4 

5import numpy as np 

6 

7from pandas._typing import DtypeObj 

8from pandas.compat import pa_version_under1p01 

9from pandas.util._decorators import cache_readonly 

10 

11from pandas.core.dtypes.base import ( 

12 StorageExtensionDtype, 

13 register_extension_dtype, 

14) 

15 

16if not pa_version_under1p01: 16 ↛ 17line 16 didn't jump to line 17, because the condition on line 16 was never true

17 import pyarrow as pa 

18 

19 

@register_extension_dtype
class ArrowDtype(StorageExtensionDtype):
    """
    An ExtensionDtype for PyArrow data types.

    .. warning::

       ArrowDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    While most ``dtype`` arguments can accept the "string"
    constructor, e.g. ``"int64[pyarrow]"``, ArrowDtype is useful
    if the data type contains parameters like ``pyarrow.timestamp``.

    Parameters
    ----------
    pyarrow_dtype : pa.DataType
        An instance of a `pyarrow.DataType <https://arrow.apache.org/docs/python/api/datatypes.html#factory-functions>`__.

    Attributes
    ----------
    pyarrow_dtype

    Methods
    -------
    None

    Returns
    -------
    ArrowDtype

    Examples
    --------
    >>> import pyarrow as pa
    >>> pd.ArrowDtype(pa.int64())
    int64[pyarrow]

    Types with parameters must be constructed with ArrowDtype.

    >>> pd.ArrowDtype(pa.timestamp("s", tz="America/New_York"))
    timestamp[s, tz=America/New_York][pyarrow]
    >>> pd.ArrowDtype(pa.list_(pa.int64()))
    list<item: int64>[pyarrow]
    """  # noqa: E501

    _metadata = ("storage", "pyarrow_dtype")  # type: ignore[assignment]

    def __init__(self, pyarrow_dtype: pa.DataType) -> None:
        """
        Wrap a pyarrow data type instance.

        Parameters
        ----------
        pyarrow_dtype : pa.DataType
            The pyarrow data type this dtype represents.

        Raises
        ------
        ImportError
            If ``pa_version_under1p01`` is true.
        ValueError
            If ``pyarrow_dtype`` is not a ``pa.DataType`` instance.
        """
        super().__init__("pyarrow")
        # This check must come before any use of ``pa``: the module only
        # imports pyarrow when ``pa_version_under1p01`` is false.
        if pa_version_under1p01:
            raise ImportError("pyarrow>=1.0.1 is required for ArrowDtype")
        if not isinstance(pyarrow_dtype, pa.DataType):
            raise ValueError(
                f"pyarrow_dtype ({pyarrow_dtype}) must be an instance "
                f"of a pyarrow.DataType. Got {type(pyarrow_dtype)} instead."
            )
        self.pyarrow_dtype = pyarrow_dtype

    def __repr__(self) -> str:
        """Return the dtype name, e.g. ``int64[pyarrow]``."""
        return self.name

    @property
    def type(self):
        """
        Return the Python class of the wrapped pyarrow data type instance.
        """
        return type(self.pyarrow_dtype)

    @property
    def name(self) -> str:  # type: ignore[override]
        """
        A string identifying the data type.

        Built as ``"<str(pyarrow_dtype)>[<storage>]"``.
        """
        return f"{str(self.pyarrow_dtype)}[{self.storage}]"

    @cache_readonly
    def numpy_dtype(self) -> np.dtype:
        """
        Return an instance of the related numpy dtype.

        Falls back to ``np.dtype(object)`` when the conversion raises
        ``NotImplementedError`` or ``TypeError``.
        """
        try:
            return np.dtype(self.pyarrow_dtype.to_pandas_dtype())
        except (NotImplementedError, TypeError):
            return np.dtype(object)

    @cache_readonly
    def kind(self) -> str:
        """Return the ``kind`` character code of ``numpy_dtype``."""
        return self.numpy_dtype.kind

    @cache_readonly
    def itemsize(self) -> int:
        """Return the ``itemsize`` (in bytes) of ``numpy_dtype``."""
        return self.numpy_dtype.itemsize

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        # Imported lazily inside the method rather than at module level.
        from pandas.core.arrays.arrow import ArrowExtensionArray

        return ArrowExtensionArray

    @classmethod
    def construct_from_string(cls, string: str) -> ArrowDtype:
        """
        Construct this type from a string.

        Parameters
        ----------
        string : str
            string should follow the format f"{pyarrow_type}[pyarrow]"
            e.g. int64[pyarrow]

        Returns
        -------
        ArrowDtype

        Raises
        ------
        TypeError
            If ``string`` is not a str, does not end with ``[pyarrow]``,
            is exactly ``"string[pyarrow]"``, or its base type is not a
            recognised pyarrow alias.
        NotImplementedError
            If the base type is not a recognised alias and contains a
            bracketed section (i.e. looks like a parametrised type).
        """
        suffix = "[pyarrow]"
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if not string.endswith(suffix):
            raise TypeError(f"'{string}' must end with '[pyarrow]'")
        if string == "string[pyarrow]":
            # Ensure Registry.find skips ArrowDtype to use StringDtype instead
            raise TypeError("string[pyarrow] should be constructed by StringDtype")
        # Strip only the trailing suffix. Splitting on every occurrence of
        # "[pyarrow]" would silently accept malformed input such as
        # "int64[pyarrow][pyarrow]" by keeping just the leading "int64".
        base_type = string[: -len(suffix)]
        try:
            pa_dtype = pa.type_for_alias(base_type)
        except ValueError as err:
            has_parameters = re.search(r"\[.*\]", base_type)
            if has_parameters:
                raise NotImplementedError(
                    "Passing pyarrow type specific parameters "
                    f"({has_parameters.group()}) in the string is not supported. "
                    "Please construct an ArrowDtype object with a pyarrow_dtype "
                    "instance with specific parameters."
                ) from err
            raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
        return cls(pa_dtype)

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.

        True for pyarrow integer, floating and decimal types.
        """
        # TODO: pa.types.is_boolean?
        return (
            pa.types.is_integer(self.pyarrow_dtype)
            or pa.types.is_floating(self.pyarrow_dtype)
            or pa.types.is_decimal(self.pyarrow_dtype)
        )

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.
        """
        return pa.types.is_boolean(self.pyarrow_dtype)

    def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
        """
        Find a common ArrowDtype for ``dtypes``, if one exists.

        Returns
        -------
        ArrowDtype or None
            ``None`` when the common type is not a numpy dtype or when
            pyarrow cannot convert it (``NotImplementedError``).
        """
        # Replace every ArrowDtype with its numpy_dtype, find the common
        # dtype of the result, then wrap that back into an ArrowDtype.
        # Mirrors BaseMaskedDtype
        from pandas.core.dtypes.cast import find_common_type

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
                for dtype in dtypes
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            return None
        try:
            pa_dtype = pa.from_numpy_dtype(new_dtype)
            return type(self)(pa_dtype)
        except NotImplementedError:
            return None

    def __from_arrow__(self, array: pa.Array | pa.ChunkedArray):
        """
        Construct this dtype's extension array (see ``construct_array_type``)
        from a pyarrow Array/ChunkedArray.
        """
        array_class = self.construct_array_type()
        return array_class(array)

203 array_class = self.construct_array_type() 

204 return array_class(array)