Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/sparse/accessor.py: 27%
110 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""Sparse accessor"""
2from __future__ import annotations
4from typing import TYPE_CHECKING
6import numpy as np
8from pandas.compat._optional import import_optional_dependency
10from pandas.core.dtypes.cast import find_common_type
12from pandas.core.accessor import (
13 PandasDelegate,
14 delegate_names,
15)
16from pandas.core.arrays.sparse.array import SparseArray
17from pandas.core.arrays.sparse.dtype import SparseDtype
19if TYPE_CHECKING: 19 ↛ 20line 19 didn't jump to line 20, because the condition on line 19 was never true
20 from pandas import (
21 DataFrame,
22 Series,
23 )
class BaseAccessor:
    """
    Common base for the ``.sparse`` accessors.

    Stores the wrapped object on ``_parent`` and runs ``_validate`` on it at
    construction time. Subclasses must override ``_validate``.
    """

    # Error text shared by the concrete accessors when validation fails.
    _validation_msg = "Can only use the '.sparse' accessor with Sparse data."

    def __init__(self, data=None) -> None:
        # The parent is stored before validation runs, so ``_validate``
        # implementations may also reach the data through ``self._parent``.
        self._parent = data
        self._validate(data)

    def _validate(self, data):
        """Check that ``data`` is acceptable; the base class has no rule."""
        raise NotImplementedError
@delegate_names(
    SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property"
)
class SparseAccessor(BaseAccessor, PandasDelegate):
    """
    Accessor for sparse data stored in a Series.

    Exposes the ``npoints``, ``density``, ``fill_value`` and ``sp_values``
    properties of the underlying SparseArray, and provides conversion to and
    from ``scipy.sparse.coo_matrix`` as well as to a dense Series.
    """

    def _validate(self, data):
        # Only data whose dtype is a SparseDtype may use this accessor.
        if not isinstance(data.dtype, SparseDtype):
            raise AttributeError(self._validation_msg)

    def _delegate_property_get(self, name, *args, **kwargs):
        # Delegated properties are read straight off the backing array.
        return getattr(self._parent.array, name)

    def _delegate_method(self, name, *args, **kwargs):
        if name == "from_coo":
            return self.from_coo(*args, **kwargs)
        elif name == "to_coo":
            return self.to_coo(*args, **kwargs)
        else:
            # Name the offending method so the failure is diagnosable.
            raise ValueError(
                f"Cannot delegate method {name!r}; "
                "only 'from_coo' and 'to_coo' are supported."
            )

    @classmethod
    def from_coo(cls, A, dense_index=False) -> Series:
        """
        Create a Series with sparse values from a scipy.sparse.coo_matrix.

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the index of the resulting Series consists
            of only the coords of the non-null entries of the original
            coo_matrix.
            If True, the index of the resulting Series consists of the full
            sorted (row, col) coordinates of the coo_matrix.

        Returns
        -------
        s : Series
            A Series with sparse values.

        Examples
        --------
        >>> from scipy import sparse

        >>> A = sparse.coo_matrix(
        ...     ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)
        ... )
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
        with 3 stored elements in COOrdinate format>

        >>> A.todense()
        matrix([[0., 0., 1., 2.],
                [3., 0., 0., 0.],
                [0., 0., 0., 0.]])

        >>> ss = pd.Series.sparse.from_coo(A)
        >>> ss
        0  2    1.0
           3    2.0
        1  0    3.0
        dtype: Sparse[float64, nan]
        """
        from pandas import Series
        from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series

        result = coo_to_sparse_series(A, dense_index=dense_index)
        # Re-wrap the array and index in a plain Series without copying.
        result = Series(result.array, index=result.index, copy=False)

        return result

    def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a Series with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.
            When `row_levels` and/or `column_levels` refer to a single level,
            set to `True` for a faster execution.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
        >>> s.index = pd.MultiIndex.from_tuples(
        ...     [
        ...         (1, 2, "a", 0),
        ...         (1, 2, "a", 1),
        ...         (1, 1, "b", 0),
        ...         (1, 1, "b", 1),
        ...         (2, 1, "b", 0),
        ...         (2, 1, "b", 1)
        ...     ],
        ...     names=["A", "B", "C", "D"],
        ... )
        >>> s
        A  B  C  D
        1  2  a  0    3.0
                 1    NaN
           1  b  0    1.0
                 1    3.0
        2  1  b  0    NaN
                 1    NaN
        dtype: float64

        >>> ss = s.astype("Sparse")
        >>> ss
        A  B  C  D
        1  2  a  0    3.0
                 1    NaN
           1  b  0    1.0
                 1    3.0
        2  1  b  0    NaN
                 1    NaN
        dtype: Sparse[float64, nan]

        >>> A, rows, columns = ss.sparse.to_coo(
        ...     row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True
        ... )
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
        with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[0., 0., 1., 3.],
                [3., 0., 0., 0.],
                [0., 0., 0., 0.]])

        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo

        A, rows, columns = sparse_series_to_coo(
            self._parent, row_levels, column_levels, sort_labels=sort_labels
        )
        return A, rows, columns

    def to_dense(self) -> Series:
        """
        Convert a Series from sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        Series
            A Series with the same values, stored as a dense array.

        Examples
        --------
        >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0]))
        >>> series
        0    0
        1    1
        2    0
        dtype: Sparse[int64, 0]

        >>> series.sparse.to_dense()
        0    0
        1    1
        2    0
        dtype: int64
        """
        from pandas import Series

        # Index and name are carried over from the sparse parent.
        return Series(
            self._parent.array.to_dense(),
            index=self._parent.index,
            name=self._parent.name,
        )
class SparseFrameAccessor(BaseAccessor, PandasDelegate):
    """
    DataFrame accessor for sparse data.

    .. versionadded:: 0.25.0
    """

    def _validate(self, data):
        # Every column must be sparse for the accessor to be usable.
        dtypes = data.dtypes
        if not all(isinstance(t, SparseDtype) for t in dtypes):
            raise AttributeError(self._validation_msg)

    @classmethod
    def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
        """
        Create a new DataFrame from a scipy sparse matrix.

        .. versionadded:: 0.25.0

        Parameters
        ----------
        data : scipy.sparse.spmatrix
            Must be convertible to csc format.
        index, columns : Index, optional
            Row and column labels to use for the resulting DataFrame.
            Defaults to a RangeIndex.

        Returns
        -------
        DataFrame
            Each column of the DataFrame is stored as a
            :class:`arrays.SparseArray`.

        Examples
        --------
        >>> import scipy.sparse
        >>> mat = scipy.sparse.eye(3)
        >>> pd.DataFrame.sparse.from_spmatrix(mat)
             0    1    2
        0  1.0  0.0  0.0
        1  0.0  1.0  0.0
        2  0.0  0.0  1.0
        """
        from pandas._libs.sparse import IntIndex

        from pandas import DataFrame

        data = data.tocsc()
        index, columns = cls._prep_index(data, index, columns)
        n_rows, n_columns = data.shape
        # We need to make sure indices are sorted, as we create
        # IntIndex with no input validation (i.e. check_integrity=False ).
        # Indices may already be sorted in scipy in which case this adds
        # a small overhead.
        data.sort_indices()
        indices = data.indices
        indptr = data.indptr
        array_data = data.data
        dtype = SparseDtype(array_data.dtype, 0)
        arrays = []
        for i in range(n_columns):
            # indptr[i]:indptr[i + 1] delimits column i in the csc buffers.
            sl = slice(indptr[i], indptr[i + 1])
            idx = IntIndex(n_rows, indices[sl], check_integrity=False)
            arr = SparseArray._simple_new(array_data[sl], idx, dtype)
            arrays.append(arr)
        return DataFrame._from_arrays(
            arrays, columns=columns, index=index, verify_integrity=False
        )

    def to_dense(self) -> DataFrame:
        """
        Convert a DataFrame with sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        DataFrame
            A DataFrame with the same values stored as dense arrays.

        Examples
        --------
        >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
        >>> df.sparse.to_dense()
           A
        0  0
        1  1
        2  0
        """
        from pandas import DataFrame

        # Key the dense arrays by position rather than by label: a dict keyed
        # by label would collapse duplicate column names and hand every
        # duplicate the data of the last one.
        dense_columns = {
            position: column.array.to_dense()
            for position, (_, column) in enumerate(self._parent.items())
        }
        result = DataFrame(dense_columns, index=self._parent.index)
        # Restore the original labels (duplicates included) in one step.
        result.columns = self._parent.columns
        return result

    def to_coo(self):
        """
        Return the contents of the frame as a sparse SciPy COO matrix.

        .. versionadded:: 0.25.0

        Returns
        -------
        coo_matrix : scipy.sparse.spmatrix
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.

        Raises
        ------
        ValueError
            If any column has a fill value other than 0.

        Notes
        -----
        The dtype will be the lowest-common-denominator type (implicit
        upcasting); that is to say if the dtypes (even of numeric types)
        are mixed, the one that accommodates all will be chosen.

        e.g. If the dtypes are float16 and float32, dtype will be upcast to
        float32. By numpy.find_common_type convention, mixing int64 and
        uint64 will result in a float64 dtype.
        """
        import_optional_dependency("scipy")
        from scipy.sparse import coo_matrix

        dtype = find_common_type(self._parent.dtypes.to_list())
        if isinstance(dtype, SparseDtype):
            # The matrix stores plain values, so use the underlying subtype.
            dtype = dtype.subtype

        cols, rows, data = [], [], []
        for col, (_, ser) in enumerate(self._parent.items()):
            sp_arr = ser.array
            if sp_arr.fill_value != 0:
                raise ValueError("fill value must be 0 when converting to COO matrix")

            row = sp_arr.sp_index.indices
            # One column coordinate per stored value of this column.
            cols.append(np.repeat(col, len(row)))
            rows.append(row)
            data.append(sp_arr.sp_values.astype(dtype, copy=False))

        cols = np.concatenate(cols)
        rows = np.concatenate(rows)
        data = np.concatenate(data)
        return coo_matrix((data, (rows, cols)), shape=self._parent.shape)

    @property
    def density(self) -> float:
        """
        Ratio of non-sparse points to total (dense) data points.
        """
        tmp = np.mean([column.array.density for _, column in self._parent.items()])
        # error: Expression of type "floating" cannot be assigned to return type "float"
        return tmp  # pyright: ignore[reportGeneralTypeIssues]

    @staticmethod
    def _prep_index(data, index, columns):
        """
        Build row and column indexes matching the shape of ``data``.

        A missing ``index`` or ``columns`` is replaced by a default index of
        the matching length; a supplied one is passed through
        ``ensure_index``. Raises ValueError when a length does not match
        ``data.shape``.
        """
        from pandas.core.indexes.api import (
            default_index,
            ensure_index,
        )

        N, K = data.shape
        if index is None:
            index = default_index(N)
        else:
            index = ensure_index(index)
        if columns is None:
            columns = default_index(K)
        else:
            columns = ensure_index(columns)

        if len(columns) != K:
            raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}")
        if len(index) != N:
            raise ValueError(f"Index length mismatch: {len(index)} vs. {N}")
        return index, columns