Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/arrays/sparse/accessor.py: 27%

1"""Sparse accessor"""

2from __future__ import annotations

4from typing import TYPE_CHECKING

6import numpy as np

8from pandas.compat._optional import import_optional_dependency

10from pandas.core.dtypes.cast import find_common_type

12from pandas.core.accessor import (

13 PandasDelegate,

14 delegate_names,

15)

16from pandas.core.arrays.sparse.array import SparseArray

17from pandas.core.arrays.sparse.dtype import SparseDtype

19if TYPE_CHECKING: 19 ↛ 20line 19 didn't jump to line 20, because the condition on line 19 was never true

20 from pandas import (

21 DataFrame,

22 Series,

23 )

class BaseAccessor:
    """
    Shared base for the ``.sparse`` accessors.

    On construction the wrapped object is stored on ``_parent`` and then
    passed to ``_validate``. This base implementation of ``_validate``
    always raises, so concrete accessors have to override it.
    """

    _validation_msg = "Can only use the '.sparse' accessor with Sparse data."

    def _validate(self, data):
        """Validation hook; not implemented on the base class."""
        raise NotImplementedError()

    def __init__(self, data=None) -> None:
        # The parent is assigned before validation runs, so an overriding
        # ``_validate`` can already see ``self._parent``.
        self._parent = data
        self._validate(data)

@delegate_names(
    SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property"
)
class SparseAccessor(BaseAccessor, PandasDelegate):
    """
    Accessor for a Series holding sparse values.

    Exposes the ``npoints``, ``density``, ``fill_value`` and ``sp_values``
    properties of the underlying array, plus conversions to and from
    ``scipy.sparse.coo_matrix`` and to a dense Series.
    """

    def _validate(self, data):
        """Raise AttributeError unless ``data.dtype`` is a SparseDtype."""
        if not isinstance(data.dtype, SparseDtype):
            raise AttributeError(self._validation_msg)

    def _delegate_property_get(self, name, *args, **kwargs):
        """Read the delegated property ``name`` from the parent's array."""
        return getattr(self._parent.array, name)

    def _delegate_method(self, name, *args, **kwargs):
        """
        Dispatch a delegated method call by name.

        Only ``"from_coo"`` and ``"to_coo"`` are supported.

        Raises
        ------
        ValueError
            If ``name`` is not one of the supported method names.
        """
        if name == "from_coo":
            return self.from_coo(*args, **kwargs)
        elif name == "to_coo":
            return self.to_coo(*args, **kwargs)
        else:
            # Name the offending method so the failure can be diagnosed.
            raise ValueError(f"Unsupported sparse accessor method: {name!r}")

    @classmethod
    def from_coo(cls, A, dense_index=False) -> Series:
        """
        Create a Series with sparse values from a scipy.sparse.coo_matrix.

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the index of the result consists of only the
            coords of the non-null entries of the original coo_matrix.
            If True, the index of the result consists of the full sorted
            (row, col) coordinates of the coo_matrix.

        Returns
        -------
        s : Series
            A Series with sparse values.

        Examples
        --------
        >>> from scipy import sparse

        >>> A = sparse.coo_matrix(
        ...     ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4)
        ... )
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
        with 3 stored elements in COOrdinate format>

        >>> A.todense()
        matrix([[0., 0., 1., 2.],
                [3., 0., 0., 0.],
                [0., 0., 0., 0.]])

        >>> ss = pd.Series.sparse.from_coo(A)
        >>> ss
        0  2    1.0
           3    2.0
        1  0    3.0
        dtype: Sparse[float64, nan]
        """
        from pandas import Series
        from pandas.core.arrays.sparse.scipy_sparse import coo_to_sparse_series

        result = coo_to_sparse_series(A, dense_index=dense_index)
        # Re-wrap the converted array and index in a Series without copying.
        result = Series(result.array, index=result.index, copy=False)

        return result

    def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a Series with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.
            When `row_levels` and/or `column_levels` refer to a single level,
            set to `True` for a faster execution.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
        >>> s.index = pd.MultiIndex.from_tuples(
        ...     [
        ...         (1, 2, "a", 0),
        ...         (1, 2, "a", 1),
        ...         (1, 1, "b", 0),
        ...         (1, 1, "b", 1),
        ...         (2, 1, "b", 0),
        ...         (2, 1, "b", 1)
        ...     ],
        ...     names=["A", "B", "C", "D"],
        ... )
        >>> s
        A  B  C  D
        1  2  a  0    3.0
                 1    NaN
           1  b  0    1.0
                 1    3.0
        2  1  b  0    NaN
                 1    NaN
        dtype: float64

        >>> ss = s.astype("Sparse")
        >>> ss
        A  B  C  D
        1  2  a  0    3.0
                 1    NaN
           1  b  0    1.0
                 1    3.0
        2  1  b  0    NaN
                 1    NaN
        dtype: Sparse[float64, nan]

        >>> A, rows, columns = ss.sparse.to_coo(
        ...     row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True
        ... )
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
        with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[0., 0., 1., 3.],
                [3., 0., 0., 0.],
                [0., 0., 0., 0.]])

        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        from pandas.core.arrays.sparse.scipy_sparse import sparse_series_to_coo

        A, rows, columns = sparse_series_to_coo(
            self._parent, row_levels, column_levels, sort_labels=sort_labels
        )
        return A, rows, columns

    def to_dense(self) -> Series:
        """
        Convert a Series from sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        Series
            A Series with the same values, stored as a dense array.

        Examples
        --------
        >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0]))
        >>> series
        0    0
        1    1
        2    0
        dtype: Sparse[int64, 0]

        >>> series.sparse.to_dense()
        0    0
        1    1
        2    0
        dtype: int64
        """
        from pandas import Series

        # The parent's index and name are carried over unchanged.
        return Series(
            self._parent.array.to_dense(),
            index=self._parent.index,
            name=self._parent.name,
        )

225

226

class SparseFrameAccessor(BaseAccessor, PandasDelegate):
    """
    DataFrame accessor for sparse data.

    .. versionadded:: 0.25.0
    """

    def _validate(self, data):
        """Raise AttributeError unless every column dtype is a SparseDtype."""
        dtypes = data.dtypes
        if not all(isinstance(t, SparseDtype) for t in dtypes):
            raise AttributeError(self._validation_msg)

    @classmethod
    def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
        """
        Create a new DataFrame from a scipy sparse matrix.

        .. versionadded:: 0.25.0

        Parameters
        ----------
        data : scipy.sparse.spmatrix
            Must be convertible to csc format.
        index, columns : Index, optional
            Row and column labels to use for the resulting DataFrame.
            Defaults to a RangeIndex.

        Returns
        -------
        DataFrame
            Each column of the DataFrame is stored as a
            :class:`arrays.SparseArray`.

        Examples
        --------
        >>> import scipy.sparse
        >>> mat = scipy.sparse.eye(3)
        >>> pd.DataFrame.sparse.from_spmatrix(mat)
             0    1    2
        0  1.0  0.0  0.0
        1  0.0  1.0  0.0
        2  0.0  0.0  1.0
        """
        from pandas._libs.sparse import IntIndex

        from pandas import DataFrame

        data = data.tocsc()
        index, columns = cls._prep_index(data, index, columns)
        n_rows, n_columns = data.shape
        # We need to make sure indices are sorted, as we create
        # IntIndex with no input validation (i.e. check_integrity=False ).
        # Indices may already be sorted in scipy in which case this adds
        # a small overhead.
        data.sort_indices()
        indices = data.indices
        indptr = data.indptr
        array_data = data.data
        dtype = SparseDtype(array_data.dtype, 0)
        arrays = []
        for i in range(n_columns):
            # indptr[i]:indptr[i + 1] delimits column i's stored entries.
            sl = slice(indptr[i], indptr[i + 1])
            idx = IntIndex(n_rows, indices[sl], check_integrity=False)
            arr = SparseArray._simple_new(array_data[sl], idx, dtype)
            arrays.append(arr)
        return DataFrame._from_arrays(
            arrays, columns=columns, index=index, verify_integrity=False
        )

    def to_dense(self) -> DataFrame:
        """
        Convert a DataFrame with sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        DataFrame
            A DataFrame with the same values stored as dense arrays.

        Examples
        --------
        >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
        >>> df.sparse.to_dense()
           A
        0  0
        1  1
        2  0
        """
        from pandas import DataFrame

        # Key the dense arrays by column position rather than by label:
        # a label-keyed dict would collapse duplicate column labels into a
        # single entry and silently repeat one column's data.
        dense_by_position = {
            position: column.array.to_dense()
            for position, (_, column) in enumerate(self._parent.items())
        }
        result = DataFrame(dense_by_position, index=self._parent.index)
        # Restore the original labels (duplicates included) afterwards.
        result.columns = self._parent.columns
        return result

    def to_coo(self):
        """
        Return the contents of the frame as a sparse SciPy COO matrix.

        .. versionadded:: 0.25.0

        Returns
        -------
        coo_matrix : scipy.sparse.spmatrix
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.

        Raises
        ------
        ValueError
            If any column has a fill value other than 0.

        Notes
        -----
        The dtype will be the lowest-common-denominator type (implicit
        upcasting); that is to say if the dtypes (even of numeric types)
        are mixed, the one that accommodates all will be chosen.

        e.g. If the dtypes are float16 and float32, dtype will be upcast to
        float32. By numpy.find_common_type convention, mixing int64 and
        uint64 will result in a float64 dtype.
        """
        import_optional_dependency("scipy")
        from scipy.sparse import coo_matrix

        dtype = find_common_type(self._parent.dtypes.to_list())
        if isinstance(dtype, SparseDtype):
            # Use the underlying subtype for the stored values.
            dtype = dtype.subtype

        cols, rows, data = [], [], []
        for col, (_, ser) in enumerate(self._parent.items()):
            sp_arr = ser.array
            if sp_arr.fill_value != 0:
                raise ValueError("fill value must be 0 when converting to COO matrix")

            row = sp_arr.sp_index.indices
            # One column coordinate per stored row coordinate.
            cols.append(np.repeat(col, len(row)))
            rows.append(row)
            data.append(sp_arr.sp_values.astype(dtype, copy=False))

        cols = np.concatenate(cols)
        rows = np.concatenate(rows)
        data = np.concatenate(data)
        return coo_matrix((data, (rows, cols)), shape=self._parent.shape)

    @property
    def density(self) -> float:
        """
        Ratio of non-sparse points to total (dense) data points.

        Computed as the mean of the per-column array densities.
        """
        tmp = np.mean([column.array.density for _, column in self._parent.items()])
        # error: Expression of type "floating" cannot be assigned to return type "float"
        return tmp  # pyright: ignore[reportGeneralTypeIssues]

    @staticmethod
    def _prep_index(data, index, columns):
        """
        Build row and column indexes matching the shape of ``data``.

        A missing ``index`` or ``columns`` is replaced with a default index
        of the matching length; a supplied one is passed through
        ``ensure_index``.

        Raises
        ------
        ValueError
            If the length of ``columns`` or ``index`` does not match the
            corresponding dimension of ``data.shape``.
        """
        from pandas.core.indexes.api import (
            default_index,
            ensure_index,
        )

        N, K = data.shape
        if index is None:
            index = default_index(N)
        else:
            index = ensure_index(index)
        if columns is None:
            columns = default_index(K)
        else:
            columns = ensure_index(columns)

        if len(columns) != K:
            raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}")
        if len(index) != N:
            raise ValueError(f"Index length mismatch: {len(index)} vs. {N}")
        return index, columns

396 return index, columns