Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/api.py: 13%

130 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3import textwrap 

4from typing import cast 

5 

6import numpy as np 

7 

8from pandas._libs import ( 

9 NaT, 

10 lib, 

11) 

12from pandas.errors import InvalidIndexError 

13 

14from pandas.core.dtypes.cast import find_common_type 

15from pandas.core.dtypes.common import is_dtype_equal 

16 

17from pandas.core.algorithms import safe_sort 

18from pandas.core.indexes.base import ( 

19 Index, 

20 _new_Index, 

21 ensure_index, 

22 ensure_index_from_sequences, 

23 get_unanimous_names, 

24) 

25from pandas.core.indexes.category import CategoricalIndex 

26from pandas.core.indexes.datetimes import DatetimeIndex 

27from pandas.core.indexes.interval import IntervalIndex 

28from pandas.core.indexes.multi import MultiIndex 

29from pandas.core.indexes.numeric import ( 

30 Float64Index, 

31 Int64Index, 

32 NumericIndex, 

33 UInt64Index, 

34) 

35from pandas.core.indexes.period import PeriodIndex 

36from pandas.core.indexes.range import RangeIndex 

37from pandas.core.indexes.timedeltas import TimedeltaIndex 

38 

# Message about sorting a non-aligned, non-concatenation axis.  Its wording
# shows it is meant to be emitted as a warning that callers can silence by
# passing an explicit ``sort`` value.  ``textwrap.dedent`` strips the common
# leading indentation, so the text starts at column 0 at runtime.
_sort_msg = textwrap.dedent(
    """\
    Sorting because non-concatenation axis is not aligned. A future version
    of pandas will change to not sort by default.

    To accept the future behavior, pass 'sort=False'.

    To retain the current behavior and silence the warning, pass 'sort=True'.
    """
)

49 

50 

# Names exported by ``from <this module> import *``: the index classes and
# helpers imported at the top of this file, plus the functions defined below
# (get_objs_combined_axis, union_indexes, all_indexes_same, default_index,
# safe_sort_index).
__all__ = [
    "Index",
    "MultiIndex",
    "NumericIndex",
    "Float64Index",
    "Int64Index",
    "CategoricalIndex",
    "IntervalIndex",
    "RangeIndex",
    "UInt64Index",
    "InvalidIndexError",
    "TimedeltaIndex",
    "PeriodIndex",
    "DatetimeIndex",
    "_new_Index",
    "NaT",
    "ensure_index",
    "ensure_index_from_sequences",
    "get_objs_combined_axis",
    "union_indexes",
    "get_unanimous_names",
    "all_indexes_same",
    "default_index",
    "safe_sort_index",
]

76 

77 

def get_objs_combined_axis(
    objs, intersect: bool = False, axis=0, sort: bool = True, copy: bool = False
) -> Index:
    """
    Combine the indexes that several objects carry along one axis.

    Each object's index is fetched with ``obj._get_axis(axis)`` and the
    collected indexes are handed to ``_get_combined_index``.

    Parameters
    ----------
    objs : list
        Series or DataFrame objects, may be mix of the two.
    intersect : bool, default False
        If True, combine the indexes by intersection, otherwise by union.
    axis : default 0
        Axis identifier forwarded unchanged to each object's ``_get_axis``.
    sort : bool, default True
        Whether the combined index should be sorted.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    axis_indexes = [member._get_axis(axis) for member in objs]
    return _get_combined_index(
        axis_indexes,
        intersect=intersect,
        sort=sort,
        copy=copy,
    )

106 

107 

def _get_distinct_objs(objs: list[Index]) -> list[Index]:
    """
    Drop repeated references to the same object from "objs".

    Entries count as duplicates only when they are the very same object
    (same ``id``); separate objects that merely compare equal are all kept.
    Each object stays at the position of its first occurrence.
    """
    # A dict remembers insertion order and re-assigning an existing key does
    # not move it, so the values come back in first-seen order.
    first_seen = {id(candidate): candidate for candidate in objs}
    return list(first_seen.values())

120 

121 

def _get_combined_index(
    indexes: list[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Combine several indexes into one by union or intersection.

    Repeated references to the same object are dropped first. No distinct
    index yields an empty ``Index``; a single distinct index is used as is.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists (the entries must
        provide an ``intersection`` method).
    intersect : bool, default False
        If True, fold the indexes together with ``intersection``.
        Otherwise take their union via ``union_indexes``.
    sort : bool, default False
        Whether the result is passed through ``safe_sort_index``.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    distinct = _get_distinct_objs(indexes)
    n_distinct = len(distinct)

    if n_distinct == 0:
        combined = Index([])
    elif n_distinct == 1:
        combined = distinct[0]
    elif intersect:
        remaining = iter(distinct)
        combined = next(remaining)
        for nxt in remaining:
            combined = combined.intersection(nxt)
    else:
        # The union is computed unsorted; sorting, if requested, happens below.
        combined = ensure_index(union_indexes(distinct, sort=False))

    if sort:
        combined = safe_sort_index(combined)
    # GH 29879
    if copy:
        combined = combined.copy()

    return combined

168 

169 

def safe_sort_index(index: Index) -> Index:
    """
    Return "index" in sorted order when it can be sorted.

    The dtype and the name(s) of the input are carried over to the result.

    Parameters
    ----------
    index : an Index

    Returns
    -------
    Index
        The input object itself when it is already monotonic increasing or
        when ``safe_sort`` raises ``TypeError``; otherwise a sorted index.
    """
    if index.is_monotonic_increasing:
        # Already ordered: hand back the very same object.
        return index

    try:
        sorted_values = safe_sort(index)
    except TypeError:
        # Best effort: an index that cannot be sorted is returned unchanged.
        return index

    if isinstance(sorted_values, MultiIndex):
        return sorted_values

    sorted_values = cast(np.ndarray, sorted_values)
    if isinstance(index, MultiIndex):
        return MultiIndex.from_tuples(sorted_values, names=index.names)
    return Index(sorted_values, name=index.name, dtype=index.dtype)

202 

203 

def union_indexes(indexes, sort: bool | None = True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If "indexes" is empty.
    TypeError
        If tz-naive and tz-aware DatetimeIndex objects are mixed.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            # A lone plain list is always sorted here, whatever "sort" says.
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds, dtype) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        The result is built with the dtype given by the caller; the
        enclosing function's "sort" value is forwarded to the de-duplication.

        Parameters
        ----------
        inds : list of Index or list objects
        dtype : dtype to set for the resulting Index

        Returns
        -------
        Index
        """

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
            dtype=dtype,
        )

    def _find_common_index_dtype(inds):
        """
        Finds a common type for the indexes to pass through to resulting index.

        Parameters
        ----------
        inds: list of Index or list objects

        Returns
        -------
        The common type or None if no indexes were given
        """
        # Read the argument itself, not the enclosing "indexes" variable, so
        # the helper honours whatever it is called with.
        dtypes = [idx.dtype for idx in inds if isinstance(idx, Index)]
        if dtypes:
            dtype = find_common_type(dtypes)
        else:
            dtype = None

        return dtype

    if kind == "special":
        result = indexes[0]
        first = result

        dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
        dti_tzs = [x for x in dtis if x.tz is not None]
        if len(dti_tzs) not in [0, len(dtis)]:
            # TODO: this behavior is not tested (so may not be desired),
            #  but is kept in order to keep behavior the same when
            #  deprecating union_many
            # test_frame_from_dict_with_mixed_indexes
            raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if len(dtis) == len(indexes):
            # Every entry is a DatetimeIndex: the result is always sorted.
            sort = True
            if not all(is_dtype_equal(x.dtype, first.dtype) for x in indexes):
                # i.e. timezones mismatch
                # TODO(2.0): once deprecation is enforced, this union will
                #  cast to UTC automatically.
                indexes = [x.tz_convert("UTC") for x in indexes]

            result = indexes[0]

        elif len(dtis) > 1:
            # If we have mixed timezones, our casting behavior may depend on
            #  the order of indexes, which we don't want.
            sort = False

            # TODO: what about Categorical[dt64]?
            # test_frame_from_dict_with_mixed_indexes
            indexes = [x.astype(object, copy=False) for x in indexes]
            result = indexes[0]

        # Fold the remaining indexes in one at a time; Index.union receives
        # sort=None when sorting is requested and sort=False otherwise.
        for other in indexes[1:]:
            result = result.union(other, sort=None if sort else False)
        return result

    elif kind == "array":
        dtype = _find_common_index_dtype(indexes)
        index = indexes[0]
        if not all(index.equals(other) for other in indexes[1:]):
            index = _unique_indices(indexes, dtype)

        # Keep a name only where get_unanimous_names reports one for the inputs.
        name = get_unanimous_names(*indexes)[0]
        if name != index.name:
            index = index.rename(name)
        return index
    else:  # kind='list'
        dtype = _find_common_index_dtype(indexes)
        return _unique_indices(indexes, dtype)

326 

327 

def _sanitize_and_check(indexes):
    """
    Classify the entries of "indexes" and wrap stray lists in Index.

    Cases:

    - every entry is a list: the input is returned untouched with 'list'.
    - lists mixed with other types: each entry that is not an Index is
      converted with ``Index(list(x))`` and classification continues on
      the remaining types.
    - otherwise: 'array' when plain ``Index`` is the only type present,
      'special' in every other case.

    Parameters
    ----------
    indexes : list of Index or list objects

    Returns
    -------
    sanitized_indexes : list of Index or list objects
    type : {'list', 'array', 'special'}
    """
    seen_types = {type(entry) for entry in indexes}

    if list in seen_types:
        if len(seen_types) == 1:
            return indexes, "list"
        indexes = [
            entry if isinstance(entry, Index) else Index(list(entry))
            for entry in indexes
        ]
        seen_types.discard(list)

    # 'array' requires that exact ``Index`` is the one and only type left.
    if seen_types == {Index}:
        return indexes, "array"
    return indexes, "special"

364 

365 

def all_indexes_same(indexes) -> bool:
    """
    Determine if all indexes contain the same elements.

    Every index after the first is compared against the first one with
    ``equals``.

    Parameters
    ----------
    indexes : iterable of Index objects
        May be a one-shot iterator; it is consumed at most once.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable yields True, as there is nothing that differs.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # Do not let a bare StopIteration escape to the caller: inside a
        # generator it would surface as an unrelated RuntimeError.
        return True
    return all(first.equals(index) for index in itr)

382 

383 

def default_index(n: int) -> RangeIndex:
    """
    Build an unnamed RangeIndex covering ``range(0, n)``.

    Parameters
    ----------
    n : int
        Stop value of the underlying range.

    Returns
    -------
    RangeIndex
    """
    return RangeIndex._simple_new(range(n), name=None)