Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexes/category.py: 25%

168 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1from __future__ import annotations 

2 

3from typing import ( 

4 Any, 

5 Hashable, 

6) 

7import warnings 

8 

9import numpy as np 

10 

11from pandas._libs import index as libindex 

12from pandas._typing import ( 

13 Dtype, 

14 DtypeObj, 

15 npt, 

16) 

17from pandas.util._decorators import ( 

18 cache_readonly, 

19 doc, 

20) 

21from pandas.util._exceptions import find_stack_level 

22 

23from pandas.core.dtypes.common import ( 

24 is_categorical_dtype, 

25 is_scalar, 

26 pandas_dtype, 

27) 

28from pandas.core.dtypes.missing import ( 

29 is_valid_na_for_dtype, 

30 isna, 

31 notna, 

32) 

33 

34from pandas.core.arrays.categorical import ( 

35 Categorical, 

36 contains, 

37) 

38from pandas.core.construction import extract_array 

39import pandas.core.indexes.base as ibase 

40from pandas.core.indexes.base import ( 

41 Index, 

42 maybe_extract_name, 

43) 

44from pandas.core.indexes.extension import ( 

45 NDArrayBackedExtensionIndex, 

46 inherit_names, 

47) 

48 

49from pandas.io.formats.printing import pprint_thing 

50 

# Copy of the base Index doc kwargs with ``target_klass`` overridden so that it
# names CategoricalIndex. The base mapping itself is left untouched.
_index_doc_kwargs: dict[str, str] = {
    **ibase._index_doc_kwargs,
    "target_klass": "CategoricalIndex",
}

53 

54 

55@inherit_names( 

56 [ 

57 "argsort", 

58 "tolist", 

59 "codes", 

60 "categories", 

61 "ordered", 

62 "_reverse_indexer", 

63 "searchsorted", 

64 "is_dtype_equal", 

65 "min", 

66 "max", 

67 ], 

68 Categorical, 

69) 

70@inherit_names( 

71 [ 

72 "rename_categories", 

73 "reorder_categories", 

74 "add_categories", 

75 "remove_categories", 

76 "remove_unused_categories", 

77 "set_categories", 

78 "as_ordered", 

79 "as_unordered", 

80 ], 

81 Categorical, 

82 wrap=True, 

83) 

84class CategoricalIndex(NDArrayBackedExtensionIndex): 

85 """ 

86 Index based on an underlying :class:`Categorical`. 

87 

88 CategoricalIndex, like Categorical, can only take on a limited, 

89 and usually fixed, number of possible values (`categories`). Also, 

90 like Categorical, it might have an order, but numerical operations 

91 (additions, divisions, ...) are not possible. 

92 

93 Parameters 

94 ---------- 

95 data : array-like (1-dimensional) 

96 The values of the categorical. If `categories` are given, values not in 

97 `categories` will be replaced with NaN. 

98 categories : index-like, optional 

99 The categories for the categorical. Items need to be unique. 

100 If the categories are not given here (and also not in `dtype`), they 

101 will be inferred from the `data`. 

102 ordered : bool, optional 

103 Whether or not this categorical is treated as an ordered 

104 categorical. If not given here or in `dtype`, the resulting 

105 categorical will be unordered. 

106 dtype : CategoricalDtype or "category", optional 

107 If :class:`CategoricalDtype`, cannot be used together with 

108 `categories` or `ordered`. 

109 copy : bool, default False 

110 Make a copy of input ndarray. 

111 name : object, optional 

112 Name to be stored in the index. 

113 

114 Attributes 

115 ---------- 

116 codes 

117 categories 

118 ordered 

119 

120 Methods 

121 ------- 

122 rename_categories 

123 reorder_categories 

124 add_categories 

125 remove_categories 

126 remove_unused_categories 

127 set_categories 

128 as_ordered 

129 as_unordered 

130 map 

131 

132 Raises 

133 ------ 

134 ValueError 

135 If the categories do not validate. 

136 TypeError 

137 If an explicit ``ordered=True`` is given but no `categories` and the 

138 `values` are not sortable. 

139 

140 See Also 

141 -------- 

142 Index : The base pandas Index type. 

143 Categorical : A categorical array. 

144 CategoricalDtype : Type for categorical data. 

145 

146 Notes 

147 ----- 

148 See the `user guide 

149 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__ 

150 for more. 

151 

152 Examples 

153 -------- 

154 >>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) 

155 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

156 categories=['a', 'b', 'c'], ordered=False, dtype='category') 

157 

158 ``CategoricalIndex`` can also be instantiated from a ``Categorical``: 

159 

160 >>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"]) 

161 >>> pd.CategoricalIndex(c) 

162 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

163 categories=['a', 'b', 'c'], ordered=False, dtype='category') 

164 

165 Ordered ``CategoricalIndex`` can have a min and max value. 

166 

167 >>> ci = pd.CategoricalIndex( 

168 ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] 

169 ... ) 

170 >>> ci 

171 CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], 

172 categories=['c', 'b', 'a'], ordered=True, dtype='category') 

173 >>> ci.min() 

174 'c' 

175 """ 

176 

177 _typ = "categoricalindex" 

178 _data_cls = Categorical 

179 

@property
def _can_hold_strings(self):
    """
    Mirror the ``_can_hold_strings`` flag of the underlying categories.
    """
    categories = self.categories
    return categories._can_hold_strings

183 

@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """
    Mirror the ``_should_fallback_to_positional`` flag of the categories.
    """
    categories = self.categories
    return categories._should_fallback_to_positional

187 

188 codes: np.ndarray 

189 categories: Index 

190 ordered: bool | None 

191 _data: Categorical 

192 _values: Categorical 

193 

@property
def _engine_type(self) -> type[libindex.IndexEngine]:
    """
    Engine class matching the integer type of ``self.codes``.

    A ``KeyError`` propagates if the codes' scalar type is not one of the
    four signed integer types listed in the table below.
    """
    # self.codes can have dtype int8, int16, int32 or int64, so the engine is
    # selected per width (libindex.Int8Engine, etc.).
    engines_by_code_type = {
        np.int8: libindex.Int8Engine,
        np.int16: libindex.Int16Engine,
        np.int32: libindex.Int32Engine,
        np.int64: libindex.Int64Engine,
    }
    code_type = self.codes.dtype.type
    return engines_by_code_type[code_type]

204 

205 # -------------------------------------------------------------------- 

206 # Constructors 

207 

def __new__(
    cls,
    data=None,
    categories=None,
    ordered=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    name: Hashable = None,
) -> CategoricalIndex:
    """
    Construct a CategoricalIndex by wrapping ``data`` in a Categorical.

    ``name`` is first resolved through ``maybe_extract_name``. Passing
    ``data=None`` is deprecated: a ``FutureWarning`` is emitted and an empty
    list is used instead. Scalar ``data`` raises the error built by
    ``cls._scalar_data_error``.
    """
    name = maybe_extract_name(name, data, cls)

    if data is None:
        # GH#38944
        deprecation_msg = (
            "Constructing a CategoricalIndex without passing data is "
            "deprecated and will raise in a future version. "
            "Use CategoricalIndex([], ...) instead."
        )
        warnings.warn(
            deprecation_msg, FutureWarning, stacklevel=find_stack_level()
        )
        data = []

    if is_scalar(data):
        raise cls._scalar_data_error(data)

    categorical = Categorical(
        data, categories=categories, ordered=ordered, dtype=dtype, copy=copy
    )
    return cls._simple_new(categorical, name=name)

239 

240 # -------------------------------------------------------------------- 

241 

def _is_dtype_compat(self, other) -> Categorical:
    """
    *this is an internal non-public method*

    Check that ``other`` is dtype-compatible with ``self``, coercing it to
    this index's dtype when it is not already categorical.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Categorical
        The array extracted from ``other`` (categorical input) or the values
        of ``other`` re-encoded with ``self.dtype`` (any other input).

    Raises
    ------
    TypeError
        If the dtypes are not compatible.
    """
    if is_categorical_dtype(other):
        # Categorical input: only the categories need to line up.
        other = extract_array(other)
        if other._categories_match_up_to_permutation(self):
            return other
        raise TypeError(
            "categories must match existing categories when appending"
        )

    if other._is_multi:
        # preempt raising NotImplementedError in isna call
        raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")

    # Non-categorical input: re-encode with our dtype, then verify that the
    # re-encoding did not lose or alter anything.
    values = other
    recoded = CategoricalIndex(Categorical(values, dtype=self.dtype))
    if not recoded.isin(values).all():
        raise TypeError(
            "cannot append a non-category item to a CategoricalIndex"
        )
    other = recoded._values

    same_value = other == values
    both_missing = isna(other) & isna(values)
    if not (same_value | both_missing).all():
        # GH#37667 see test_equals_non_category
        raise TypeError(
            "categories must match existing categories when appending"
        )

    return other

289 

@doc(Index.astype)
def astype(self, dtype: Dtype, copy: bool = True) -> Index:
    # No docstring here: the @doc(Index.astype) decorator above provides it.
    from pandas.core.api import NumericIndex

    dtype = pandas_dtype(dtype)
    categories = self.categories

    # the super method always returns Int64Index, UInt64Index and Float64Index
    # but if the categories are a NumericIndex with dtype float32, we want to
    # return an index with the same dtype as self.categories.
    if categories._is_backward_compat_public_numeric_index:
        assert isinstance(categories, NumericIndex)  # mypy complaint fix
        try:
            categories._validate_dtype(dtype)
        except ValueError:
            dtype_accepted = False
        else:
            dtype_accepted = True

        if dtype_accepted:
            converted = self._data.astype(dtype, copy=copy)
            # pass copy=False because any copying has been done in the
            # _data.astype call above
            return categories._constructor(converted, name=self.name, copy=False)

    # Either the categories are not a public numeric index or the requested
    # dtype was rejected by them: defer to the parent implementation.
    return super().astype(dtype, copy=copy)

313 

def equals(self, other: object) -> bool:
    """
    Determine if two CategoricalIndex objects contain the same elements.

    Returns
    -------
    bool
        True when ``other`` is the same object (per ``self.is_``) or when it
        is an Index that passes ``_is_dtype_compat`` and whose resulting
        values equal ``self._data``; False otherwise, including when
        ``_is_dtype_compat`` raises ``TypeError`` or ``ValueError``.
    """
    if self.is_(other):
        return True

    if isinstance(other, Index):
        try:
            compatible = self._is_dtype_compat(other)
        except (TypeError, ValueError):
            # Incompatible dtypes simply mean "not equal".
            pass
        else:
            return self._data.equals(compatible)

    return False

336 

337 # -------------------------------------------------------------------- 

338 # Rendering Methods 

339 

@property
def _formatter_func(self):
    """
    Formatter used for rendering; taken directly from the categories.
    """
    categories = self.categories
    return categories._formatter_func

343 

def _format_attrs(self):
    """
    Return a list of tuples of the (attr, formatted_value).

    The ``categories`` and ``ordered`` entries come first, followed by
    whatever the parent class reports.
    """
    # Bracketed, comma-separated rendering of the category labels.
    categories_repr = "[" + ", ".join(self._data._repr_categories()) + "]"

    attrs: list[tuple[str, str | int | bool | None]] = [
        ("categories", categories_repr),
        ("ordered", self.ordered),
    ]
    return attrs + super()._format_attrs()

359 

def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
    """
    Return ``header`` followed by one formatted string per value.

    Non-missing values are rendered with ``pprint_thing`` (escaping tab,
    carriage return and newline); missing values are replaced by ``na_rep``.
    """
    formatted: list[str] = []
    for value in self._values:
        if notna(value):
            formatted.append(pprint_thing(value, escape_chars=("\t", "\r", "\n")))
        else:
            formatted.append(na_rep)
    return header + formatted

366 

367 # -------------------------------------------------------------------- 

368 

@property
def inferred_type(self) -> str:
    """
    Inferred type label; always the constant string ``"categorical"``.
    """
    return "categorical"

372 

@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
    # No docstring here: the @doc(Index.__contains__) decorator provides it.

    # A key that is a valid NA for the categories' dtype counts as contained
    # exactly when the index itself has missing values.
    key_is_na = is_valid_na_for_dtype(key, self.categories.dtype)
    if key_is_na:
        return self.hasnans

    # Anything else goes through the ``contains`` helper imported from the
    # categorical module, with the index engine as the lookup container.
    return contains(self, key, container=self._engine)

380 

381 # TODO(2.0): remove reindex once non-unique deprecation is enforced 

def reindex(
    self, target, method=None, level=None, limit=None, tolerance=None
) -> tuple[Index, npt.NDArray[np.intp] | None]:
    """
    Create index with target's values (move/add/delete values as necessary)

    Parameters
    ----------
    target : array-like
        Values to conform to; converted with ``ibase.ensure_index``.
    method, level, limit : None
        Not implemented for CategoricalIndex; any non-None value raises.
    tolerance : optional
        Not referenced by this implementation.

    Returns
    -------
    new_index : pd.Index
        Resulting index
    indexer : np.ndarray[np.intp] or None
        Indices of output values in original index. None when ``self``
        equals ``target``.

    Raises
    ------
    NotImplementedError
        If ``method``, ``level`` or ``limit`` is not None.
    """
    if method is not None:
        raise NotImplementedError(
            "argument method is not implemented for CategoricalIndex.reindex"
        )
    if level is not None:
        raise NotImplementedError(
            "argument level is not implemented for CategoricalIndex.reindex"
        )
    if limit is not None:
        raise NotImplementedError(
            "argument limit is not implemented for CategoricalIndex.reindex"
        )

    target = ibase.ensure_index(target)

    if self.equals(target):
        # Nothing to move: no indexer and nothing missing.
        indexer = None
        missing = np.array([], dtype=np.intp)
    else:
        indexer, missing = self.get_indexer_non_unique(target)
        if not self.is_unique:
            # GH#42568
            warnings.warn(
                "reindexing with a non-unique Index is deprecated and will "
                "raise in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

    # Start from our own values taken at the indexer positions; if we are
    # empty or there is no indexer, start from the target itself.
    new_target: Index
    if len(self) and indexer is not None:
        new_target = self.take(indexer)
    else:
        new_target = target

    # filling in missing if needed
    if len(missing):
        # Positions of the target values within our categories; -1 entries
        # are treated below as "not one of our categories".
        cats = self.categories.get_indexer(target)

        if not isinstance(target, CategoricalIndex) or (cats == -1).any():
            # Non-categorical target, or a value outside our categories:
            # defer to the parent's non-unique reindex.
            new_target, indexer, _ = super()._reindex_non_unique(target)
        else:
            # Patch the codes at the unmatched positions (indexer == -1)
            # with the category positions of the missing target entries.
            # error: "Index" has no attribute "codes"
            codes = new_target.codes.copy()  # type: ignore[attr-defined]
            codes[indexer == -1] = cats[missing]
            cat = self._data._from_backing_data(codes)
            new_target = type(self)._simple_new(cat, name=self.name)

    # we always want to return an Index type here
    # to be consistent with .reindex for other index types (e.g. they don't
    # coerce based on the actual values, only on the dtype)
    # unless we had an initial Categorical to begin with
    # in which case we are going to conform to the passed Categorical
    if is_categorical_dtype(target):
        cat = Categorical(new_target, dtype=target.dtype)
        new_target = type(self)._simple_new(cat, name=self.name)
    else:
        # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target
        new_target_array = np.asarray(new_target)
        new_target = Index._with_infer(new_target_array, name=self.name)

    return new_target, indexer

458 

459 # -------------------------------------------------------------------- 

460 # Indexing Methods 

461 

def _maybe_cast_indexer(self, key) -> int:
    """
    Translate ``key`` into the value returned by ``_unbox_scalar``.

    If unboxing raises ``KeyError`` and ``key`` is a valid NA for the
    categories' dtype, -1 is returned; any other ``KeyError`` is re-raised.
    """
    # GH#41933: we have to do this instead of self._data._validate_scalar
    # because this will correctly get partial-indexing on Interval categories
    try:
        code = self._data._unbox_scalar(key)
    except KeyError:
        if not is_valid_na_for_dtype(key, self.categories.dtype):
            raise
        return -1
    return code

471 

def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
    """
    Wrap list-like ``values`` in a CategoricalIndex built from ``self._data``.

    A CategoricalIndex is first unwrapped to its ``_data``. Categorical input
    is re-encoded via ``_encode_with_my_categories``; any other input is
    looked up in ``self.categories`` and the resulting positions, cast to the
    dtype of ``self.codes``, become the backing codes.
    """
    if isinstance(values, CategoricalIndex):
        values = values._data

    if not isinstance(values, Categorical):
        positions = self.categories.get_indexer(values)
        codes = positions.astype(self.codes.dtype, copy=False)
        cat = self._data._from_backing_data(codes)
    else:
        # Indexing on codes is more efficient if categories are the same,
        # so we can apply some optimizations based on the degree of
        # dtype-matching.
        cat = self._data._encode_with_my_categories(values)
        # Bound as in the original code; not read again in this method.
        codes = cat._codes

    return type(self)._simple_new(cat)

486 

487 # -------------------------------------------------------------------- 

488 

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
    """
    Ask the categories whether ``dtype`` is comparable and return the answer.
    """
    categories = self.categories
    return categories._is_comparable_dtype(dtype)

491 

def take_nd(self, *args, **kwargs) -> CategoricalIndex:
    """
    Deprecated alias for `take`.

    Emits a ``FutureWarning`` and then forwards all positional and keyword
    arguments to ``self.take``.
    """
    deprecation_msg = (
        "CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take "
        "instead."
    )
    warnings.warn(deprecation_msg, FutureWarning, stacklevel=find_stack_level())
    return self.take(*args, **kwargs)

501 

def map(self, mapper):
    """
    Map values using an input mapping or function.

    The values of the index (their categories, not the codes) are mapped to
    new categories. When the correspondence is one-to-one the result is a
    :class:`~pandas.CategoricalIndex` with the same order property as the
    original; otherwise an :class:`~pandas.Index` is returned.

    With a `dict` or :class:`~pandas.Series` mapper, any unmapped category
    is mapped to `NaN`. Note that in that case an :class:`~pandas.Index`
    will be returned.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.

    Returns
    -------
    pandas.CategoricalIndex or pandas.Index
        Mapped index.

    See Also
    --------
    Index.map : Apply a mapping correspondence on an
        :class:`~pandas.Index`.
    Series.map : Apply a mapping correspondence on a
        :class:`~pandas.Series`.
    Series.apply : Apply more complex functions on a
        :class:`~pandas.Series`.

    Examples
    --------
    >>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
    >>> idx
    CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                      ordered=False, dtype='category')
    >>> idx.map(lambda x: x.upper())
    CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
                     ordered=False, dtype='category')
    >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
    CategoricalIndex(['first', 'second', 'third'], categories=['first',
                     'second', 'third'], ordered=False, dtype='category')

    If the mapping is one-to-one the ordering of the categories is
    preserved:

    >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
    >>> idx
    CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                     ordered=True, dtype='category')
    >>> idx.map({'a': 3, 'b': 2, 'c': 1})
    CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
                     dtype='category')

    If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

    >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
    Index(['first', 'second', 'first'], dtype='object')

    If a `dict` is used, all unmapped categories are mapped to `NaN` and
    the result is an :class:`~pandas.Index`:

    >>> idx.map({'a': 'first', 'b': 'second'})
    Index(['first', 'second', nan], dtype='object')
    """
    # The mapping is done by the underlying values; the result is re-wrapped
    # with the generic Index constructor, keeping this index's name.
    return Index(self._values.map(mapper), name=self.name)

571 

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
    """
    Concatenate the indexes in ``to_concat`` under ``name``.

    When every element passes ``_is_dtype_compat`` the result is built with
    ``type(self)._simple_new`` from the concatenated Categoricals. If a
    ``TypeError`` is raised along the way, the ``_values`` of all elements
    are combined with ``concat_compat`` and wrapped with ``Index(...)``.
    """
    # if calling index is category, don't check dtype of others
    try:
        compatible = []
        for item in to_concat:
            compatible.append(self._is_dtype_compat(item))
        cat = Categorical._concat_same_type(compatible)
    except TypeError:
        # not all to_concat elements are among our categories (or NA)
        from pandas.core.dtypes.concat import concat_compat

        res = concat_compat([x._values for x in to_concat])
        return Index(res, name=name)

    return type(self)._simple_new(cat, name=name)