Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/sorting.py: 8%

255 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" miscellaneous sorting / groupby utilities """ 

2from __future__ import annotations 

3 

4from collections import defaultdict 

5from typing import ( 

6 TYPE_CHECKING, 

7 Callable, 

8 DefaultDict, 

9 Hashable, 

10 Iterable, 

11 Sequence, 

12 cast, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 algos, 

20 hashtable, 

21 lib, 

22) 

23from pandas._libs.hashtable import unique_label_indices 

24from pandas._typing import ( 

25 IndexKeyFunc, 

26 Level, 

27 NaPosition, 

28 Shape, 

29 SortKind, 

30 npt, 

31) 

32 

33from pandas.core.dtypes.common import ( 

34 ensure_int64, 

35 ensure_platform_int, 

36 is_extension_array_dtype, 

37) 

38from pandas.core.dtypes.generic import ( 

39 ABCMultiIndex, 

40 ABCRangeIndex, 

41) 

42from pandas.core.dtypes.missing import isna 

43 

44from pandas.core.construction import extract_array 

45 

46if TYPE_CHECKING: 46 ↛ 47line 46 didn't jump to line 47, because the condition on line 46 was never true

47 from pandas import MultiIndex 

48 from pandas.core.arrays import ExtensionArray 

49 from pandas.core.indexes.base import Index 

50 

51 

def get_indexer_indexer(
    target: Index,
    level: Level | list[Level] | None,
    ascending: Sequence[bool] | bool,
    kind: SortKind,
    na_position: NaPosition,
    sort_remaining: bool,
    key: IndexKeyFunc,
) -> npt.NDArray[np.intp] | None:
    """
    Compute the positional indexer used by DataFrame/Series ``sort_index``.

    Parameters
    ----------
    target : Index
    level : int or level name or list of ints or list of level names
    ascending : bool or list of bools, default True
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
    na_position : {'first', 'last'}, default 'last'
    sort_remaining : bool, default True
    key : callable, optional

    Returns
    -------
    Optional[ndarray[intp]]
        The indexer for the new index, or None when the target is already
        in the requested order and no reindexing is needed.
    """
    target = ensure_key_mapped(target, key, levels=level)
    target = target._sort_levels_monotonic()

    if level is not None:
        # sort by the requested level(s); sortlevel returns (new_index, indexer)
        _, indexer = target.sortlevel(
            level, ascending=ascending, sort_remaining=sort_remaining
        )
        return indexer

    if isinstance(target, ABCMultiIndex):
        return lexsort_indexer(
            target._get_codes_for_sorting(), orders=ascending, na_position=na_position
        )

    # Check monotonic-ness before sorting an index (GH 11080): if the index
    # is already ordered the caller can skip the take entirely.
    already_ordered = (
        target.is_monotonic_increasing if ascending else target.is_monotonic_decreasing
    )
    if already_ordered:
        return None

    # ascending can only be a Sequence for MultiIndex, so a plain bool here
    return nargsort(
        target,
        kind=kind,
        ascending=cast(bool, ascending),
        na_position=na_position,
    )

107 

108 

def get_group_index(
    labels, shape: Shape, sort: bool, xnull: bool
) -> npt.NDArray[np.int64]:
    """
    For the particular label_list, gets the offsets into the hypothetical list
    representing the totally ordered cartesian product of all possible label
    combinations, *as long as* this space fits within int64 bounds;
    otherwise, though group indices identify unique combinations of
    labels, they cannot be deconstructed.
    - If `sort`, rank of returned ids preserve lexical ranks of labels.
      i.e. returned id's can be used to do lexical sort on labels;
    - If `xnull` nulls (-1 labels) are passed through.

    Parameters
    ----------
    labels : sequence of arrays
        Integers identifying levels at each location
    shape : tuple[int, ...]
        Number of unique levels at each location
    sort : bool
        If the ranks of returned ids should match lexical ranks of labels
    xnull : bool
        If true nulls are excluded. i.e. -1 values in the labels are
        passed through.

    Returns
    -------
    An array of type int64 where two elements are equal if their corresponding
    labels are equal at all location.

    Notes
    -----
    The length of `labels` and `shape` must be identical.
    """

    def _int64_cut_off(shape) -> int:
        # Length of the longest prefix of `shape` whose cartesian product
        # still fits within int64 (lib.i8max); levels beyond it are handled
        # in a later chunk of the while-loop below.
        acc = 1
        for i, mul in enumerate(shape):
            acc *= int(mul)
            if not acc < lib.i8max:
                return i
        return len(shape)

    def maybe_lift(lab, size) -> tuple[np.ndarray, int]:
        # promote nan values (assigned -1 label in lab array)
        # so that all output values are non-negative
        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)

    labels = [ensure_int64(x) for x in labels]
    lshape = list(shape)
    if not xnull:
        # nulls participate as a regular category: shift each level's codes
        # up by one so -1 becomes 0, and widen the level size accordingly
        for i, (lab, size) in enumerate(zip(labels, shape)):
            lab, size = maybe_lift(lab, size)
            labels[i] = lab
            lshape[i] = size

    labels = list(labels)

    # Iteratively process all the labels in chunks sized so less
    # than lib.i8max unique int ids will be required for each chunk
    while True:
        # how many levels can be done without overflow:
        nlev = _int64_cut_off(lshape)

        # compute flat ids for the first `nlev` levels
        # (row-major / lexicographic: first level varies slowest)
        stride = np.prod(lshape[1:nlev], dtype="i8")
        out = stride * labels[0].astype("i8", subok=False, copy=False)

        for i in range(1, nlev):
            if lshape[i] == 0:
                stride = np.int64(0)
            else:
                stride //= lshape[i]
            out += labels[i] * stride

        if xnull:  # exclude nulls
            mask = labels[0] == -1
            for lab in labels[1:nlev]:
                mask |= lab == -1
            out[mask] = -1

        if nlev == len(lshape):  # all levels done!
            break

        # compress what has been done so far in order to avoid overflow
        # to retain lexical ranks, obs_ids should be sorted
        comp_ids, obs_ids = compress_group_index(out, sort=sort)

        # collapse the levels processed so far into one synthetic level
        # and loop again over the remaining levels
        labels = [comp_ids] + labels[nlev:]
        lshape = [len(obs_ids)] + lshape[nlev:]

    return out

201 

202 

def get_compressed_ids(
    labels, sizes: Shape
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.int64]]:
    """
    Compute compressed group ids for a list of label arrays.

    The flat group index is an offset into the cartesian product of all
    possible labels, which can be huge; this compresses it into offsets
    (comp_ids) into the list of observed unique labels (obs_group_ids).

    Parameters
    ----------
    labels : list of label arrays
    sizes : tuple[int] of size of the levels

    Returns
    -------
    np.ndarray[np.intp]
        comp_ids
    np.ndarray[np.int64]
        obs_group_ids
    """
    group_index = get_group_index(labels, sizes, sort=True, xnull=False)
    return compress_group_index(group_index, sort=True)

225 

226 

def is_int64_overflow_possible(shape: Shape) -> bool:
    """
    Return True if the cartesian product of level sizes in ``shape``
    could overflow a signed 64-bit integer.
    """
    total = 1
    for dim in shape:
        # use Python ints so the product itself cannot overflow here
        total *= int(dim)
    return total >= lib.i8max

233 

234 

235def _decons_group_index( 

236 comp_labels: npt.NDArray[np.intp], shape: Shape 

237) -> list[npt.NDArray[np.intp]]: 

238 # reconstruct labels 

239 if is_int64_overflow_possible(shape): 

240 # at some point group indices are factorized, 

241 # and may not be deconstructed here! wrong path! 

242 raise ValueError("cannot deconstruct factorized group indices!") 

243 

244 label_list = [] 

245 factor = 1 

246 y = np.array(0) 

247 x = comp_labels 

248 for i in reversed(range(len(shape))): 

249 labels = (x - y) % (factor * shape[i]) // factor 

250 np.putmask(labels, comp_labels < 0, -1) 

251 label_list.append(labels) 

252 y = labels * factor 

253 factor *= shape[i] 

254 return label_list[::-1] 

255 

256 

def decons_obs_group_ids(
    comp_ids: npt.NDArray[np.intp],
    obs_ids: npt.NDArray[np.intp],
    shape: Shape,
    labels: Sequence[npt.NDArray[np.signedinteger]],
    xnull: bool,
) -> list[npt.NDArray[np.intp]]:
    """
    Reconstruct labels from observed group ids.

    Parameters
    ----------
    comp_ids : np.ndarray[np.intp]
    obs_ids: np.ndarray[np.intp]
    shape : tuple[int]
    labels : Sequence[np.ndarray[np.signedinteger]]
    xnull : bool
        If nulls are excluded; i.e. -1 labels are passed through.
    """
    if not xnull:
        # levels containing -1 were shifted up by one in get_group_index;
        # widen shape the same way so the arithmetic below matches
        lift = np.fromiter(((a == -1).any() for a in labels), dtype=np.intp)
        shape = tuple(np.asarray(shape, dtype=np.intp) + lift)

    if is_int64_overflow_possible(shape):
        # obs ids are not deconstructable: pick one representative row
        # per observed group and read its labels directly
        indexer = unique_label_indices(comp_ids)
        return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]

    # obs ids are deconstructable! take the fast route!
    out = _decons_group_index(obs_ids, shape)
    if xnull or not lift.any():
        return out
    # undo the +1 shift applied to lifted levels
    return [x - y for x, y in zip(out, lift)]

288 

289 

def indexer_from_factorized(
    labels, shape: Shape, compress: bool = True
) -> npt.NDArray[np.intp]:
    """
    Return a sorting indexer for already-factorized label arrays.

    When ``compress`` is True the flat group ids are first compressed to
    the observed groups, which keeps ``ngroups`` small for the counting
    sort inside get_group_index_sorter.
    """
    ids = get_group_index(labels, shape, sort=True, xnull=False)

    if compress:
        ids, obs = compress_group_index(ids, sort=True)
        ngroups = len(obs)
    else:
        # `ids.size and ids.max()` yields 0 for an empty array
        ngroups = (ids.size and ids.max()) + 1

    return get_group_index_sorter(ids, ngroups)

302 

303 

def lexsort_indexer(
    keys, orders=None, na_position: str = "last", key: Callable | None = None
) -> npt.NDArray[np.intp]:
    """
    Performs lexical sorting on a set of keys

    Parameters
    ----------
    keys : sequence of arrays
        Sequence of ndarrays to be sorted by the indexer
    orders : bool or list of booleans, optional
        Determines the sorting order for each element in keys. If a list,
        it must be the same length as keys. This determines whether the
        corresponding element in keys should be sorted in ascending
        (True) or descending (False) order. if bool, applied to all
        elements as above. if None, defaults to True.
    na_position : {'first', 'last'}, default 'last'
        Determines placement of NA elements in the sorted list ("last" or "first")
    key : Callable, optional
        Callable key function applied to every element in keys before sorting

        .. versionadded:: 1.0.0

    Returns
    -------
    np.ndarray[np.intp]
    """
    from pandas.core.arrays import Categorical

    labels = []
    shape = []
    # normalize `orders` to one boolean per key
    if isinstance(orders, bool):
        orders = [orders] * len(keys)
    elif orders is None:
        orders = [True] * len(keys)

    keys = [ensure_key_mapped(k, key) for k in keys]

    for k, order in zip(keys, orders):
        with warnings.catch_warnings():
            # TODO(2.0): unnecessary once deprecation is enforced
            # GH#45618 don't issue warning user can't do anything about
            warnings.filterwarnings(
                "ignore", ".*(SparseArray|SparseDtype).*", category=FutureWarning
            )

            # factorize this key into ordered codes (-1 marks missing)
            cat = Categorical(k, ordered=True)

        if na_position not in ["last", "first"]:
            raise ValueError(f"invalid na_position: {na_position}")

        n = len(cat.categories)
        codes = cat.codes.copy()

        mask = cat.codes == -1
        # Remap codes so a single ascending sort of the remapped codes
        # realizes the requested order and NA placement for this key.
        if order:  # ascending
            if na_position == "last":
                codes = np.where(mask, n, codes)
            elif na_position == "first":
                codes += 1
        else:  # not order means descending
            if na_position == "last":
                codes = np.where(mask, n, n - codes - 1)
            elif na_position == "first":
                codes = np.where(mask, 0, n - codes)
        if mask.any():
            # NAs occupy one extra slot in this level's value space
            n += 1

        shape.append(n)
        labels.append(codes)

    return indexer_from_factorized(labels, tuple(shape))

376 

377 

378def nargsort( 

379 items, 

380 kind: str = "quicksort", 

381 ascending: bool = True, 

382 na_position: str = "last", 

383 key: Callable | None = None, 

384 mask: npt.NDArray[np.bool_] | None = None, 

385) -> npt.NDArray[np.intp]: 

386 """ 

387 Intended to be a drop-in replacement for np.argsort which handles NaNs. 

388 

389 Adds ascending, na_position, and key parameters. 

390 

391 (GH #6399, #5231, #27237) 

392 

393 Parameters 

394 ---------- 

395 kind : str, default 'quicksort' 

396 ascending : bool, default True 

397 na_position : {'first', 'last'}, default 'last' 

398 key : Optional[Callable], default None 

399 mask : Optional[np.ndarray[bool]], default None 

400 Passed when called by ExtensionArray.argsort. 

401 

402 Returns 

403 ------- 

404 np.ndarray[np.intp] 

405 """ 

406 

407 if key is not None: 

408 items = ensure_key_mapped(items, key) 

409 return nargsort( 

410 items, 

411 kind=kind, 

412 ascending=ascending, 

413 na_position=na_position, 

414 key=None, 

415 mask=mask, 

416 ) 

417 

418 if isinstance(items, ABCRangeIndex): 

419 return items.argsort(ascending=ascending) # TODO: test coverage with key? 

420 elif not isinstance(items, ABCMultiIndex): 

421 items = extract_array(items) 

422 if mask is None: 

423 mask = np.asarray(isna(items)) # TODO: does this exclude MultiIndex too? 

424 

425 if is_extension_array_dtype(items): 

426 return items.argsort(ascending=ascending, kind=kind, na_position=na_position) 

427 else: 

428 items = np.asanyarray(items) 

429 

430 idx = np.arange(len(items)) 

431 non_nans = items[~mask] 

432 non_nan_idx = idx[~mask] 

433 

434 nan_idx = np.nonzero(mask)[0] 

435 if not ascending: 

436 non_nans = non_nans[::-1] 

437 non_nan_idx = non_nan_idx[::-1] 

438 indexer = non_nan_idx[non_nans.argsort(kind=kind)] 

439 if not ascending: 

440 indexer = indexer[::-1] 

441 # Finally, place the NaNs at the end or the beginning according to 

442 # na_position 

443 if na_position == "last": 

444 indexer = np.concatenate([indexer, nan_idx]) 

445 elif na_position == "first": 

446 indexer = np.concatenate([nan_idx, indexer]) 

447 else: 

448 raise ValueError(f"invalid na_position: {na_position}") 

449 return ensure_platform_int(indexer) 

450 

451 

def nargminmax(values: ExtensionArray, method: str, axis: int = 0):
    """
    np.argmin/argmax for ExtensionArray, skipping missing values.

    Parameters
    ----------
    values : ExtensionArray
    method : {"argmax", "argmin"}
    axis : int, default 0

    Returns
    -------
    int
    """
    assert method in {"argmax", "argmin"}
    func = np.argmax if method == "argmax" else np.argmin

    mask = np.asarray(isna(values))
    arr_values = values._values_for_argsort()

    if arr_values.ndim > 1:
        if not mask.any():
            # no missing values: plain numpy reduction along the axis
            return func(arr_values, axis=axis)
        # reduce row-by-row (or column-by-column), masking NAs in each slice
        if axis == 1:
            pairs = zip(arr_values, mask)
        else:
            pairs = zip(arr_values.T, mask.T)
        return np.array([_nanargminmax(v, m, func) for v, m in pairs])

    return _nanargminmax(arr_values, mask, func)

483 

484 

485def _nanargminmax(values: np.ndarray, mask: npt.NDArray[np.bool_], func) -> int: 

486 """ 

487 See nanargminmax.__doc__. 

488 """ 

489 idx = np.arange(values.shape[0]) 

490 non_nans = values[~mask] 

491 non_nan_idx = idx[~mask] 

492 

493 return non_nan_idx[func(non_nans)] 

494 

495 

496def _ensure_key_mapped_multiindex( 

497 index: MultiIndex, key: Callable, level=None 

498) -> MultiIndex: 

499 """ 

500 Returns a new MultiIndex in which key has been applied 

501 to all levels specified in level (or all levels if level 

502 is None). Used for key sorting for MultiIndex. 

503 

504 Parameters 

505 ---------- 

506 index : MultiIndex 

507 Index to which to apply the key function on the 

508 specified levels. 

509 key : Callable 

510 Function that takes an Index and returns an Index of 

511 the same shape. This key is applied to each level 

512 separately. The name of the level can be used to 

513 distinguish different levels for application. 

514 level : list-like, int or str, default None 

515 Level or list of levels to apply the key function to. 

516 If None, key function is applied to all levels. Other 

517 levels are left unchanged. 

518 

519 Returns 

520 ------- 

521 labels : MultiIndex 

522 Resulting MultiIndex with modified levels. 

523 """ 

524 

525 if level is not None: 

526 if isinstance(level, (str, int)): 

527 sort_levels = [level] 

528 else: 

529 sort_levels = level 

530 

531 sort_levels = [index._get_level_number(lev) for lev in sort_levels] 

532 else: 

533 sort_levels = list(range(index.nlevels)) # satisfies mypy 

534 

535 mapped = [ 

536 ensure_key_mapped(index._get_level_values(level), key) 

537 if level in sort_levels 

538 else index._get_level_values(level) 

539 for level in range(index.nlevels) 

540 ] 

541 

542 return type(index).from_arrays(mapped) 

543 

544 

def ensure_key_mapped(values, key: Callable | None, levels=None):
    """
    Applies a callable key function to the values function and checks
    that the resulting value has the same shape. Can be called on Index
    subclasses, Series, DataFrames, or ndarrays.

    Parameters
    ----------
    values : Series, DataFrame, Index subclass, or ndarray
    key : Optional[Callable], key to be called on the values array
    levels : Optional[List], if values is a MultiIndex, list of levels to
        apply the key to.

    Returns
    -------
    The transformed values, converted back to the input's type (or a new
    Index subclass), or ``values`` unchanged when ``key`` is None.

    Raises
    ------
    ValueError
        If the key function changes the length of the input.
    TypeError
        If the key's result cannot be converted back to the input's type.
    """
    from pandas.core.indexes.api import Index

    if not key:
        return values

    if isinstance(values, ABCMultiIndex):
        return _ensure_key_mapped_multiindex(values, key, level=levels)

    # copy so a misbehaving key cannot mutate the caller's data
    result = key(values.copy())
    if len(result) != len(values):
        raise ValueError(
            "User-provided `key` function must not change the shape of the array."
        )

    try:
        if isinstance(
            values, Index
        ):  # convert to a new Index subclass, not necessarily the same
            result = Index(result)
        else:
            type_of_values = type(values)
            result = type_of_values(result)  # try to revert to original type otherwise
    except TypeError as err:
        # BUG FIX: the original message used a backslash continuation inside
        # the f-string, embedding a long run of indentation spaces into the
        # runtime error text; build it from adjacent literals instead, and
        # chain the original exception explicitly.
        raise TypeError(
            f"User-provided `key` function returned an invalid type {type(result)} "
            f"which could not be converted to {type(values)}."
        ) from err

    return result

587 

588 

def get_flattened_list(
    comp_ids: npt.NDArray[np.intp],
    ngroups: int,
    levels: Iterable[Index],
    labels: Iterable[np.ndarray],
) -> list[tuple]:
    """Map compressed group id -> key tuple."""
    ids64 = comp_ids.astype(np.int64, copy=False)
    key_parts: DefaultDict[int, list[int]] = defaultdict(list)
    for level_codes, level_values in zip(labels, levels):
        # build group-id -> level-code mapping, then translate each code
        # into the level's actual value
        table = hashtable.Int64HashTable(ngroups)
        table.map_keys_to_values(ids64, level_codes.astype(np.int64, copy=False))
        for gid in range(ngroups):
            key_parts[gid].append(level_values[table.get_item(gid)])
    return [tuple(parts) for parts in key_parts.values()]

604 

605 

def get_indexer_dict(
    label_list: list[np.ndarray], keys: list[Index]
) -> dict[Hashable, npt.NDArray[np.intp]]:
    """
    Returns
    -------
    dict:
        Labels mapped to indexers.
    """
    shape = tuple(len(x) for x in keys)

    group_index = get_group_index(label_list, shape, sort=True, xnull=True)
    if np.all(group_index == -1):
        # Short-circuit, lib.indices_fast will return the same
        return {}

    if is_int64_overflow_possible(shape):
        # cannot trust prod(shape); bound ngroups by the max observed id
        # (`size and max()` yields 0 for an empty array)
        ngroups = (group_index.size and group_index.max()) + 1
    else:
        ngroups = np.prod(shape, dtype="i8")

    sorter = get_group_index_sorter(group_index, ngroups)

    sorted_labels = [lab.take(sorter) for lab in label_list]
    group_index = group_index.take(sorter)

    return lib.indices_fast(sorter, group_index, keys, sorted_labels)

633 

634 

635# ---------------------------------------------------------------------- 

636# sorting levels...cleverly? 

637 

638 

def get_group_index_sorter(
    group_index: npt.NDArray[np.intp], ngroups: int | None = None
) -> npt.NDArray[np.intp]:
    """
    Stable sort indexer for an array of group ids.

    Chooses between two stable algorithms:
    - algos.groupsort_indexer: a counting sort, at least O(ngroups) where
      ngroups = prod(shape) = prod(map(len, keys)); can be huge for
      multi-key groupby.
    - np.argsort(kind='mergesort'): O(count * log(count)) in the frame
      length.
    Stability is required for correctness of groupby operations, e.g.
    df.groupby(key)[col].transform('first').

    Parameters
    ----------
    group_index : np.ndarray[np.intp]
        signed integer dtype
    ngroups : int or None, default None

    Returns
    -------
    np.ndarray[np.intp]
    """
    if ngroups is None:
        ngroups = 1 + group_index.max()
    count = len(group_index)
    # crossover heuristic between the two complexities; taking them
    # literally, so there may be room for fine-tuning these parameters
    alpha = 0.0
    beta = 1.0
    prefer_counting_sort = count > 0 and (
        (alpha + beta * ngroups) < (count * np.log(count))
    )
    if prefer_counting_sort:
        # result _should_ already be intp, but mypy cannot verify that
        sorter, _ = algos.groupsort_indexer(
            ensure_platform_int(group_index),
            ngroups,
        )
    else:
        sorter = group_index.argsort(kind="mergesort")
    return ensure_platform_int(sorter)

680 

681 

def compress_group_index(
    group_index: npt.NDArray[np.int64], sort: bool = True
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]:
    """
    Compress flat group ids into dense labels.

    The flat group index is an offset into the cartesian product of all
    possible labels — potentially a huge space — so this maps it down to
    offsets (comp_ids) into the list of observed unique values
    (obs_group_ids).
    """
    group_index = ensure_int64(group_index)
    table = hashtable.Int64HashTable(len(group_index))

    # note, group labels come out ascending (ie, 1,2,3 etc)
    comp_ids, obs_group_ids = table.get_labels_groupby(group_index)

    if sort and len(obs_group_ids) > 0:
        # relabel so comp_ids rank groups by their original id order
        obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids)

    return ensure_int64(comp_ids), ensure_int64(obs_group_ids)

702 

703 

704def _reorder_by_uniques( 

705 uniques: npt.NDArray[np.int64], labels: npt.NDArray[np.intp] 

706) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.intp]]: 

707 """ 

708 Parameters 

709 ---------- 

710 uniques : np.ndarray[np.int64] 

711 labels : np.ndarray[np.intp] 

712 

713 Returns 

714 ------- 

715 np.ndarray[np.int64] 

716 np.ndarray[np.intp] 

717 """ 

718 # sorter is index where elements ought to go 

719 sorter = uniques.argsort() 

720 

721 # reverse_indexer is where elements came from 

722 reverse_indexer = np.empty(len(sorter), dtype=np.intp) 

723 reverse_indexer.put(sorter, np.arange(len(sorter))) 

724 

725 mask = labels < 0 

726 

727 # move labels to right locations (ie, unsort ascending labels) 

728 labels = reverse_indexer.take(labels) 

729 np.putmask(labels, mask, -1) 

730 

731 # sort observed ids 

732 uniques = uniques.take(sorter) 

733 

734 return uniques, labels