Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexers/utils.py: 9%

150 statements  

« prev     ^ index     » next       coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Low-dependency indexing utilities. 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 TYPE_CHECKING, 

8 Any, 

9) 

10import warnings 

11 

12import numpy as np 

13 

14from pandas._typing import AnyArrayLike 

15from pandas.util._exceptions import find_stack_level 

16 

17from pandas.core.dtypes.common import ( 

18 is_array_like, 

19 is_bool_dtype, 

20 is_extension_array_dtype, 

21 is_integer, 

22 is_integer_dtype, 

23 is_list_like, 

24) 

25from pandas.core.dtypes.generic import ( 

26 ABCIndex, 

27 ABCSeries, 

28) 

29 

30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from pandas.core.frame import DataFrame 

32 from pandas.core.indexes.base import Index 

33 

34# ----------------------------------------------------------- 

35# Indexer Identification 

36 

37 

def is_valid_positional_slice(slc: slice) -> bool:
    """
    Tell whether a slice could be used as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``start``, ``stop`` and ``step`` is either None
        or an integer.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    components = (slc.start, slc.stop, slc.step)
    return all(part is None or is_integer(part) for part in components)

64 

65 

def is_list_like_indexer(key) -> bool:
    """
    Tell whether ``key`` is a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
    """
    if not is_list_like(key):
        return False
    # A tuple subclass (e.g. a NamedTuple) can itself be an indexer, so it is
    # not treated as a list-like of indexers.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass

80 

81 

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Tell whether the indexer consists solely of scalar (integer) positions.

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
    """
    if isinstance(indexer, tuple):
        # one integer per dimension is required
        if len(indexer) != ndim:
            return False
        for component in indexer:
            if not is_integer(component):
                return False
        return True

    # GH37748: allow indexer to be an integer for Series
    return bool(ndim == 1 and is_integer(indexer))

102 

103 

def is_empty_indexer(indexer) -> bool:
    """
    Tell whether the indexer selects nothing.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
        True for a zero-length list-like, or when the indexer (or any
        component of a tuple indexer) is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

121 

122 

123# ----------------------------------------------------------- 

124# Indexer Validation 

125 

126 

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean array and the
    number of true values equals the length of ``value``. In this case, no
    exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and the lengths
        don't match (only checked when ``values`` is 1-dimensional).
    """
    if isinstance(indexer, (np.ndarray, list)):
        # Other listlikes are ignored because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if not is_list_like(value):
            return False

        if len(indexer) != len(value) and values.ndim == 1:
            if isinstance(indexer, list):
                indexer = np.array(indexer)
            # a boolean mask whose count of True entries equals len(value)
            # is acceptable even though the raw lengths differ
            mask_matches_value = (
                isinstance(indexer, np.ndarray)
                and indexer.dtype == np.bool_
                and indexer.sum() == len(value)
            )
            if not mask_matches_value:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return not len(indexer)

    if isinstance(indexer, slice) and is_list_like(value):
        # A two-dimensional target uses the value row-wise and broadcasts it,
        # so only the 1-D case is length-checked.
        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return not len(value)

    return False

189 

190 

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an array of positional indices.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any index is smaller than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if not len(indices):
        # nothing to check
        return

    lowest = indices.min()
    if lowest < -1:
        raise ValueError(
            f"'indices' contains values less than allowed ({lowest} < -1)"
        )

    highest = indices.max()
    if highest >= n:
        raise IndexError("indices are out-of-bounds")

237 

238 

239# ----------------------------------------------------------- 

240# Indexer Conversion 

241 

242 

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. With ``verify`` enabled, any entry
    that still falls outside ``[0, n)`` afterwards raises an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        An array-like of positive indices that correspond to the ones
        that were passed in initially to this function. The input is copied
        before modification, so the caller's array is left untouched.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would be a float array, which cannot be used
            # for indexing; hand back an empty integer array instead.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # work on a copy so the caller's data is not mutated
        indices = indices.copy()
        indices[negative] += n

    if verify:
        out_of_bounds = (indices < 0) | (indices >= n)
        if out_of_bounds.any():
            raise IndexError("indices are out-of-bounds")
    return indices

288 

289 

290# ----------------------------------------------------------- 

291# Unsorted 

292 

293 

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer].

    Parameters
    ----------
    indexer : slice, range, list, ndarray, Series, Index or scalar
        The indexer whose result length is wanted.
    target : sized object, optional
        The object being indexed. Only consulted (via ``len``) when
        ``indexer`` is a slice.

    Returns
    -------
    int

    Raises
    ------
    AssertionError
        If the indexer is a list-like of a kind that is not handled here.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices normalises None/negative/out-of-range bounds exactly
        # the way sequence slicing does, including negative steps, so the
        # resulting range has the same length as target[indexer].
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774: a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() rounds up for steps that do not evenly divide the span,
        # which plain floor division of (stop - start) by step does not.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        # a scalar (or a slice without a target) selects a single item
        return 1
    raise AssertionError("cannot find the length of the indexer")

334 

335 

def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None:
    """
    Emit the deprecation warning for multi-dimensional indexing on a
    1D Series/Index.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we currently return ndarray, which is deprecated
    (Deprecation GH#30588).

    Parameters
    ----------
    result : object
        The outcome of the indexing operation; a warning is issued only when
        it has more than one dimension.
    stacklevel : int, default 3
        Accepted for backward compatibility; the stack level actually used is
        computed with ``find_stack_level``.
    """
    if np.ndim(result) <= 1:
        return

    message = (
        "Support for multi-dimensional indexing (e.g. `obj[:, None]`) "
        "is deprecated and will be removed in a future "
        "version. Convert to a numpy array before indexing instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())

353 

354 

def unpack_1tuple(tup):
    """
    Unpack a length-1 tuple/list holding a slice down to the slice itself.

    Anything else is returned unchanged.

    Notes
    -----
    The list case is deprecated.
    """
    if len(tup) != 1 or not isinstance(tup[0], slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        warnings.warn(
            "Indexing with a single-item list containing a "
            "slice is deprecated and will raise in a future "
            "version.  Pass a tuple instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    return tup[0]

380 

381 

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Check that a key used as an indexer matches the number of columns in
    the value being assigned.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        A list-like of keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If the length of key is not equal to the number of columns in value
        or if the number of columns referenced by key is not equal to number
        of columns.
    """
    if columns.is_unique:
        n_referenced = len(key)
    else:
        # With duplicate labels one key may hit several columns; missing
        # keys in columns are represented as -1 in the returned indexer.
        n_referenced = len(columns.get_indexer_non_unique(key)[0])

    if n_referenced != len(value.columns):
        raise ValueError("Columns must be same length as key")

406 

407 

def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n].

    A single leading or trailing Ellipsis is dropped; what remains must be
    exactly one element, which is returned.

    Raises
    ------
    IndexError
        If more than one element remains after dropping the Ellipsis.
    """
    remaining = item
    if len(item) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if item[0] is Ellipsis:
            remaining = item[1:]
        elif item[-1] is Ellipsis:
            remaining = item[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]

424 

425 

426# ----------------------------------------------------------- 

427# Public indexer validation 

428 

429 

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer has a dtype that
        is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None).
    # Tuples are also passed through: in indexing they mean
    # multi-dimensional indexing rather than an array of positions.
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype

    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # missing values in a mask select nothing
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        mask_len = len(indexer)
        array_len = len(array)
        if mask_len != array_len:
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{mask_len} instead of {array_len}"
            )
        return indexer

    if is_integer_dtype(dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")