Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/indexers/utils.py: 9%

1"""

2Low-dependency indexing utilities.

3"""

4from __future__ import annotations

6from typing import (

7 TYPE_CHECKING,

8 Any,

10import warnings

12import numpy as np

14from pandas._typing import AnyArrayLike

15from pandas.util._exceptions import find_stack_level

17from pandas.core.dtypes.common import (

18 is_array_like,

19 is_bool_dtype,

20 is_extension_array_dtype,

21 is_integer,

22 is_integer_dtype,

23 is_list_like,

24)

25from pandas.core.dtypes.generic import (

26 ABCIndex,

27 ABCSeries,

28)

30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from pandas.core.frame import DataFrame

32 from pandas.core.indexes.base import Index

34# -----------------------------------------------------------

35# Indexer Identification

def is_valid_positional_slice(slc: slice) -> bool:
    """
    Tell whether a slice can be used as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``start``, ``stop`` and ``step`` is either None
        or an integer.

    Notes
    -----
    A slice that is valid positionally may still be interpreted as a
    label-based slice, depending on the index being sliced.
    """
    bounds = (slc.start, slc.stop, slc.step)
    return all(bound is None or is_integer(bound) for bound in bounds)

def is_list_like_indexer(key) -> bool:
    """
    Tell whether ``key`` is a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
    """
    if not is_list_like(key):
        return False
    # A tuple *subclass* (e.g. a NamedTuple) can itself be an indexer, so it
    # is not treated as a list-like collection of indexers.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Tell whether every component of ``indexer`` is a scalar (integer).

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
    """
    # GH37748: a bare integer is a scalar indexer for a 1-D object (Series)
    if ndim == 1 and is_integer(indexer):
        return True

    # Otherwise only a tuple with exactly one entry per dimension qualifies.
    if not isinstance(indexer, tuple) or len(indexer) != ndim:
        return False

    for component in indexer:
        if not is_integer(component):
            return False
    return True

102

103

def is_empty_indexer(indexer) -> bool:
    """
    Tell whether ``indexer`` selects nothing.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
        True for a zero-length list-like, or for an indexer (or tuple of
        indexers) in which any component is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a lone indexer as a 1-tuple so both cases share one loop.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

121

122

123# -----------------------------------------------------------

124# Indexer Validation

125

126

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that ``value`` and ``indexer`` have compatible lengths.

    A boolean indexer whose number of True entries equals ``len(value)``
    is accepted as a special case and does not raise.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is 1-D
        and the lengths don't match.
    """
    if not is_list_like(value):
        # A scalar value has no length to compare and is never a no-op.
        return False

    if isinstance(indexer, (np.ndarray, list)):
        # Other listlikes are deliberately ignored because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if len(indexer) != len(value) and values.ndim == 1:
            key = np.array(indexer) if isinstance(indexer, list) else indexer
            # A boolean mask selecting exactly len(value) positions is fine.
            mask_matches = key.dtype == np.bool_ and key.sum() == len(value)
            if not mask_matches:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice):
        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # For 2-D ``values`` the value is used row-wise and broadcast,
            # so only the 1-D case is checked.
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False

189

190

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an integer indexer.

    -1 is permitted because it marks missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any entry is smaller than -1.
    IndexError
        If any entry is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if not len(indices):
        # Nothing to check; min()/max() would fail on an empty array.
        return

    smallest = indices.min()
    if smallest < -1:
        raise ValueError(
            f"'indices' contains values less than allowed ({smallest} < -1)"
        )

    if indices.max() >= n:
        raise IndexError("indices are out-of-bounds")

237

238

239# -----------------------------------------------------------

240# Indexer Conversion

241

242

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. With ``verify`` enabled, any
    entry that is still out of bounds afterwards raises an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. The input object itself is returned when no
        entry was negative; otherwise a modified copy is returned.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would be float64, which causes indexing errors,
            # so return an explicitly integer-typed empty array instead.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    is_negative = indices < 0
    if is_negative.any():
        # Copy first so the caller's array is never mutated.
        indices = indices.copy()
        indices[is_negative] += n

    if verify and ((indices >= n) | (indices < 0)).any():
        raise IndexError("indices are out-of-bounds")
    return indices

288

289

290# -----------------------------------------------------------

291# Unsorted

292

293

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer]

    Parameters
    ----------
    indexer : slice, range, list, ndarray, Series, Index or scalar
        The indexer whose resulting length is wanted.
    target : sized object, optional
        Object being indexed. Only its length is used, and only when
        ``indexer`` is a slice.

    Returns
    -------
    int

    Raises
    ------
    AssertionError
        If ``indexer`` is a list-like of a type not handled here.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices normalizes None/negative/out-of-range bounds exactly
        # as sequence slicing does, including for negative steps. The
        # previous hand-rolled arithmetic returned negative lengths for
        # e.g. slice(None, None, -1) or a start beyond the end of target.
        return len(range(*indexer.indices(len(target))))

    if isinstance(indexer, range):
        # len() is exact; (stop - start) // step under-counts whenever the
        # span is not a multiple of step (range(0, 5, 2) has 3 elements).
        return len(indexer)

    if isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774: a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)

    if not is_list_like_indexer(indexer):
        # anything that is not list-like is counted as a single element
        return 1
    raise AssertionError("cannot find the length of the indexer")

334

335

def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None:
    """
    Emit the deprecation warning for multi-dimensional indexing on a 1D
    Series/Index.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we currently return ndarray, which is deprecated
    (Deprecation GH#30588).

    Parameters
    ----------
    result : object
        The result of the indexing operation; a warning is issued only
        when it has more than one dimension.
    stacklevel : int, default 3
        Not used by the body; the warning's stack level is computed with
        ``find_stack_level()``.
    """
    if np.ndim(result) <= 1:
        return

    message = (
        "Support for multi-dimensional indexing (e.g. `obj[:, None]`) "
        "is deprecated and will be removed in a future "
        "version. Convert to a numpy array before indexing instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())

353

354

def unpack_1tuple(tup):
    """
    Unpack a length-1 tuple/list holding a slice to the bare slice.

    Any other input is returned unchanged.

    Notes
    -----
    The list case is deprecated.
    """
    if len(tup) != 1:
        return tup

    sole_item = tup[0]
    if not isinstance(sole_item, slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        warnings.warn(
            "Indexing with a single-item list containing a "
            "slice is deprecated and will raise in a future "
            "version. Pass a tuple instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    return sole_item

380

381

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Verify that a key used as indexer addresses as many columns as ``value``
    provides.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        The keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If the length of key is not equal to the number of columns in value
        or if the number of columns referenced by key is not equal to number
        of columns.
    """
    if columns.is_unique:
        n_addressed = len(key)
    else:
        # With duplicate labels one key may match several columns, so count
        # the matched positions (missing keys are represented as -1).
        n_addressed = len(columns.get_indexer_non_unique(key)[0])

    if n_addressed != len(value.columns):
        raise ValueError("Columns must be same length as key")

406

407

def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n]

    A single leading (or else trailing) Ellipsis is dropped from a
    multi-element tuple, and the one remaining element is returned.

    Raises
    ------
    IndexError
        If more than one element remains after dropping the Ellipsis.
    """
    # Note: we are assuming this indexing is being done on a 1D arraylike
    has_several = len(item) > 1
    if has_several and item[0] is Ellipsis:
        remaining = item[1:]
    elif has_several and item[-1] is Ellipsis:
        remaining = item[:-1]
    else:
        remaining = item

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]

424

425

426# -----------------------------------------------------------

427# Public indexer validation

428

429

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When the lengths don't match.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Anything that is not list-like is handed back untouched (integer,
    # slice, Ellipsis, None). Tuples are handed back too: in indexing they
    # mean multi-dimensional indexing rather than an array of positions.
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype

    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # NA entries in a masked boolean array count as False
            mask = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            mask = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(mask) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(mask)} instead of {len(array)}"
            )
        return mask

    if is_integer_dtype(dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")