Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/take.py: 12%

1from __future__ import annotations

3import functools

4from typing import (

5 TYPE_CHECKING,

6 cast,

7 overload,

10import numpy as np

12from pandas._libs import (

13 algos as libalgos,

14 lib,

15)

16from pandas._typing import (

17 ArrayLike,

18 npt,

19)

21from pandas.core.dtypes.cast import maybe_promote

22from pandas.core.dtypes.common import (

23 ensure_platform_int,

24 is_1d_only_ea_obj,

25)

26from pandas.core.dtypes.missing import na_value_for_dtype

28from pandas.core.construction import ensure_wrapped_if_datetimelike

30if TYPE_CHECKING: 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

32 from pandas.core.arrays.base import ExtensionArray

@overload
def take_nd(
    arr: np.ndarray,
    indexer,
    axis: int = ...,
    fill_value=...,
    allow_fill: bool = ...,
) -> np.ndarray:
    ...


@overload
def take_nd(
    arr: ExtensionArray,
    indexer,
    axis: int = ...,
    fill_value=...,
    allow_fill: bool = ...,
) -> ArrayLike:
    ...


def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Take along ``axis`` from an ndarray or ExtensionArray, filling -1 slots.

    Anything that is not an ``np.ndarray`` is handed to its own ``take``
    method; ndarrays are processed by ``_take_nd_ndarray``.

    Note: the indexer is assumed to be already validated, i.e. it contains
    no out-of-bounds indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of positions to take; positions equal to -1 are filled
        with ``fill_value``.
    axis : int, default 0
        Axis to take from.
    fill_value : any, default ``lib.no_default``
        Value written where the indexer is -1. When left at the default,
        ``na_value_for_dtype(arr.dtype, compat=False)`` is used.
    allow_fill : bool, default True
        If False, the indexer is assumed to contain no -1 values, so no
        filling is done and no mask is computed. The result is undefined if
        ``allow_fill`` is False and -1 is present in the indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)
    elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
        promoted, fill_value = maybe_promote(arr.dtype, fill_value)
        if arr.dtype != promoted:
            # EA.take is strict about returning an object of the same type,
            # so any required cast has to happen before dispatching.
            arr = arr.astype(promoted)

    if isinstance(arr, np.ndarray):
        return _take_nd_ndarray(
            np.asarray(arr), indexer, axis, fill_value, allow_fill
        )

    # From here on ``arr`` is an ExtensionArray.
    if is_1d_only_ea_obj(arr):
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # Not 1D-only (the original comment names DatetimeArray and
    # TimedeltaArray): ``take`` accepts an ``axis`` argument here.
    arr = cast("NDArrayBackedExtensionArray", arr)
    return arr.take(
        indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
    )

118

119

def _take_nd_ndarray(
    arr: np.ndarray,
    indexer: npt.NDArray[np.intp] | None,
    axis: int,
    fill_value,
    allow_fill: bool,
) -> np.ndarray:
    """
    ndarray implementation behind ``take_nd``.

    Parameters
    ----------
    arr : np.ndarray
        Source values.
    indexer : np.ndarray[np.intp] or None
        Positions to take along ``axis``. ``None`` means "every position in
        order" (an ``arange`` over that axis).
    axis : int
        Axis to take along.
    fill_value : any
        Value for positions where the indexer is -1.
    allow_fill : bool
        Whether -1 entries should be filled.

    Returns
    -------
    np.ndarray
        Newly allocated result whose length along ``axis`` is
        ``len(indexer)``.
    """
    if indexer is None:
        indexer = np.arange(arr.shape[axis], dtype=np.intp)
        # Dummy fill value of the array's own scalar type.
        fill_value = arr.dtype.type()
    else:
        indexer = ensure_platform_int(indexer)

    out_dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, allow_fill
    )

    # F-contiguous 2D input is handled through its transpose; the result is
    # transposed back before returning.
    transposed = arr.ndim == 2 and arr.flags.f_contiguous
    if transposed:
        arr = arr.T
        axis = arr.ndim - axis - 1

    # At this point ``out_dtype`` can hold both the array values and the
    # fill value.
    shape = list(arr.shape)
    shape[axis] = len(indexer)

    # Per the original note: allocating in F order here can make an
    # order-of-magnitude difference for DataFrames initialised directly from
    # 2-D ndarrays (where the block is the F-contiguous transpose of
    # ``df.values``).
    order = "F" if arr.flags.f_contiguous and axis == arr.ndim - 1 else "C"
    result = np.empty(tuple(shape), dtype=out_dtype, order=order)

    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, result.dtype, axis=axis, mask_info=mask_info
    )
    take_func(arr, indexer, result, fill_value)

    return result.T if transposed else result

168

169

def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    fill_value=None,
    allow_fill: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> ArrayLike:
    """
    Low-overhead 1D counterpart of ``take_nd``.

    Compared to ``take_nd`` this function

    - assumes ``arr`` is already a numpy array or ExtensionArray,
    - assumes ``indexer`` is already an intp ndarray,
    - only supports 1D input.

    Note: as with ``take_nd``, the indexer is assumed to be validated and
    free of out-of-bounds indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of positions to take (validated, intp dtype).
    fill_value : any, default None
        Value written where the indexer is -1.
    allow_fill : bool, default True
        If False, the indexer is assumed to contain no -1 values, so no
        filling is done and no mask is computed. The result is undefined if
        ``allow_fill`` is False and -1 is present in the indexer.
    mask : np.ndarray, optional, default None
        Pre-computed ``indexer == -1`` mask; pass it when ``allow_fill`` is
        True and the mask is already known, to avoid recomputing it.

    Returns
    -------
    np.ndarray or ExtensionArray
        The taken values.
    """
    if isinstance(arr, np.ndarray):
        if not allow_fill:
            # No filling requested: plain numpy take is enough.
            return arr.take(indexer)

        out_dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
            arr, indexer, fill_value, True, mask
        )

        # ``out_dtype`` is guaranteed to hold both the array values and the
        # fill value.
        result = np.empty(indexer.shape, dtype=out_dtype)
        take_func = _get_take_nd_function(
            arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
        )
        take_func(arr, indexer, result, fill_value)
        return result

    # ExtensionArray: defer to its own ``take`` implementation.
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

226

227

def take_2d_multi(
    arr: np.ndarray,
    indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
    fill_value=np.nan,
) -> np.ndarray:
    """
    Take rows and columns of a 2D array at once, filling -1 slots.

    Parameters
    ----------
    arr : np.ndarray
        2D source array.
    indexer : tuple of two ndarrays
        ``(row_indexer, col_indexer)``; -1 marks a position to fill.
    fill_value : any, default np.nan
        Value written where either indexer is -1.

    Returns
    -------
    np.ndarray
        New array of shape ``(len(row_indexer), len(col_indexer))``.
    """
    # Original note: only called from DataFrame._reindex_multi, so the
    # indexer is expected to be well-behaved.
    assert indexer is not None
    assert indexer[0] is not None
    assert indexer[1] is not None

    rows, cols = indexer
    rows = ensure_platform_int(rows)
    cols = ensure_platform_int(cols)
    indexer = rows, cols
    mask_info = None

    # Decide on promotion from the types alone first; that is cheaper than
    # computing a mask.
    out_dtype, fill_value = maybe_promote(arr.dtype, fill_value)
    if out_dtype != arr.dtype:
        # Promotion is only really needed if something will be filled.
        rows_missing = rows == -1
        cols_missing = cols == -1
        any_row = rows_missing.any()
        any_col = cols_missing.any()
        mask_info = (rows_missing, cols_missing), (any_row, any_col)

        if not (any_row or any_col):
            # Nothing to fill: keep the original dtype and use a dummy fill
            # value (unused, but it must be castable by the cython code).
            out_dtype, fill_value = arr.dtype, arr.dtype.type()

    # ``out_dtype`` can now hold both the array values and the fill value.
    out = np.empty((len(rows), len(cols)), dtype=out_dtype)

    kernel = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None)
    if kernel is None and arr.dtype != out.dtype:
        # Retry with a same-dtype kernel and convert the input on the fly.
        kernel = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None)
        if kernel is not None:
            kernel = _convert_wrapper(kernel, out.dtype)

    if kernel is None:
        # Generic python fallback (exercised by test_reindex_multi).
        _take_2d_multi_object(
            arr, indexer, out, fill_value=fill_value, mask_info=mask_info
        )
    else:
        kernel(arr, indexer, out=out, fill_value=fill_value)

    return out

286

287

288@functools.lru_cache(maxsize=128)

289def _get_take_nd_function_cached(

290 ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int

291):

292 """

293 Part of _get_take_nd_function below that doesn't need `mask_info` and thus

294 can be cached (mask_info potentially contains a numpy ndarray which is not

295 hashable and thus cannot be used as argument for cached function).

296 """

297 tup = (arr_dtype.name, out_dtype.name)

298 if ndim == 1:

299 func = _take_1d_dict.get(tup, None)

300 elif ndim == 2:

301 if axis == 0:

302 func = _take_2d_axis0_dict.get(tup, None)

303 else:

304 func = _take_2d_axis1_dict.get(tup, None)

305 if func is not None:

306 return func

307

308 # We get here with string, uint, float16, and complex dtypes that could

309 # potentially be handled in algos_take_helper.

310 # Also a couple with (M8[ns], object) and (m8[ns], object)

311 tup = (out_dtype.name, out_dtype.name)

312 if ndim == 1:

313 func = _take_1d_dict.get(tup, None)

314 elif ndim == 2:

315 if axis == 0:

316 func = _take_2d_axis0_dict.get(tup, None)

317 else:

318 func = _take_2d_axis1_dict.get(tup, None)

319 if func is not None:

320 func = _convert_wrapper(func, out_dtype)

321 return func

322

323 return None

324

325

326def _get_take_nd_function(

327 ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None

328):

329 """

330 Get the appropriate "take" implementation for the given dimension, axis

331 and dtypes.

332 """

333 func = None

334 if ndim <= 2:

335 # for this part we don't need `mask_info` -> use the cached algo lookup

336 func = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis)

337

338 if func is None:

339

340 def func(arr, indexer, out, fill_value=np.nan):

341 indexer = ensure_platform_int(indexer)

342 _take_nd_object(

343 arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info

344 )

345

346 return func

347

348

349def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None):

350 def wrapper(

351 arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan

352 ):

353 if arr_dtype is not None:

354 arr = arr.view(arr_dtype)

355 if out_dtype is not None:

356 out = out.view(out_dtype)

357 if fill_wrap is not None:

358 fill_value = fill_wrap(fill_value)

359 f(arr, indexer, out, fill_value=fill_value)

360

361 return wrapper

362

363

364def _convert_wrapper(f, conv_dtype):

365 def wrapper(

366 arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan

367 ):

368 if conv_dtype == object:

369 # GH#39755 avoid casting dt64/td64 to integers

370 arr = ensure_wrapped_if_datetimelike(arr)

371 arr = arr.astype(conv_dtype)

372 f(arr, indexer, out, fill_value=fill_value)

373

374 return wrapper

375

376

# 1D take kernels keyed by (input dtype name, output dtype name).
# Plain numeric/object kernels are resolved by name from libalgos
# (``take_1d_<in>_<out>``); bool and datetimelike entries go through
# ``_view_wrapper`` so the data is reinterpreted as uint8 / int64.
_take_1d_dict = {
    **{
        (src, dst): getattr(libalgos, f"take_1d_{src}_{dst}")
        for src, dsts in (
            ("int8", ("int8", "int32", "int64", "float64")),
            ("int16", ("int16", "int32", "int64", "float64")),
            ("int32", ("int32", "int64", "float64")),
            ("int64", ("int64", "float64")),
            ("float32", ("float32", "float64")),
            ("float64", ("float64",)),
            ("object", ("object",)),
        )
        for dst in dsts
    },
    ("bool", "bool"): _view_wrapper(
        libalgos.take_1d_bool_bool, arr_dtype=np.uint8, out_dtype=np.uint8
    ),
    ("bool", "object"): _view_wrapper(
        libalgos.take_1d_bool_object, arr_dtype=np.uint8, out_dtype=None
    ),
    ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
        libalgos.take_1d_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
    ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
        libalgos.take_1d_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
}

404

# 2D axis-0 take kernels keyed by (input dtype name, output dtype name).
# Plain numeric/object kernels are resolved by name from libalgos
# (``take_2d_axis0_<in>_<out>``); bool and datetimelike entries go through
# ``_view_wrapper`` so the data is reinterpreted as uint8 / int64.
_take_2d_axis0_dict = {
    **{
        (src, dst): getattr(libalgos, f"take_2d_axis0_{src}_{dst}")
        for src, dsts in (
            ("int8", ("int8", "int32", "int64", "float64")),
            ("int16", ("int16", "int32", "int64", "float64")),
            ("int32", ("int32", "int64", "float64")),
            ("int64", ("int64", "float64")),
            ("float32", ("float32", "float64")),
            ("float64", ("float64",)),
            ("object", ("object",)),
        )
        for dst in dsts
    },
    ("bool", "bool"): _view_wrapper(
        libalgos.take_2d_axis0_bool_bool, arr_dtype=np.uint8, out_dtype=np.uint8
    ),
    ("bool", "object"): _view_wrapper(
        libalgos.take_2d_axis0_bool_object, arr_dtype=np.uint8, out_dtype=None
    ),
    ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
        libalgos.take_2d_axis0_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
    ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
        libalgos.take_2d_axis0_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
}

436

# 2D axis-1 take kernels keyed by (input dtype name, output dtype name).
# Plain numeric/object kernels are resolved by name from libalgos
# (``take_2d_axis1_<in>_<out>``); bool and datetimelike entries go through
# ``_view_wrapper`` so the data is reinterpreted as uint8 / int64.
_take_2d_axis1_dict = {
    **{
        (src, dst): getattr(libalgos, f"take_2d_axis1_{src}_{dst}")
        for src, dsts in (
            ("int8", ("int8", "int32", "int64", "float64")),
            ("int16", ("int16", "int32", "int64", "float64")),
            ("int32", ("int32", "int64", "float64")),
            ("int64", ("int64", "float64")),
            ("float32", ("float32", "float64")),
            ("float64", ("float64",)),
            ("object", ("object",)),
        )
        for dst in dsts
    },
    ("bool", "bool"): _view_wrapper(
        libalgos.take_2d_axis1_bool_bool, arr_dtype=np.uint8, out_dtype=np.uint8
    ),
    ("bool", "object"): _view_wrapper(
        libalgos.take_2d_axis1_bool_object, arr_dtype=np.uint8, out_dtype=None
    ),
    ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
        libalgos.take_2d_axis1_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
    ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
        libalgos.take_2d_axis1_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
}

468

# 2D "multi" (rows and columns at once) take kernels keyed by
# (input dtype name, output dtype name).
# Plain numeric/object kernels are resolved by name from libalgos
# (``take_2d_multi_<in>_<out>``); bool and datetimelike entries go through
# ``_view_wrapper`` so the data is reinterpreted as uint8 / int64.
_take_2d_multi_dict = {
    **{
        (src, dst): getattr(libalgos, f"take_2d_multi_{src}_{dst}")
        for src, dsts in (
            ("int8", ("int8", "int32", "int64", "float64")),
            ("int16", ("int16", "int32", "int64", "float64")),
            ("int32", ("int32", "int64", "float64")),
            ("int64", ("int64", "float64")),
            ("float32", ("float32", "float64")),
            ("float64", ("float64",)),
            ("object", ("object",)),
        )
        for dst in dsts
    },
    ("bool", "bool"): _view_wrapper(
        libalgos.take_2d_multi_bool_bool, arr_dtype=np.uint8, out_dtype=np.uint8
    ),
    ("bool", "object"): _view_wrapper(
        libalgos.take_2d_multi_bool_object, arr_dtype=np.uint8, out_dtype=None
    ),
    ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
        libalgos.take_2d_multi_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
    ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
        libalgos.take_2d_multi_int64_int64,
        arr_dtype=np.int64,
        out_dtype=np.int64,
        fill_wrap=np.int64,
    ),
}

500

501

502def _take_nd_object(

503 arr: np.ndarray,

504 indexer: npt.NDArray[np.intp],

505 out: np.ndarray,

506 axis: int,

507 fill_value,

508 mask_info,

509):

510 if mask_info is not None:

511 mask, needs_masking = mask_info

512 else:

513 mask = indexer == -1

514 needs_masking = mask.any()

515 if arr.dtype != out.dtype:

516 arr = arr.astype(out.dtype)

517 if arr.shape[axis] > 0:

518 arr.take(indexer, axis=axis, out=out)

519 if needs_masking:

520 outindexer = [slice(None)] * arr.ndim

521 outindexer[axis] = mask

522 out[tuple(outindexer)] = fill_value

523

524

525def _take_2d_multi_object(

526 arr: np.ndarray,

527 indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],

528 out: np.ndarray,

529 fill_value,

530 mask_info,

531) -> None:

532 # this is not ideal, performance-wise, but it's better than raising

533 # an exception (best to optimize in Cython to avoid getting here)

534 row_idx, col_idx = indexer # both np.intp

535 if mask_info is not None:

536 (row_mask, col_mask), (row_needs, col_needs) = mask_info

537 else:

538 row_mask = row_idx == -1

539 col_mask = col_idx == -1

540 row_needs = row_mask.any()

541 col_needs = col_mask.any()

542 if fill_value is not None:

543 if row_needs:

544 out[row_mask, :] = fill_value

545 if col_needs:

546 out[:, col_mask] = fill_value

547 for i in range(len(row_idx)):

548 u_ = row_idx[i]

549 for j in range(len(col_idx)):

550 v = col_idx[j]

551 out[i, j] = arr[u_, v]

552

553

554def _take_preprocess_indexer_and_fill_value(

555 arr: np.ndarray,

556 indexer: npt.NDArray[np.intp],

557 fill_value,

558 allow_fill: bool,

559 mask: npt.NDArray[np.bool_] | None = None,

560):

561 mask_info: tuple[np.ndarray | None, bool] | None = None

562

563 if not allow_fill:

564 dtype, fill_value = arr.dtype, arr.dtype.type()

565 mask_info = None, False

566 else:

567 # check for promotion based on types only (do this first because

568 # it's faster than computing a mask)

569 dtype, fill_value = maybe_promote(arr.dtype, fill_value)

570 if dtype != arr.dtype:

571 # check if promotion is actually required based on indexer

572 if mask is not None:

573 needs_masking = True

574 else:

575 mask = indexer == -1

576 needs_masking = bool(mask.any())

577 mask_info = mask, needs_masking

578 if not needs_masking:

579 # if not, then depromote, set fill_value to dummy

580 # (it won't be used but we don't want the cython code

581 # to crash when trying to cast it to dtype)

582 dtype, fill_value = arr.dtype, arr.dtype.type()

583

584 return dtype, fill_value, mask_info

Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/array_algos/take.py: 12%

193 statements