Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/missing.py: 10%

307 statements  

coverage.py v6.4.4, created at 2023-07-17 14:22 -0600

1""" 

2Routines for filling missing data. 

3""" 

4from __future__ import annotations 

5 

6from functools import ( 

7 partial, 

8 wraps, 

9) 

10from typing import ( 

11 TYPE_CHECKING, 

12 Any, 

13 cast, 

14) 

15 

16import numpy as np 

17 

18from pandas._libs import ( 

19 algos, 

20 lib, 

21) 

22from pandas._typing import ( 

23 ArrayLike, 

24 Axis, 

25 F, 

26 npt, 

27) 

28from pandas.compat._optional import import_optional_dependency 

29 

30from pandas.core.dtypes.cast import infer_dtype_from 

31from pandas.core.dtypes.common import ( 

32 is_array_like, 

33 is_numeric_v_string_like, 

34 needs_i8_conversion, 

35) 

36from pandas.core.dtypes.missing import ( 

37 is_valid_na_for_dtype, 

38 isna, 

39 na_value_for_dtype, 

40) 

41 

42if TYPE_CHECKING:    (coverage: 42 ↛ 43, line 42 didn't jump to line 43 because the condition on line 42 was never true)

43 from pandas import Index 

44 

45 

46def check_value_size(value, mask: npt.NDArray[np.bool_], length: int): 

47 """ 

48 Validate the size of the values passed to ExtensionArray.fillna. 

49 """ 

50 if is_array_like(value): 

51 if len(value) != length: 

52 raise ValueError( 

53 f"Length of 'value' does not match. Got ({len(value)}) " 

54 f" expected {length}" 

55 ) 

56 value = value[mask] 

57 

58 return value 

59 

60 
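As a quick illustration of the helper above, the sketch below exercises check_value_size directly. pandas.core.missing is a private module, so this only demonstrates the logic shown in this listing, not a supported API.

import numpy as np
from pandas.core.missing import check_value_size

mask = np.array([True, False, True])
# array-like values must match the target length, then are reduced to the masked positions
check_value_size(np.array([10, 20, 30]), mask, length=3)   # expected: array([10, 30])
# scalars pass through unchanged
check_value_size(5, mask, length=3)                        # expected: 5
# an array-like with a mismatched length raises ValueError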

61def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: 

62 """ 

63 Return a masking array of same size/shape as arr 

64 with entries equaling any member of values_to_mask set to True 

65 

66 Parameters 

67 ---------- 

68 arr : ArrayLike 

69 values_to_mask: list, tuple, or scalar 

70 

71 Returns 

72 ------- 

73 np.ndarray[bool] 

74 """ 

75 # When called from Block.replace/replace_list, values_to_mask is a scalar 

76 # known to be holdable by arr. 

77 # When called from Series._single_replace, values_to_mask is tuple or list 

78 dtype, values_to_mask = infer_dtype_from(values_to_mask) 

79 # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any], 

80 # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, 

81 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], 

82 # _DTypeDict, Tuple[Any, Any]]]" 

83 values_to_mask = np.array(values_to_mask, dtype=dtype) # type: ignore[arg-type] 

84 

85 na_mask = isna(values_to_mask) 

86 nonna = values_to_mask[~na_mask] 

87 

88 # GH 21977 

89 mask = np.zeros(arr.shape, dtype=bool) 

90 for x in nonna: 

91 if is_numeric_v_string_like(arr, x): 

92 # GH#29553 prevent numpy deprecation warnings 

93 pass 

94 else: 

95 new_mask = arr == x 

96 if not isinstance(new_mask, np.ndarray): 

97 # usually BooleanArray 

98 new_mask = new_mask.to_numpy(dtype=bool, na_value=False) 

99 mask |= new_mask 

100 

101 if na_mask.any(): 

102 mask |= isna(arr) 

103 

104 return mask 

105 

106 

107def clean_fill_method(method: str | None, allow_nearest: bool = False): 

108 # asfreq is compat for resampling 

109 if method in [None, "asfreq"]: 

110 return None 

111 

112 if isinstance(method, str): 

113 method = method.lower() 

114 if method == "ffill": 

115 method = "pad" 

116 elif method == "bfill": 

117 method = "backfill" 

118 

119 valid_methods = ["pad", "backfill"] 

120 expecting = "pad (ffill) or backfill (bfill)" 

121 if allow_nearest: 

122 valid_methods.append("nearest") 

123 expecting = "pad (ffill), backfill (bfill) or nearest" 

124 if method not in valid_methods: 

125 raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}") 

126 return method 

127 

128 
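A small sketch of how the normalization above behaves, assuming the private helper is importable as in this listing:

from pandas.core.missing import clean_fill_method

clean_fill_method("ffill")                        # expected: "pad"
clean_fill_method("BFILL")                        # expected: "backfill" (case-insensitive)
clean_fill_method(None)                           # expected: None ("asfreq" also maps to None)
clean_fill_method("nearest", allow_nearest=True)  # expected: "nearest"
# clean_fill_method("nearest") without allow_nearest raises ValueError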

129# interpolation methods that dispatch to np.interp 

130 

131NP_METHODS = ["linear", "time", "index", "values"] 

132 

133# interpolation methods that dispatch to _interpolate_scipy_wrapper 

134 

135SP_METHODS = [ 

136 "nearest", 

137 "zero", 

138 "slinear", 

139 "quadratic", 

140 "cubic", 

141 "barycentric", 

142 "krogh", 

143 "spline", 

144 "polynomial", 

145 "from_derivatives", 

146 "piecewise_polynomial", 

147 "pchip", 

148 "akima", 

149 "cubicspline", 

150] 

151 

152 

153def clean_interp_method(method: str, index: Index, **kwargs) -> str: 

154 order = kwargs.get("order") 

155 

156 if method in ("spline", "polynomial") and order is None: 

157 raise ValueError("You must specify the order of the spline or polynomial.") 

158 

159 valid = NP_METHODS + SP_METHODS 

160 if method not in valid: 

161 raise ValueError(f"method must be one of {valid}. Got '{method}' instead.") 

162 

163 if method in ("krogh", "piecewise_polynomial", "pchip"): 

164 if not index.is_monotonic_increasing: 

165 raise ValueError( 

166 f"{method} interpolation requires that the index be monotonic." 

167 ) 

168 

169 return method 

170 

171 
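A brief sketch of the validation above; note the monotonicity check applies only to "krogh", "piecewise_polynomial" and "pchip":

import pandas as pd
from pandas.core.missing import clean_interp_method

idx = pd.Index([0, 1, 2, 3])
clean_interp_method("polynomial", idx, order=2)       # expected: "polynomial"
# clean_interp_method("spline", idx)                  # ValueError: order must be specified
# clean_interp_method("pchip", pd.Index([3, 1, 2]))   # ValueError: index must be monotonic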

172def find_valid_index(values, *, how: str) -> int | None: 

173 """ 

174 Retrieves the index of the first valid value. 

175 

176 Parameters 

177 ---------- 

178 values : ndarray or ExtensionArray 

179 how : {'first', 'last'} 

180 Use this parameter to change between the first or last valid index. 

181 

182 Returns 

183 ------- 

184 int or None 

185 """ 

186 assert how in ["first", "last"] 

187 

188 if len(values) == 0: # early stop 

189 return None 

190 

191 is_valid = ~isna(values) 

192 

193 if values.ndim == 2: 

194 is_valid = is_valid.any(axis=1) # reduce axis 1 

195 

196 if how == "first": 

197 idxpos = is_valid[::].argmax() 

198 

199 elif how == "last": 

200 idxpos = len(values) - 1 - is_valid[::-1].argmax() 

201 

202 chk_notna = is_valid[idxpos] 

203 

204 if not chk_notna: 

205 return None 

206 return idxpos 

207 

208 
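For example (a sketch against the private module shown here):

import numpy as np
from pandas.core.missing import find_valid_index

vals = np.array([np.nan, np.nan, 3.0, np.nan, 5.0, np.nan])
find_valid_index(vals, how="first")                        # expected: 2
find_valid_index(vals, how="last")                         # expected: 4
find_valid_index(np.array([np.nan, np.nan]), how="first")  # expected: None (no valid value)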

209def interpolate_array_2d( 

210 data: np.ndarray, 

211 method: str = "pad", 

212 axis: int = 0, 

213 index: Index | None = None, 

214 limit: int | None = None, 

215 limit_direction: str = "forward", 

216 limit_area: str | None = None, 

217 fill_value: Any | None = None, 

218 coerce: bool = False, 

219 downcast: str | None = None, 

220 **kwargs, 

221) -> None: 

222 """ 

223 Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill. 

224 

225 Notes 

226 ----- 

227 Alters 'data' in-place. 

228 """ 

229 try: 

230 m = clean_fill_method(method) 

231 except ValueError: 

232 m = None 

233 

234 if m is not None: 

235 if fill_value is not None: 

236 # similar to validate_fillna_kwargs 

237 raise ValueError("Cannot pass both fill_value and method") 

238 

239 interpolate_2d( 

240 data, 

241 method=m, 

242 axis=axis, 

243 limit=limit, 

244 limit_area=limit_area, 

245 ) 

246 else: 

247 assert index is not None # for mypy 

248 

249 _interpolate_2d_with_fill( 

250 data=data, 

251 index=index, 

252 axis=axis, 

253 method=method, 

254 limit=limit, 

255 limit_direction=limit_direction, 

256 limit_area=limit_area, 

257 fill_value=fill_value, 

258 **kwargs, 

259 ) 

260 return 

261 

262 
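A minimal sketch of the dispatch above: pad/backfill style methods go to interpolate_2d, while index-based methods go to _interpolate_2d_with_fill. The exact output assumes the pandas version shown in this report; these internals may differ elsewhere.

import numpy as np
import pandas as pd
from pandas.core.missing import interpolate_array_2d

data = np.array([[1.0, np.nan, 3.0, np.nan]])   # 2-D block-style layout, one row
interpolate_array_2d(data, method="linear", axis=1, index=pd.Index(range(4)))
# data is filled in place; expected: [[1., 2., 3., 3.]]
# (the trailing NaN is clamped to the last value because np.interp clips out-of-range x)

With method="pad" or "bfill" the same call would instead take the interpolate_2d branch and no index would be required.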

263def _interpolate_2d_with_fill( 

264 data: np.ndarray, # floating dtype 

265 index: Index, 

266 axis: int, 

267 method: str = "linear", 

268 limit: int | None = None, 

269 limit_direction: str = "forward", 

270 limit_area: str | None = None, 

271 fill_value: Any | None = None, 

272 **kwargs, 

273) -> None: 

274 """ 

275 Column-wise application of _interpolate_1d. 

276 

277 Notes 

278 ----- 

279 Alters 'data' in-place. 

280 

281 The signature does differ from _interpolate_1d because it only 

282 includes what is needed for Block.interpolate. 

283 """ 

284 # validate the interp method 

285 clean_interp_method(method, index, **kwargs) 

286 

287 if is_valid_na_for_dtype(fill_value, data.dtype): 

288 fill_value = na_value_for_dtype(data.dtype, compat=False) 

289 

290 if method == "time": 

291 if not needs_i8_conversion(index.dtype): 

292 raise ValueError( 

293 "time-weighted interpolation only works " 

294 "on Series or DataFrames with a " 

295 "DatetimeIndex" 

296 ) 

297 method = "values" 

298 

299 valid_limit_directions = ["forward", "backward", "both"] 

300 limit_direction = limit_direction.lower() 

301 if limit_direction not in valid_limit_directions: 

302 raise ValueError( 

303 "Invalid limit_direction: expecting one of " 

304 f"{valid_limit_directions}, got '{limit_direction}'." 

305 ) 

306 

307 if limit_area is not None: 

308 valid_limit_areas = ["inside", "outside"] 

309 limit_area = limit_area.lower() 

310 if limit_area not in valid_limit_areas: 

311 raise ValueError( 

312 f"Invalid limit_area: expecting one of {valid_limit_areas}, got " 

313 f"{limit_area}." 

314 ) 

315 

316 # default limit is unlimited GH #16282 

317 limit = algos.validate_limit(nobs=None, limit=limit) 

318 

319 indices = _index_to_interp_indices(index, method) 

320 

321 def func(yvalues: np.ndarray) -> None: 

322 # process 1-d slices in the axis direction 

323 

324 _interpolate_1d( 

325 indices=indices, 

326 yvalues=yvalues, 

327 method=method, 

328 limit=limit, 

329 limit_direction=limit_direction, 

330 limit_area=limit_area, 

331 fill_value=fill_value, 

332 bounds_error=False, 

333 **kwargs, 

334 ) 

335 

336 # error: Argument 1 to "apply_along_axis" has incompatible type 

337 # "Callable[[ndarray[Any, Any]], None]"; expected "Callable[..., 

338 # Union[_SupportsArray[dtype[<nothing>]], Sequence[_SupportsArray 

339 # [dtype[<nothing>]]], Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]], 

340 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]], 

341 # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]]]]]" 

342 np.apply_along_axis(func, axis, data) # type: ignore[arg-type] 

343 return 

344 

345 
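A sketch of the limit handling in this column-wise path, again calling the private function from this file directly:

import numpy as np
import pandas as pd
from pandas.core.missing import _interpolate_2d_with_fill

data = np.array([[1.0, np.nan, np.nan, np.nan, 5.0]])
_interpolate_2d_with_fill(data=data, index=pd.Index(range(5)), axis=1,
                          method="linear", limit=1, limit_direction="forward")
# expected: [[1., 2., nan, nan, 5.]]
# only the first NaN after a valid value is filled; the rest exceed limit=1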

346def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: 

347 """ 

348 Convert Index to ndarray of indices to pass to NumPy/SciPy. 

349 """ 

350 xarr = index._values 

351 if needs_i8_conversion(xarr.dtype): 

352 # GH#1646 for dt64tz 

353 xarr = xarr.view("i8") 

354 

355 if method == "linear": 

356 inds = xarr 

357 inds = cast(np.ndarray, inds) 

358 else: 

359 inds = np.asarray(xarr) 

360 

361 if method in ("values", "index"): 

362 if inds.dtype == np.object_: 

363 inds = lib.maybe_convert_objects(inds) 

364 

365 return inds 

366 

367 

368def _interpolate_1d( 

369 indices: np.ndarray, 

370 yvalues: np.ndarray, 

371 method: str | None = "linear", 

372 limit: int | None = None, 

373 limit_direction: str = "forward", 

374 limit_area: str | None = None, 

375 fill_value: Any | None = None, 

376 bounds_error: bool = False, 

377 order: int | None = None, 

378 **kwargs, 

379): 

380 """ 

381 Logic for the 1-d interpolation. The input 

382 indices and yvalues will each be 1-d arrays of the same length. 

383 

384 Bounds_error is currently hardcoded to False since non-scipy ones don't 

385 take it as an argument. 

386 

387 Notes 

388 ----- 

389 Fills 'yvalues' in-place. 

390 """ 

391 

392 invalid = isna(yvalues) 

393 valid = ~invalid 

394 

395 if not valid.any(): 

396 return 

397 

398 if valid.all(): 

399 return 

400 

401 # These are sets of index pointers to invalid values... i.e. {0, 1, etc... 

402 all_nans = set(np.flatnonzero(invalid)) 

403 

404 first_valid_index = find_valid_index(yvalues, how="first") 

405 if first_valid_index is None: # no valid value found; fall back to 0 

406 first_valid_index = 0 

407 start_nans = set(range(first_valid_index)) 

408 

409 last_valid_index = find_valid_index(yvalues, how="last") 

410 if last_valid_index is None: # no valid value found; fall back to len(yvalues) 

411 last_valid_index = len(yvalues) 

412 end_nans = set(range(1 + last_valid_index, len(valid))) 

413 

414 # Like the sets above, preserve_nans contains indices of invalid values, 

415 # but in this case, it is the final set of indices that need to be 

416 # preserved as NaN after the interpolation. 

417 

418 # For example if limit_direction='forward' then preserve_nans will 

419 # contain indices of NaNs at the beginning of the series, and NaNs that 

420 # are more than 'limit' away from the prior non-NaN. 

421 

422 # set preserve_nans based on direction using _interp_limit 

423 preserve_nans: list | set 

424 if limit_direction == "forward": 

425 preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) 

426 elif limit_direction == "backward": 

427 preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) 

428 else: 

429 # both directions... just use _interp_limit 

430 preserve_nans = set(_interp_limit(invalid, limit, limit)) 

431 

432 # if limit_area is set, add either mid or outside indices 

433 # to preserve_nans GH #16284 

434 if limit_area == "inside": 

435 # preserve NaNs on the outside 

436 preserve_nans |= start_nans | end_nans 

437 elif limit_area == "outside": 

438 # preserve NaNs on the inside 

439 mid_nans = all_nans - start_nans - end_nans 

440 preserve_nans |= mid_nans 

441 

442 # sort preserve_nans and convert to list 

443 preserve_nans = sorted(preserve_nans) 

444 

445 if method in NP_METHODS: 

446 # np.interp requires sorted X values, #21037 

447 

448 indexer = np.argsort(indices[valid]) 

449 yvalues[invalid] = np.interp( 

450 indices[invalid], indices[valid][indexer], yvalues[valid][indexer] 

451 ) 

452 else: 

453 yvalues[invalid] = _interpolate_scipy_wrapper( 

454 indices[valid], 

455 yvalues[valid], 

456 indices[invalid], 

457 method=method, 

458 fill_value=fill_value, 

459 bounds_error=bounds_error, 

460 order=order, 

461 **kwargs, 

462 ) 

463 

464 yvalues[preserve_nans] = np.nan 

465 return 

466 

467 
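A 1-D sketch of the behaviour described above: leading NaNs are preserved under the default forward direction, interior NaNs are interpolated, and trailing NaNs are clamped by np.interp.

import numpy as np
from pandas.core.missing import _interpolate_1d

y = np.array([np.nan, 1.0, np.nan, np.nan, 4.0, np.nan])
_interpolate_1d(indices=np.arange(len(y), dtype=np.float64), yvalues=y, method="linear")
# y is modified in place; expected: [nan, 1., 2., 3., 4., 4.]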

468def _interpolate_scipy_wrapper( 

469 x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs 

470): 

471 """ 

472 Passed off to scipy.interpolate.interp1d. method is scipy's kind. 

473 Returns an array interpolated at new_x. Add any new methods to 

474 the list in _clean_interp_method. 

475 """ 

476 extra = f"{method} interpolation requires SciPy." 

477 import_optional_dependency("scipy", extra=extra) 

478 from scipy import interpolate 

479 

480 new_x = np.asarray(new_x) 

481 

482 # ignores some kwargs that could be passed along. 

483 alt_methods = { 

484 "barycentric": interpolate.barycentric_interpolate, 

485 "krogh": interpolate.krogh_interpolate, 

486 "from_derivatives": _from_derivatives, 

487 "piecewise_polynomial": _from_derivatives, 

488 } 

489 

490 if getattr(x, "_is_all_dates", False): 

491 # GH 5975, scipy.interp1d can't handle datetime64s 

492 x, new_x = x._values.astype("i8"), new_x.astype("i8") 

493 

494 if method == "pchip": 

495 alt_methods["pchip"] = interpolate.pchip_interpolate 

496 elif method == "akima": 

497 alt_methods["akima"] = _akima_interpolate 

498 elif method == "cubicspline": 

499 alt_methods["cubicspline"] = _cubicspline_interpolate 

500 

501 interp1d_methods = [ 

502 "nearest", 

503 "zero", 

504 "slinear", 

505 "quadratic", 

506 "cubic", 

507 "polynomial", 

508 ] 

509 if method in interp1d_methods: 

510 if method == "polynomial": 

511 method = order 

512 terp = interpolate.interp1d( 

513 x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error 

514 ) 

515 new_y = terp(new_x) 

516 elif method == "spline": 

517 # GH #10633, #24014 

518 if isna(order) or (order <= 0): 

519 raise ValueError( 

520 f"order needs to be specified and greater than 0; got order: {order}" 

521 ) 

522 terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) 

523 new_y = terp(new_x) 

524 else: 

525 # GH 7295: need to be able to write for some reason 

526 # in some circumstances: check all three 

527 if not x.flags.writeable: 

528 x = x.copy() 

529 if not y.flags.writeable: 

530 y = y.copy() 

531 if not new_x.flags.writeable: 

532 new_x = new_x.copy() 

533 method = alt_methods[method] 

534 new_y = method(x, y, new_x, **kwargs) 

535 return new_y 

536 

537 
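A sketch of the SciPy dispatch (requires SciPy to be installed); "nearest" is routed through scipy.interpolate.interp1d:

import numpy as np
from pandas.core.missing import _interpolate_scipy_wrapper

x = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([0.0, 1.0, 4.0, 9.0])
new_x = np.array([1.4, 2.6])
_interpolate_scipy_wrapper(x, y, new_x, method="nearest",
                           fill_value=np.nan, bounds_error=False)
# expected: array([1., 9.])  (nearest sample points are x=1 and x=3)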

538def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): 

539 """ 

540 Convenience function for interpolate.BPoly.from_derivatives. 

541 

542 Construct a piecewise polynomial in the Bernstein basis, compatible 

543 with the specified values and derivatives at breakpoints. 

544 

545 Parameters 

546 ---------- 

547 xi : array-like 

548 sorted 1D array of x-coordinates 

549 yi : array-like or list of array-likes 

550 yi[i][j] is the j-th derivative known at xi[i] 

551 order: None or int or array-like of ints. Default: None. 

552 Specifies the degree of local polynomials. If not None, some 

553 derivatives are ignored. 

554 der : int or list 

555 How many derivatives to extract; None for all potentially nonzero 

556 derivatives (that is a number equal to the number of points), or a 

557 list of derivatives to extract. This number includes the function 

558 value as 0th derivative. 

559 extrapolate : bool, optional 

560 Whether to extrapolate to out-of-bounds points based on first and last 

561 intervals, or to return NaNs. Default: False. 

562 

563 See Also 

564 -------- 

565 scipy.interpolate.BPoly.from_derivatives 

566 

567 Returns 

568 ------- 

569 y : scalar or array-like 

570 The result, of length R or length M or M by R. 

571 """ 

572 from scipy import interpolate 

573 

574 # return the method for compat with scipy version & backwards compat 

575 method = interpolate.BPoly.from_derivatives 

576 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate) 

577 

578 return m(x) 

579 

580 

581def _akima_interpolate(xi, yi, x, der=0, axis=0): 

582 """ 

583 Convenience function for akima interpolation. 

584 xi and yi are arrays of values used to approximate some function f, 

585 with ``yi = f(xi)``. 

586 

587 See `Akima1DInterpolator` for details. 

588 

589 Parameters 

590 ---------- 

591 xi : array-like 

592 A sorted list of x-coordinates, of length N. 

593 yi : array-like 

594 A 1-D array of real values. `yi`'s length along the interpolation 

595 axis must be equal to the length of `xi`. If N-D array, use axis 

596 parameter to select correct axis. 

597 x : scalar or array-like 

598 Of length M. 

599 der : int, optional 

600 How many derivatives to extract; None for all potentially 

601 nonzero derivatives (that is a number equal to the number 

602 of points), or a list of derivatives to extract. This number 

603 includes the function value as 0th derivative. 

604 axis : int, optional 

605 Axis in the yi array corresponding to the x-coordinate values. 

606 

607 See Also 

608 -------- 

609 scipy.interpolate.Akima1DInterpolator 

610 

611 Returns 

612 ------- 

613 y : scalar or array-like 

614 The result, of length R or length M or M by R. 

615 

616 """ 

617 from scipy import interpolate 

618 

619 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) 

620 

621 return P(x, nu=der) 

622 

623 

624def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): 

625 """ 

626 Convenience function for cubic spline data interpolator. 

627 

628 See `scipy.interpolate.CubicSpline` for details. 

629 

630 Parameters 

631 ---------- 

632 xi : array-like, shape (n,) 

633 1-d array containing values of the independent variable. 

634 Values must be real, finite and in strictly increasing order. 

635 yi : array-like 

636 Array containing values of the dependent variable. It can have 

637 arbitrary number of dimensions, but the length along ``axis`` 

638 (see below) must match the length of ``x``. Values must be finite. 

639 x : scalar or array-like, shape (m,) 

640 axis : int, optional 

641 Axis along which `y` is assumed to be varying. Meaning that for 

642 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. 

643 Default is 0. 

644 bc_type : string or 2-tuple, optional 

645 Boundary condition type. Two additional equations, given by the 

646 boundary conditions, are required to determine all coefficients of 

647 polynomials on each segment [2]_. 

648 If `bc_type` is a string, then the specified condition will be applied 

649 at both ends of a spline. Available conditions are: 

650 * 'not-a-knot' (default): The first and second segment at a curve end 

651 are the same polynomial. It is a good default when there is no 

652 information on boundary conditions. 

653 * 'periodic': The interpolated functions is assumed to be periodic 

654 of period ``x[-1] - x[0]``. The first and last value of `y` must be 

655 identical: ``y[0] == y[-1]``. This boundary condition will result in 

656 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``. 

657 * 'clamped': The first derivative at curves ends are zero. Assuming 

658 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition. 

659 * 'natural': The second derivative at curve ends are zero. Assuming 

660 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition. 

661 If `bc_type` is a 2-tuple, the first and the second value will be 

662 applied at the curve start and end respectively. The tuple values can 

663 be one of the previously mentioned strings (except 'periodic') or a 

664 tuple `(order, deriv_values)` allowing to specify arbitrary 

665 derivatives at curve ends: 

666 * `order`: the derivative order, 1 or 2. 

667 * `deriv_value`: array-like containing derivative values, shape must 

668 be the same as `y`, excluding ``axis`` dimension. For example, if 

669 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with 

670 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D 

671 and have the shape (n0, n1). 

672 extrapolate : {bool, 'periodic', None}, optional 

673 If bool, determines whether to extrapolate to out-of-bounds points 

674 based on first and last intervals, or to return NaNs. If 'periodic', 

675 periodic extrapolation is used. If None (default), ``extrapolate`` is 

676 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise. 

677 

678 See Also 

679 -------- 

680 scipy.interpolate.CubicHermiteSpline 

681 

682 Returns 

683 ------- 

684 y : scalar or array-like 

685 The result, of shape (m,) 

686 

687 References 

688 ---------- 

689 .. [1] `Cubic Spline Interpolation 

690 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_ 

691 on Wikiversity. 

692 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978. 

693 """ 

694 from scipy import interpolate 

695 

696 P = interpolate.CubicSpline( 

697 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate 

698 ) 

699 

700 return P(x) 

701 

702 

703def _interpolate_with_limit_area( 

704 values: np.ndarray, method: str, limit: int | None, limit_area: str | None 

705) -> None: 

706 """ 

707 Apply interpolation and limit_area logic to values along a to-be-specified axis. 

708 

709 Parameters 

710 ---------- 

711 values: np.ndarray 

712 Input array. 

713 method: str 

714 Interpolation method. Could be "bfill" or "pad" 

715 limit: int, optional 

716 Index limit on interpolation. 

717 limit_area: str 

718 Limit area for interpolation. Can be "inside" or "outside" 

719 

720 Notes 

721 ----- 

722 Modifies values in-place. 

723 """ 

724 

725 invalid = isna(values) 

726 

727 if not invalid.all(): 

728 first = find_valid_index(values, how="first") 

729 if first is None: 

730 first = 0 

731 last = find_valid_index(values, how="last") 

732 if last is None: 

733 last = len(values) 

734 

735 interpolate_2d( 

736 values, 

737 method=method, 

738 limit=limit, 

739 ) 

740 

741 if limit_area == "inside": 

742 invalid[first : last + 1] = False 

743 elif limit_area == "outside": 

744 invalid[:first] = invalid[last + 1 :] = False 

745 

746 values[invalid] = np.nan 

747 

748 return 

749 

750 
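For instance, a sketch of this helper, which backs the limit_area handling of the pad/backfill path:

import numpy as np
from pandas.core.missing import _interpolate_with_limit_area

vals = np.array([np.nan, 1.0, np.nan, 3.0, np.nan])
_interpolate_with_limit_area(vals, method="pad", limit=None, limit_area="inside")
# expected: [nan, 1., 1., 3., nan]
# the gap between the first and last valid values is padded; outer NaNs are restored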

751def interpolate_2d( 

752 values: np.ndarray, 

753 method: str = "pad", 

754 axis: Axis = 0, 

755 limit: int | None = None, 

756 limit_area: str | None = None, 

757) -> None: 

758 """ 

759 Perform an actual interpolation of values. Values will be made 2-D if 

760 needed; fills in place and returns nothing (see Notes). 

761 

762 Parameters 

763 ---------- 

764 values: np.ndarray 

765 Input array. 

766 method: str, default "pad" 

767 Interpolation method. Could be "bfill" or "pad" 

768 axis: 0 or 1 

769 Interpolation axis 

770 limit: int, optional 

771 Index limit on interpolation. 

772 limit_area: str, optional 

773 Limit area for interpolation. Can be "inside" or "outside" 

774 

775 Notes 

776 ----- 

777 Modifies values in-place. 

778 """ 

779 if limit_area is not None: 

780 np.apply_along_axis( 

781 # error: Argument 1 to "apply_along_axis" has incompatible type 

782 # "partial[None]"; expected 

783 # "Callable[..., Union[_SupportsArray[dtype[<nothing>]], 

784 # Sequence[_SupportsArray[dtype[<nothing>]]], 

785 # Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]], 

786 # Sequence[Sequence[Sequence[_SupportsArray[dtype[<nothing>]]]]], 

787 # Sequence[Sequence[Sequence[Sequence[_ 

788 # SupportsArray[dtype[<nothing>]]]]]]]]" 

789 partial( # type: ignore[arg-type] 

790 _interpolate_with_limit_area, 

791 method=method, 

792 limit=limit, 

793 limit_area=limit_area, 

794 ), 

795 # error: Argument 2 to "apply_along_axis" has incompatible type 

796 # "Union[str, int]"; expected "SupportsIndex" 

797 axis, # type: ignore[arg-type] 

798 values, 

799 ) 

800 return 

801 

802 transf = (lambda x: x) if axis == 0 else (lambda x: x.T) 

803 

804 # reshape a 1 dim if needed 

805 if values.ndim == 1: 

806 if axis != 0: # pragma: no cover 

807 raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") 

808 values = values.reshape(tuple((1,) + values.shape)) 

809 

810 method = clean_fill_method(method) 

811 tvalues = transf(values) 

812 

813 # _pad_2d and _backfill_2d both modify tvalues inplace 

814 if method == "pad": 

815 _pad_2d(tvalues, limit=limit) 

816 else: 

817 _backfill_2d(tvalues, limit=limit) 

818 

819 return 

820 

821 
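A sketch of the pad path on a 2-D array. In this version, with axis=0 the Cython filler appears to work along each row, which matches pandas' internal block layout of shape (n_columns, n_rows):

import numpy as np
from pandas.core.missing import interpolate_2d

arr = np.array([[1.0, np.nan, np.nan, 4.0],
                [np.nan, 2.0, np.nan, np.nan]])
interpolate_2d(arr, method="ffill", axis=0, limit=1)   # "ffill" is normalized to "pad"
# arr is modified in place; expected:
# [[ 1.,  1., nan,  4.],
#  [nan,  2.,  2., nan]]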

822def _fillna_prep( 

823 values, mask: npt.NDArray[np.bool_] | None = None 

824) -> npt.NDArray[np.bool_]: 

825 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d 

826 

827 if mask is None: 

828 mask = isna(values) 

829 

830 mask = mask.view(np.uint8) 

831 return mask 

832 

833 

834def _datetimelike_compat(func: F) -> F: 

835 """ 

836 Wrapper to handle datetime64 and timedelta64 dtypes. 

837 """ 

838 

839 @wraps(func) 

840 def new_func(values, limit=None, mask=None): 

841 if needs_i8_conversion(values.dtype): 

842 if mask is None: 

843 # This needs to occur before casting to int64 

844 mask = isna(values) 

845 

846 result, mask = func(values.view("i8"), limit=limit, mask=mask) 

847 return result.view(values.dtype), mask 

848 

849 return func(values, limit=limit, mask=mask) 

850 

851 return cast(F, new_func) 

852 

853 

854@_datetimelike_compat 

855def _pad_1d( 

856 values: np.ndarray, 

857 limit: int | None = None, 

858 mask: npt.NDArray[np.bool_] | None = None, 

859) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: 

860 mask = _fillna_prep(values, mask) 

861 algos.pad_inplace(values, mask, limit=limit) 

862 return values, mask 

863 

864 
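The _datetimelike_compat decorator above lets these fillers handle datetime64/timedelta64 data by round-tripping through int64 views; a sketch:

import numpy as np
from pandas.core.missing import _pad_1d

dt = np.array(["2023-01-01", "NaT", "2023-01-03"], dtype="datetime64[ns]")
filled, mask = _pad_1d(dt)
# the wrapper views the data as i8, forward-fills, then views back as datetime64[ns]
# expected filled: ['2023-01-01', '2023-01-01', '2023-01-03']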

865@_datetimelike_compat 

866def _backfill_1d( 

867 values: np.ndarray, 

868 limit: int | None = None, 

869 mask: npt.NDArray[np.bool_] | None = None, 

870) -> tuple[np.ndarray, npt.NDArray[np.bool_]]: 

871 mask = _fillna_prep(values, mask) 

872 algos.backfill_inplace(values, mask, limit=limit) 

873 return values, mask 

874 

875 

876@_datetimelike_compat 

877def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None): 

878 mask = _fillna_prep(values, mask) 

879 

880 if np.all(values.shape): 

881 algos.pad_2d_inplace(values, mask, limit=limit) 

882 else: 

883 # for test coverage 

884 pass 

885 return values, mask 

886 

887 

888@_datetimelike_compat 

889def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None): 

890 mask = _fillna_prep(values, mask) 

891 

892 if np.all(values.shape): 

893 algos.backfill_2d_inplace(values, mask, limit=limit) 

894 else: 

895 # for test coverage 

896 pass 

897 return values, mask 

898 

899 

900_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} 

901 

902 

903def get_fill_func(method, ndim: int = 1): 

904 method = clean_fill_method(method) 

905 if ndim == 1: 

906 return _fill_methods[method] 

907 return {"pad": _pad_2d, "backfill": _backfill_2d}[method] 

908 

909 
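Resolution sketch: the method alias is normalized first, then the 1-D or 2-D filler is returned:

import numpy as np
from pandas.core.missing import get_fill_func

vals = np.array([1.0, np.nan, np.nan, 4.0])
func = get_fill_func("ffill")          # resolves to _pad_1d
filled, _ = func(vals, limit=1)
# vals is filled in place; expected: [1., 1., nan, 4.]
get_fill_func("bfill", ndim=2)         # resolves to _backfill_2d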

910def clean_reindex_fill_method(method) -> str | None: 

911 return clean_fill_method(method, allow_nearest=True) 

912 

913 

914def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit): 

915 """ 

916 Get indexers of values that won't be filled 

917 because they exceed the limits. 

918 

919 Parameters 

920 ---------- 

921 invalid : np.ndarray[bool] 

922 fw_limit : int or None 

923 forward limit to index 

924 bw_limit : int or None 

925 backward limit to index 

926 

927 Returns 

928 ------- 

929 set of indexers 

930 

931 Notes 

932 ----- 

933 This is equivalent to the more readable, but slower 

934 

935 .. code-block:: python 

936 

937 def _interp_limit(invalid, fw_limit, bw_limit): 

938 for x in np.where(invalid)[0]: 

939 if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): 

940 yield x 

941 """ 

942 # handle forward first; the backward direction is the same except 

943 # 1. operate on the reversed array 

944 # 2. subtract the returned indices from N - 1 

945 N = len(invalid) 

946 f_idx = set() 

947 b_idx = set() 

948 

949 def inner(invalid, limit): 

950 limit = min(limit, N) 

951 windowed = _rolling_window(invalid, limit + 1).all(1) 

952 idx = set(np.where(windowed)[0] + limit) | set( 

953 np.where((~invalid[: limit + 1]).cumsum() == 0)[0] 

954 ) 

955 return idx 

956 

957 if fw_limit is not None: 

958 

959 if fw_limit == 0: 

960 f_idx = set(np.where(invalid)[0]) 

961 else: 

962 f_idx = inner(invalid, fw_limit) 

963 

964 if bw_limit is not None: 

965 

966 if bw_limit == 0: 

967 # then we don't even need to care about backwards 

968 # just use forwards 

969 return f_idx 

970 else: 

971 b_idx_inv = list(inner(invalid[::-1], bw_limit)) 

972 b_idx = set(N - 1 - np.asarray(b_idx_inv)) 

973 if fw_limit == 0: 

974 return b_idx 

975 

976 return f_idx & b_idx 

977 

978 
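A sketch matching the docstring's reference implementation: with a forward limit of 1, the NaNs that sit more than one position past the last valid value are reported as "do not fill":

import numpy as np
from pandas.core.missing import _interp_limit

invalid = np.array([False, True, True, True, False])
sorted(_interp_limit(invalid, 1, 0))    # expected: [2, 3]
sorted(_interp_limit(invalid, 1, 1))    # expected: [2]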

979def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]: 

980 """ 

981 [True, True, False, True, False], 2 -> 

982 

983 [ 

984 [True, True], 

985 [True, False], 

986 [False, True], 

987 [True, False], 

988 ] 

989 """ 

990 # https://stackoverflow.com/a/6811241 

991 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) 

992 strides = a.strides + (a.strides[-1],) 

993 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)