Coverage for /var/srv/projects/api.amasfac.comuna18.com/tmp/venv/lib/python3.9/site-packages/pandas/core/missing.py: 10%
307 statements
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
« prev ^ index » next coverage.py v6.4.4, created at 2023-07-17 14:22 -0600
1"""
2Routines for filling missing data.
3"""
4from __future__ import annotations
6from functools import (
7 partial,
8 wraps,
9)
10from typing import (
11 TYPE_CHECKING,
12 Any,
13 cast,
14)
16import numpy as np
18from pandas._libs import (
19 algos,
20 lib,
21)
22from pandas._typing import (
23 ArrayLike,
24 Axis,
25 F,
26 npt,
27)
28from pandas.compat._optional import import_optional_dependency
30from pandas.core.dtypes.cast import infer_dtype_from
31from pandas.core.dtypes.common import (
32 is_array_like,
33 is_numeric_v_string_like,
34 needs_i8_conversion,
35)
36from pandas.core.dtypes.missing import (
37 is_valid_na_for_dtype,
38 isna,
39 na_value_for_dtype,
40)
42if TYPE_CHECKING: 42 ↛ 43line 42 didn't jump to line 43, because the condition on line 42 was never true
43 from pandas import Index
def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
    """
    Validate the size of the values passed to ExtensionArray.fillna.

    Scalars pass through unchanged; array-likes must match ``length`` and
    are reduced to the entries aligned with True positions in ``mask``.
    """
    if not is_array_like(value):
        # scalar fill values are broadcast by the caller
        return value

    if len(value) != length:
        raise ValueError(
            f"Length of 'value' does not match. Got ({len(value)}) "
            f" expected {length}"
        )
    # keep only the fill values that line up with missing positions
    return value[mask]
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
    """
    Return a masking array of same size/shape as arr
    with entries equaling any member of values_to_mask set to True

    Parameters
    ----------
    arr : ArrayLike
    values_to_mask: list, tuple, or scalar

    Returns
    -------
    np.ndarray[bool]
    """
    # When called from Block.replace/replace_list, values_to_mask is a scalar
    # known to be holdable by arr.
    # When called from Series._single_replace, values_to_mask is tuple or list
    dtype, values_to_mask = infer_dtype_from(values_to_mask)
    # error: Argument "dtype" to "array" has incompatible type "Union[dtype[Any],
    # ExtensionDtype]"; expected a numpy-compatible dtype
    values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]

    na_mask = isna(values_to_mask)
    non_na_targets = values_to_mask[~na_mask]

    # GH 21977: start all-False and OR in one comparison per target value
    result = np.zeros(arr.shape, dtype=bool)
    for target in non_na_targets:
        if is_numeric_v_string_like(arr, target):
            # GH#29553 prevent numpy deprecation warnings
            continue
        eq = arr == target
        if not isinstance(eq, np.ndarray):
            # usually BooleanArray
            eq = eq.to_numpy(dtype=bool, na_value=False)
        result |= eq

    if na_mask.any():
        # at least one of the targets was NA-like: NA entries of arr match too
        result |= isna(arr)

    return result
def clean_fill_method(method: str | None, allow_nearest: bool = False):
    """
    Normalize a fill-method name to its canonical form.

    ``ffill``/``pad`` -> ``pad``; ``bfill``/``backfill`` -> ``backfill``;
    ``nearest`` is accepted only when ``allow_nearest``.  ``None`` and
    ``asfreq`` map to ``None``.  Anything else raises ValueError.
    """
    # asfreq is compat for resampling
    if method in [None, "asfreq"]:
        return None

    if isinstance(method, str):
        # map aliases onto their canonical names
        aliases = {"ffill": "pad", "bfill": "backfill"}
        method = method.lower()
        method = aliases.get(method, method)

    if allow_nearest:
        valid_methods = ["pad", "backfill", "nearest"]
        expecting = "pad (ffill), backfill (bfill) or nearest"
    else:
        valid_methods = ["pad", "backfill"]
        expecting = "pad (ffill) or backfill (bfill)"

    if method not in valid_methods:
        raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}")
    return method
# interpolation methods that dispatch to np.interp
NP_METHODS = ["linear", "time", "index", "values"]

# interpolation methods that dispatch to _interpolate_scipy_wrapper
# (all of these require SciPy to be installed)
SP_METHODS = [
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "spline",
    "polynomial",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
    "cubicspline",
]
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
    """
    Validate an interpolation method (and its kwargs) against ``index``.

    Returns the method unchanged; raises ValueError when the method is
    unknown, needs an ``order`` that was not supplied, or needs a
    monotonic index that ``index`` does not satisfy.
    """
    if method in ("spline", "polynomial") and kwargs.get("order") is None:
        raise ValueError("You must specify the order of the spline or polynomial.")

    valid = NP_METHODS + SP_METHODS
    if method not in valid:
        raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

    # these scipy methods assume sorted x-values
    requires_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if requires_monotonic and not index.is_monotonic_increasing:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    return method
def find_valid_index(values, *, how: str) -> int | None:
    """
    Retrieve the positional index of the first or last valid (non-NA) value.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    how : {'first', 'last'}
        Use this parameter to change between the first or last valid index.

    Returns
    -------
    int or None
        None when ``values`` is empty or contains no valid entries.
    """
    assert how in ["first", "last"]

    n = len(values)
    if n == 0:  # early stop
        return None

    is_valid = ~isna(values)

    if values.ndim == 2:
        # a 2-D row counts as valid when any entry in it is valid
        is_valid = is_valid.any(axis=1)

    if how == "first":
        idxpos = is_valid[::].argmax()
    else:  # how == "last"
        idxpos = n - 1 - is_valid[::-1].argmax()

    # argmax of an all-False array is 0, so confirm the hit is real
    if not is_valid[idxpos]:
        return None
    return idxpos
def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: int = 0,
    index: Index | None = None,
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    coerce: bool = False,
    downcast: str | None = None,
    **kwargs,
) -> None:
    """
    Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill.

    Notes
    -----
    Alters 'data' in-place.
    """
    # If `method` is a pad/backfill-style method this yields its canonical
    # name; otherwise (a true interpolation method) we get None and fall
    # through to the index-based path.
    try:
        fill_method = clean_fill_method(method)
    except ValueError:
        fill_method = None

    if fill_method is None:
        assert index is not None  # for mypy

        _interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )
    else:
        if fill_value is not None:
            # similar to validate_fillna_kwargs
            raise ValueError("Cannot pass both fill_value and method")

        interpolate_2d(
            data,
            method=fill_method,
            axis=axis,
            limit=limit,
            limit_area=limit_area,
        )
    return
def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: int,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Column-wise application of _interpolate_1d.

    Validates method/limit_direction/limit_area, converts the index to
    numeric x-coordinates, then interpolates each 1-d slice along ``axis``.

    Notes
    -----
    Alters 'data' in-place.

    The signature does differ from _interpolate_1d because it only
    includes what is needed for Block.interpolate.
    """
    # validate the interp method (raises on unknown/incompatible methods)
    clean_interp_method(method, index, **kwargs)

    # normalize NA-like fill_value to the canonical NA for this dtype
    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        # "time" is only meaningful for datetimelike indexes; once checked,
        # it is handled identically to "values"
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        method = "values"

    valid_limit_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        valid_limit_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    # numeric x-coordinates shared by every 1-d slice
    indices = _index_to_interp_indices(index, method)

    def func(yvalues: np.ndarray) -> None:
        # process 1-d slices in the axis direction (fills yvalues in-place)

        _interpolate_1d(
            indices=indices,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # error: Argument 1 to "apply_along_axis" has incompatible type
    # "Callable[[ndarray[Any, Any]], None]"; expected a callable returning
    # an array-like — we rely on in-place mutation instead of return values
    np.apply_along_axis(func, axis, data)  # type: ignore[arg-type]
    return
346def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
347 """
348 Convert Index to ndarray of indices to pass to NumPy/SciPy.
349 """
350 xarr = index._values
351 if needs_i8_conversion(xarr.dtype):
352 # GH#1646 for dt64tz
353 xarr = xarr.view("i8")
355 if method == "linear":
356 inds = xarr
357 inds = cast(np.ndarray, inds)
358 else:
359 inds = np.asarray(xarr)
361 if method in ("values", "index"):
362 if inds.dtype == np.object_:
363 inds = lib.maybe_convert_objects(inds)
365 return inds
def _interpolate_1d(
    indices: np.ndarray,
    yvalues: np.ndarray,
    method: str | None = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    bounds_error: bool = False,
    order: int | None = None,
    **kwargs,
):
    """
    Logic for the 1-d interpolation. The input
    indices and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Notes
    -----
    Fills 'yvalues' in-place.  Interpolates every NA first, then re-masks
    the positions that should stay NA (limit / limit_direction /
    limit_area constraints) back to NaN.
    """

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # nothing to interpolate from
        return

    if valid.all():
        # nothing to fill
        return

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))

    first_valid_index = find_valid_index(yvalues, how="first")
    if first_valid_index is None:  # no nan found in start
        first_valid_index = 0
    # leading run of NAs, before the first valid value
    start_nans = set(range(first_valid_index))

    last_valid_index = find_valid_index(yvalues, how="last")
    if last_valid_index is None:  # no nan found in end
        last_valid_index = len(yvalues)
    # trailing run of NAs, after the last valid value
    end_nans = set(range(1 + last_valid_index, len(valid)))

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: list | set
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        mid_nans = all_nans - start_nans - end_nans
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list (needed for fancy indexing)
    preserve_nans = sorted(preserve_nans)

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037

        indexer = np.argsort(indices[valid])
        yvalues[invalid] = np.interp(
            indices[invalid], indices[valid][indexer], yvalues[valid][indexer]
        )
    else:
        yvalues[invalid] = _interpolate_scipy_wrapper(
            indices[valid],
            yvalues[valid],
            indices[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # re-mask the positions that must remain NA despite being interpolated
    yvalues[preserve_nans] = np.nan
    return
def _interpolate_scipy_wrapper(
    x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs
):
    """
    Passed off to scipy.interpolate.interp1d. method is scipy's kind.
    Returns an array interpolated at new_x. Add any new methods to
    the list in _clean_interp_method.
    """
    extra = f"{method} interpolation requires SciPy."
    import_optional_dependency("scipy", extra=extra)
    from scipy import interpolate

    new_x = np.asarray(new_x)

    # methods dispatched by name rather than through interp1d;
    # ignores some kwargs that could be passed along.
    alt_methods = {
        "barycentric": interpolate.barycentric_interpolate,
        "krogh": interpolate.krogh_interpolate,
        "from_derivatives": _from_derivatives,
        "piecewise_polynomial": _from_derivatives,
    }

    if getattr(x, "_is_all_dates", False):
        # GH 5975, scipy.interp1d can't handle datetime64s
        x, new_x = x._values.astype("i8"), new_x.astype("i8")

    # these are only registered when requested so that the attribute lookup
    # happens against the installed scipy version
    if method == "pchip":
        alt_methods["pchip"] = interpolate.pchip_interpolate
    elif method == "akima":
        alt_methods["akima"] = _akima_interpolate
    elif method == "cubicspline":
        alt_methods["cubicspline"] = _cubicspline_interpolate

    interp1d_methods = [
        "nearest",
        "zero",
        "slinear",
        "quadratic",
        "cubic",
        "polynomial",
    ]
    if method in interp1d_methods:
        if method == "polynomial":
            # interp1d takes the polynomial degree as its `kind`
            method = order
        terp = interpolate.interp1d(
            x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error
        )
        new_y = terp(new_x)
    elif method == "spline":
        # GH #10633, #24014
        if isna(order) or (order <= 0):
            raise ValueError(
                f"order needs to be specified and greater than 0; got order: {order}"
            )
        terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
        new_y = terp(new_x)
    else:
        # GH 7295: need to be able to write for some reason
        # in some circumstances: check all three
        if not x.flags.writeable:
            x = x.copy()
        if not y.flags.writeable:
            y = y.copy()
        if not new_x.flags.writeable:
            new_x = new_x.copy()
        method = alt_methods[method]
        new_y = method(x, y, new_x, **kwargs)
    return new_y
538def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False):
539 """
540 Convenience function for interpolate.BPoly.from_derivatives.
542 Construct a piecewise polynomial in the Bernstein basis, compatible
543 with the specified values and derivatives at breakpoints.
545 Parameters
546 ----------
547 xi : array-like
548 sorted 1D array of x-coordinates
549 yi : array-like or list of array-likes
550 yi[i][j] is the j-th derivative known at xi[i]
551 order: None or int or array-like of ints. Default: None.
552 Specifies the degree of local polynomials. If not None, some
553 derivatives are ignored.
554 der : int or list
555 How many derivatives to extract; None for all potentially nonzero
556 derivatives (that is a number equal to the number of points), or a
557 list of derivatives to extract. This number includes the function
558 value as 0th derivative.
559 extrapolate : bool, optional
560 Whether to extrapolate to ouf-of-bounds points based on first and last
561 intervals, or to return NaNs. Default: True.
563 See Also
564 --------
565 scipy.interpolate.BPoly.from_derivatives
567 Returns
568 -------
569 y : scalar or array-like
570 The result, of length R or length M or M by R.
571 """
572 from scipy import interpolate
574 # return the method for compat with scipy version & backwards compat
575 method = interpolate.BPoly.from_derivatives
576 m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate)
578 return m(x)
581def _akima_interpolate(xi, yi, x, der=0, axis=0):
582 """
583 Convenience function for akima interpolation.
584 xi and yi are arrays of values used to approximate some function f,
585 with ``yi = f(xi)``.
587 See `Akima1DInterpolator` for details.
589 Parameters
590 ----------
591 xi : array-like
592 A sorted list of x-coordinates, of length N.
593 yi : array-like
594 A 1-D array of real values. `yi`'s length along the interpolation
595 axis must be equal to the length of `xi`. If N-D array, use axis
596 parameter to select correct axis.
597 x : scalar or array-like
598 Of length M.
599 der : int, optional
600 How many derivatives to extract; None for all potentially
601 nonzero derivatives (that is a number equal to the number
602 of points), or a list of derivatives to extract. This number
603 includes the function value as 0th derivative.
604 axis : int, optional
605 Axis in the yi array corresponding to the x-coordinate values.
607 See Also
608 --------
609 scipy.interpolate.Akima1DInterpolator
611 Returns
612 -------
613 y : scalar or array-like
614 The result, of length R or length M or M by R,
616 """
617 from scipy import interpolate
619 P = interpolate.Akima1DInterpolator(xi, yi, axis=axis)
621 return P(x, nu=der)
624def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None):
625 """
626 Convenience function for cubic spline data interpolator.
628 See `scipy.interpolate.CubicSpline` for details.
630 Parameters
631 ----------
632 xi : array-like, shape (n,)
633 1-d array containing values of the independent variable.
634 Values must be real, finite and in strictly increasing order.
635 yi : array-like
636 Array containing values of the dependent variable. It can have
637 arbitrary number of dimensions, but the length along ``axis``
638 (see below) must match the length of ``x``. Values must be finite.
639 x : scalar or array-like, shape (m,)
640 axis : int, optional
641 Axis along which `y` is assumed to be varying. Meaning that for
642 ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``.
643 Default is 0.
644 bc_type : string or 2-tuple, optional
645 Boundary condition type. Two additional equations, given by the
646 boundary conditions, are required to determine all coefficients of
647 polynomials on each segment [2]_.
648 If `bc_type` is a string, then the specified condition will be applied
649 at both ends of a spline. Available conditions are:
650 * 'not-a-knot' (default): The first and second segment at a curve end
651 are the same polynomial. It is a good default when there is no
652 information on boundary conditions.
653 * 'periodic': The interpolated functions is assumed to be periodic
654 of period ``x[-1] - x[0]``. The first and last value of `y` must be
655 identical: ``y[0] == y[-1]``. This boundary condition will result in
656 ``y'[0] == y'[-1]`` and ``y''[0] == y''[-1]``.
657 * 'clamped': The first derivative at curves ends are zero. Assuming
658 a 1D `y`, ``bc_type=((1, 0.0), (1, 0.0))`` is the same condition.
659 * 'natural': The second derivative at curve ends are zero. Assuming
660 a 1D `y`, ``bc_type=((2, 0.0), (2, 0.0))`` is the same condition.
661 If `bc_type` is a 2-tuple, the first and the second value will be
662 applied at the curve start and end respectively. The tuple values can
663 be one of the previously mentioned strings (except 'periodic') or a
664 tuple `(order, deriv_values)` allowing to specify arbitrary
665 derivatives at curve ends:
666 * `order`: the derivative order, 1 or 2.
667 * `deriv_value`: array-like containing derivative values, shape must
668 be the same as `y`, excluding ``axis`` dimension. For example, if
669 `y` is 1D, then `deriv_value` must be a scalar. If `y` is 3D with
670 the shape (n0, n1, n2) and axis=2, then `deriv_value` must be 2D
671 and have the shape (n0, n1).
672 extrapolate : {bool, 'periodic', None}, optional
673 If bool, determines whether to extrapolate to out-of-bounds points
674 based on first and last intervals, or to return NaNs. If 'periodic',
675 periodic extrapolation is used. If None (default), ``extrapolate`` is
676 set to 'periodic' for ``bc_type='periodic'`` and to True otherwise.
678 See Also
679 --------
680 scipy.interpolate.CubicHermiteSpline
682 Returns
683 -------
684 y : scalar or array-like
685 The result, of shape (m,)
687 References
688 ----------
689 .. [1] `Cubic Spline Interpolation
690 <https://en.wikiversity.org/wiki/Cubic_Spline_Interpolation>`_
691 on Wikiversity.
692 .. [2] Carl de Boor, "A Practical Guide to Splines", Springer-Verlag, 1978.
693 """
694 from scipy import interpolate
696 P = interpolate.CubicSpline(
697 xi, yi, axis=axis, bc_type=bc_type, extrapolate=extrapolate
698 )
700 return P(x)
def _interpolate_with_limit_area(
    values: np.ndarray, method: str, limit: int | None, limit_area: str | None
) -> None:
    """
    Apply interpolation and limit_area logic to values along a to-be-specified axis.

    Fills everything first via interpolate_2d, then restores NaN at the
    positions excluded by ``limit_area``.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str
        Interpolation method. Could be "bfill" or "pad"
    limit: int, optional
        Index limit on interpolation.
    limit_area: str
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    """

    invalid = isna(values)

    if not invalid.all():
        # span of valid data: [first, last]
        first = find_valid_index(values, how="first")
        if first is None:
            first = 0
        last = find_valid_index(values, how="last")
        if last is None:
            last = len(values)

        # fill unconditionally first; limit_area is applied afterwards
        interpolate_2d(
            values,
            method=method,
            limit=limit,
        )

        # clear the positions that were ALLOWED to be filled from `invalid`,
        # so the remaining True entries are re-set to NaN below
        if limit_area == "inside":
            invalid[first : last + 1] = False
        elif limit_area == "outside":
            invalid[:first] = invalid[last + 1 :] = False

        values[invalid] = np.nan

    return
def interpolate_2d(
    values: np.ndarray,
    method: str = "pad",
    axis: Axis = 0,
    limit: int | None = None,
    limit_area: str | None = None,
) -> None:
    """
    Perform an actual interpolation of values, values will be make 2-d if
    needed fills inplace, returns the result.

    Parameters
    ----------
    values: np.ndarray
        Input array.
    method: str, default "pad"
        Interpolation method. Could be "bfill" or "pad"
    axis: 0 or 1
        Interpolation axis
    limit: int, optional
        Index limit on interpolation.
    limit_area: str, optional
        Limit area for interpolation. Can be "inside" or "outside"

    Notes
    -----
    Modifies values in-place.
    """
    if limit_area is not None:
        # delegate the limit_area handling to a per-slice helper
        np.apply_along_axis(
            # error: Argument 1 to "apply_along_axis" has incompatible type
            # "partial[None]"; expected an array-returning callable — we rely
            # on in-place mutation of each slice instead
            partial(  # type: ignore[arg-type]
                _interpolate_with_limit_area,
                method=method,
                limit=limit,
                limit_area=limit_area,
            ),
            # error: Argument 2 to "apply_along_axis" has incompatible type
            # "Union[str, int]"; expected "SupportsIndex"
            axis,  # type: ignore[arg-type]
            values,
        )
        return

    # transpose (a view, so mutations propagate) when filling along axis 1
    transf = (lambda x: x) if axis == 0 else (lambda x: x.T)

    # reshape a 1 dim if needed
    if values.ndim == 1:
        if axis != 0:  # pragma: no cover
            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
        # the reshaped array shares memory with `values`, so in-place fills
        # still reach the caller's array
        values = values.reshape(tuple((1,) + values.shape))

    method = clean_fill_method(method)
    tvalues = transf(values)

    # _pad_2d and _backfill_2d both modify tvalues inplace
    if method == "pad":
        _pad_2d(tvalues, limit=limit)
    else:
        _backfill_2d(tvalues, limit=limit)

    return
822def _fillna_prep(
823 values, mask: npt.NDArray[np.bool_] | None = None
824) -> npt.NDArray[np.bool_]:
825 # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
827 if mask is None:
828 mask = isna(values)
830 mask = mask.view(np.uint8)
831 return mask
834def _datetimelike_compat(func: F) -> F:
835 """
836 Wrapper to handle datetime64 and timedelta64 dtypes.
837 """
839 @wraps(func)
840 def new_func(values, limit=None, mask=None):
841 if needs_i8_conversion(values.dtype):
842 if mask is None:
843 # This needs to occur before casting to int64
844 mask = isna(values)
846 result, mask = func(values.view("i8"), limit=limit, mask=mask)
847 return result.view(values.dtype), mask
849 return func(values, limit=limit, mask=mask)
851 return cast(F, new_func)
@_datetimelike_compat
def _pad_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    # Forward-fill a 1-D array in place; `limit` caps the length of each
    # consecutive run of fills. Returns the filled array and its uint8 mask.
    mask = _fillna_prep(values, mask)
    algos.pad_inplace(values, mask, limit=limit)
    return values, mask
@_datetimelike_compat
def _backfill_1d(
    values: np.ndarray,
    limit: int | None = None,
    mask: npt.NDArray[np.bool_] | None = None,
) -> tuple[np.ndarray, npt.NDArray[np.bool_]]:
    # Backward-fill a 1-D array in place; `limit` caps the length of each
    # consecutive run of fills. Returns the filled array and its uint8 mask.
    mask = _fillna_prep(values, mask)
    algos.backfill_inplace(values, mask, limit=limit)
    return values, mask
@_datetimelike_compat
def _pad_2d(values: np.ndarray, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    # Row-wise forward-fill of a 2-D array, in place.
    mask = _fillna_prep(values, mask)

    # np.all on the shape is falsy when any dimension is 0 (empty array)
    if np.all(values.shape):
        algos.pad_2d_inplace(values, mask, limit=limit)
    else:
        # nothing to fill on an empty array
        # for test coverage
        pass
    return values, mask
@_datetimelike_compat
def _backfill_2d(values, limit=None, mask: npt.NDArray[np.bool_] | None = None):
    # Row-wise backward-fill of a 2-D array, in place.
    mask = _fillna_prep(values, mask)

    # np.all on the shape is falsy when any dimension is 0 (empty array)
    if np.all(values.shape):
        algos.backfill_2d_inplace(values, mask, limit=limit)
    else:
        # nothing to fill on an empty array
        # for test coverage
        pass
    return values, mask
# Canonical fill-method name (output of clean_fill_method) -> 1-D filler;
# the 2-D variants are selected inside get_fill_func.
_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
def get_fill_func(method, ndim: int = 1):
    """Return the pad/backfill routine matching ``method`` and ``ndim``."""
    method = clean_fill_method(method)
    two_d = {"pad": _pad_2d, "backfill": _backfill_2d}
    table = _fill_methods if ndim == 1 else two_d
    return table[method]
def clean_reindex_fill_method(method) -> str | None:
    # reindexing additionally accepts "nearest", unlike fillna
    return clean_fill_method(method, allow_nearest=True)
def _interp_limit(invalid: npt.NDArray[np.bool_], fw_limit, bw_limit):
    """
    Get indexers of values that won't be filled
    because they exceed the limits.

    Parameters
    ----------
    invalid : np.ndarray[bool]
    fw_limit : int or None
        forward limit to index
    bw_limit : int or None
        backward limit to index

    Returns
    -------
    set of indexers

    Notes
    -----
    This is equivalent to the more readable, but slower

    .. code-block:: python

        def _interp_limit(invalid, fw_limit, bw_limit):
            for x in np.where(invalid)[0]:
                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                    yield x
    """
    # handle forward first; the backward direction is the same except
    # 1. operate on the reversed array
    # 2. subtract the returned indices from N - 1
    N = len(invalid)
    f_idx = set()
    b_idx = set()

    def inner(invalid, limit):
        # indices that are more than `limit` NaNs away from the previous
        # valid value: a full window of `limit + 1` NaNs ending at the index,
        # or a leading NaN-run position before the first valid value
        limit = min(limit, N)
        windowed = _rolling_window(invalid, limit + 1).all(1)
        idx = set(np.where(windowed)[0] + limit) | set(
            np.where((~invalid[: limit + 1]).cumsum() == 0)[0]
        )
        return idx

    if fw_limit is not None:

        if fw_limit == 0:
            # limit 0: every NaN exceeds the forward limit
            f_idx = set(np.where(invalid)[0])
        else:
            f_idx = inner(invalid, fw_limit)

    if bw_limit is not None:

        if bw_limit == 0:
            # then we don't even need to care about backwards
            # just use forwards
            return f_idx
        else:
            # compute on the reversed array, then map indices back
            b_idx_inv = list(inner(invalid[::-1], bw_limit))
            b_idx = set(N - 1 - np.asarray(b_idx_inv))
            if fw_limit == 0:
                return b_idx

    # a value is preserved only when it exceeds BOTH limits
    return f_idx & b_idx
979def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.bool_]:
980 """
981 [True, True, False, True, False], 2 ->
983 [
984 [True, True],
985 [True, False],
986 [False, True],
987 [True, False],
988 ]
989 """
990 # https://stackoverflow.com/a/6811241
991 shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
992 strides = a.strides + (a.strides[-1],)
993 return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)