"""
For compatibility with numpy libraries, pandas functions or methods have to
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
are not actually used or respected in the pandas implementation.

To ensure that users do not abuse these parameters, validation is performed in
'validators.py' to make sure that any extra parameters passed correspond ONLY
to those in the numpy signature. Part of that validation includes whether or
not the user attempted to pass in non-default values for these extraneous
parameters. As we want to discourage users from relying on these parameters
when calling the pandas implementation, we want them only to pass in the
default values for these parameters.

This module provides a set of commonly used default arguments for functions and
methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""

from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    TypeVar,
    cast,
    overload,
)

import numpy as np
from numpy import ndarray

from pandas._libs.lib import (
    is_bool,
    is_integer,
)
from pandas.errors import UnsupportedFunctionCall
from pandas.util._validators import (
    validate_args,
    validate_args_and_kwargs,
    validate_kwargs,
)

if TYPE_CHECKING:
    from pandas._typing import (
        Axis,
        AxisInt,
    )

    AxisNoneT = TypeVar("AxisNoneT", Axis, None)


class CompatValidator:
    """
    Callable that checks numpy-style '*args' / '**kwargs' against defaults.

    An instance stores a mapping of accepted parameter names to their default
    values plus optional settings (function name, validation method, maximum
    positional-argument count). Each setting can be overridden per call.

    Parameters
    ----------
    defaults : dict
        Accepted parameter names mapped to their default values.
    fname : str, optional
        Function name forwarded to the underlying validators.
    method : {"args", "kwargs", "both"}, optional
        Which of the ``pandas.util._validators`` helpers to dispatch to.
    max_fname_arg_count : int, optional
        Forwarded to the positional-argument validators.
    """

    def __init__(
        self,
        defaults,
        fname=None,
        method: str | None = None,
        max_fname_arg_count=None,
    ) -> None:
        self.fname = fname
        self.method = method
        self.defaults = defaults
        self.max_fname_arg_count = max_fname_arg_count

    def __call__(
        self,
        args,
        kwargs,
        fname=None,
        max_fname_arg_count=None,
        method: str | None = None,
    ) -> None:
        """
        Validate ``args`` and ``kwargs`` against the stored defaults.

        Parameters
        ----------
        args : tuple
            Extra positional arguments received by the pandas function.
        kwargs : dict
            Extra keyword arguments received by the pandas function.
        fname, max_fname_arg_count, method : optional
            Per-call overrides; when None the value given at construction
            time is used instead.

        Raises
        ------
        ValueError
            If the resolved ``method`` is not "args", "kwargs" or "both".
            The dispatched validator may raise its own errors as well.
        """
        # Nothing was passed through, so there is nothing to validate.
        if not args and not kwargs:
            return None

        fname = self.fname if fname is None else fname
        max_fname_arg_count = (
            self.max_fname_arg_count
            if max_fname_arg_count is None
            else max_fname_arg_count
        )
        method = self.method if method is None else method

        if method == "args":
            validate_args(fname, args, max_fname_arg_count, self.defaults)
        elif method == "kwargs":
            validate_kwargs(fname, kwargs, self.defaults)
        elif method == "both":
            validate_args_and_kwargs(
                fname, args, kwargs, max_fname_arg_count, self.defaults
            )
        else:
            raise ValueError(f"invalid validation method '{method}'")


ARGMINMAX_DEFAULTS = {"out": None}
validate_argmin = CompatValidator(
    ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
validate_argmax = CompatValidator(
    ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
)


def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
    """
    Separate a genuine 'skipna' flag from a misplaced numpy positional value.

    When ``skipna`` is an ndarray or None it is prepended to ``args`` and
    ``skipna`` falls back to True; otherwise both are returned unchanged.

    Returns
    -------
    tuple
        ``(skipna, args)`` after the adjustment.
    """
    if isinstance(skipna, ndarray) or skipna is None:
        args = (skipna,) + args
        skipna = True

    return skipna, args


def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    If 'Series.argmin' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None', so
    check if the 'skipna' parameter is either an instance of ndarray or is
    None, since 'skipna' itself should be a boolean
    """
    skipna, args = process_skipna(skipna, args)
    validate_argmin(args, kwargs)
    return skipna


def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    If 'Series.argmax' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None', so
    check if the 'skipna' parameter is either an instance of ndarray or is
    None, since 'skipna' itself should be a boolean
    """
    skipna, args = process_skipna(skipna, args)
    validate_argmax(args, kwargs)
    return skipna


# The effective default for "kind" is None. The previous code first stored
# "quicksort" under "kind" and then overwrote it, which was a dead store.
# Keys, values and key order are unchanged by writing it as one literal.
# NOTE(review): key order is kept deliberately; the positional validators in
# pandas.util._validators appear to pair positional values with these keys
# in order -- confirm before reordering.
ARGSORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": None,
    "order": None,
    "stable": None,
}


validate_argsort = CompatValidator(
    ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# Second argsort validator: used when the pandas method accepts `kind`
# itself, so `kind` is not part of the numpy defaults checked here.
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {
    "axis": -1,
    "order": None,
    "stable": None,
}
validate_argsort_kind = CompatValidator(
    ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
)


def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for an argsort that takes 'ascending'.

    If 'Categorical.argsort' is called via the 'numpy' library, the first
    parameter in its signature is 'axis', which takes either an integer or
    'None'. An integer or None received as 'ascending' is therefore moved to
    the front of 'args' and 'ascending' falls back to True.

    Returns
    -------
    bool
        The 'ascending' flag to use.
    """
    came_from_numpy = ascending is None or is_integer(ascending)
    if came_from_numpy:
        args = (ascending,) + args

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)

    if came_from_numpy:
        return True
    return cast(bool, ascending)


CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
    CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)


@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: ...


@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: ...
def validate_clip_with_axis(
    axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
    """
    Validate numpy-style arguments for a clip that takes 'axis'.

    If 'NDFrame.clip' is called via the numpy library, the third parameter in
    its signature is 'out', which can take an ndarray. An ndarray received as
    'axis' is therefore moved to the front of 'args' and 'axis' becomes None,
    since 'axis' itself should either be an integer or None.

    Returns
    -------
    The 'axis' to use (None when an ndarray was passed).
    """
    resolved_axis = axis
    if isinstance(axis, ndarray):
        args = (axis,) + args
        # error: Incompatible types in assignment (expression has type "None",
        # variable has type "Union[ndarray[Any, Any], str, int]")
        resolved_axis = None  # type: ignore[assignment]

    validate_clip(args, kwargs)
    # error: Incompatible return value type (got "Union[ndarray[Any, Any],
    # str, int]", expected "Union[str, int, None]")
    return resolved_axis  # type: ignore[return-value]


CUM_FUNC_DEFAULTS: dict[str, Any] = {"dtype": None, "out": None}
validate_cum_func = CompatValidator(
    CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
)
validate_cumsum = CompatValidator(
    CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
)


def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
    """
    Validate numpy-style arguments for a cumulative function.

    If this function is called via the 'numpy' library, the third parameter in
    its signature is 'dtype', which takes either a 'numpy' dtype or 'None'. A
    non-boolean 'skipna' is therefore moved to the front of 'args' and
    'skipna' falls back to True. A numpy boolean is converted to a Python
    bool.

    Returns
    -------
    bool
        The 'skipna' flag to use.
    """
    if is_bool(skipna):
        if isinstance(skipna, np.bool_):
            skipna = bool(skipna)
    else:
        args = (skipna,) + args
        skipna = True

    validate_cum_func(args, kwargs, fname=name)
    return skipna


ALLANY_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
    "axis": None,
}
validate_all = CompatValidator(
    ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
)
validate_any = CompatValidator(
    ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)

LOGICAL_FUNC_DEFAULTS = {
    "out": None,
    "keepdims": False,
}
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")

MINMAX_DEFAULTS = {
    "axis": None,
    "dtype": None,
    "out": None,
    "keepdims": False,
}
validate_min = CompatValidator(
    MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
validate_max = CompatValidator(
    MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)

RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
    RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)

REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
    REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)

ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
    ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)

SORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": "quicksort",
    "order": None,
}
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")

# Each mapping is written out in full so that its key order is explicit.
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
}

SUM_DEFAULTS: dict[str, Any | None] = {
    "dtype": None,
    "out": None,
    "axis": None,
    "keepdims": False,
    "initial": None,
}

# Independent copies, so changing one mapping never affects another.
PROD_DEFAULTS = dict(SUM_DEFAULTS)
MEAN_DEFAULTS = dict(SUM_DEFAULTS)

MEDIAN_DEFAULTS: dict[str, Any | None] = {
    "dtype": None,
    "out": None,
    "overwrite_input": False,
    "keepdims": False,
}

validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
    SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
)
validate_prod = CompatValidator(
    PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
)
validate_mean = CompatValidator(
    MEAN_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
)
validate_median = CompatValidator(
    MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
}
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")

TAKE_DEFAULTS: dict[str, str | None] = {
    "out": None,
    "mode": "raise",
}
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")


def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for a take that accepts 'convert'.

    If this function is called via the 'numpy' library, a numpy positional
    argument (an ndarray or 'None') can arrive in the 'convert' slot. Such a
    value is moved to the front of 'args' and 'convert' falls back to True.

    NOTE(review): the earlier docstring named that numpy parameter 'axis',
    but the first key of TAKE_DEFAULTS is 'out' -- confirm which is intended.

    Returns
    -------
    bool
        The 'convert' flag to use.
    """
    if convert is None or isinstance(convert, ndarray):
        args = (convert,) + args
        convert = True

    # This call overrides the validator's default "kwargs" method so that
    # positional values are checked as well.
    validate_take(args, kwargs, max_fname_arg_count=3, method="both")
    return convert


TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
    TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)


def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
    """
    Reject numpy-style extra arguments passed to a groupby method.

    'args' and 'kwargs' should be empty, except for allowed kwargs, because
    all of their necessary parameters are explicitly listed in the function
    signature.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument or any non-allowed keyword is present.
    """
    permitted = set() if allowed is None else set(allowed)
    unexpected = set(kwargs) - permitted

    if len(args) > 0 or len(unexpected) > 0:
        raise UnsupportedFunctionCall(
            "numpy operations are not valid with groupby. "
            f"Use .groupby(...).{name}() instead"
        )


RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")


def validate_resampler_func(method: str, args, kwargs) -> None:
    """
    Reject numpy-style extra arguments passed to a resampler method.

    'args' and 'kwargs' should be empty because all of their necessary
    parameters are explicitly listed in the function signature.

    Raises
    ------
    UnsupportedFunctionCall
        If extra arguments are given and 'method' is in RESAMPLER_NUMPY_OPS.
    TypeError
        If extra arguments are given for any other 'method'.
    """
    if len(args) + len(kwargs) == 0:
        return

    if method in RESAMPLER_NUMPY_OPS:
        raise UnsupportedFunctionCall(
            "numpy operations are not valid with resample. "
            f"Use .resample(...).{method}() instead"
        )
    raise TypeError("too many arguments passed in")


def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
    """
    Ensure that the axis argument passed to min, max, argmin, or argmax is
    within the available dimensions, or None.

    Parameters
    ----------
    axis : int or None
    ndim : int, default 1

    Raises
    ------
    ValueError
        If 'axis' is not None and lies outside ``[-ndim, ndim)``.
    """
    if axis is not None and (axis >= ndim or (axis < 0 and ndim + axis < 0)):
        raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")


# Reductions that have a dedicated validator; any other name is checked by
# validate_stat_func.
_validation_funcs = {
    "median": validate_median,
    "mean": validate_mean,
    "min": validate_min,
    "max": validate_max,
    "sum": validate_sum,
    "prod": validate_prod,
}


def validate_func(fname, args, kwargs) -> None:
    """
    Validate 'args' / 'kwargs' with the validator registered for 'fname'.

    Names without a dedicated entry in ``_validation_funcs`` are checked with
    ``validate_stat_func``, passing 'fname' through.
    """
    dedicated = _validation_funcs.get(fname)
    if dedicated is None:
        return validate_stat_func(args, kwargs, fname=fname)
    return dedicated(args, kwargs)