# Source code for marine_qc.validations

"""Module containing base QC which call multiple QC functions and could be applied on a DataBundle."""

from __future__ import annotations
import collections.abc as abc
import inspect
from collections.abc import Callable, Iterable, Mapping, Sequence
from types import UnionType
from typing import (
    Annotated,
    Any,
    Literal,
    Union,
    get_args,
    get_origin,
    get_type_hints,
)

import numpy as np
import numpy.typing as npt
import pandas as pd

from .auxiliary import DECORATOR_HANDLERS, DECORATOR_KWARGS
from .external_clim import get_climatological_value  # noqa: F401
from .qc_grouped_reports import (  # noqa: F401
    do_bayesian_buddy_check,
    do_mds_buddy_check,
)
from .qc_individual_reports import (  # noqa: F401
    do_climatology_check,
    do_date_check,
    do_day_check,
    do_hard_limit_check,
    do_missing_value_check,
    do_missing_value_clim_check,
    do_night_check,
    do_position_check,
    do_sst_freeze_check,
    do_supersaturation_check,
    do_time_check,
    do_wind_consistency_check,
)
from .qc_sequential_reports import (  # noqa: F401
    do_few_check,
    do_iquam_track_check,
    do_spike_check,
    do_track_check,
    find_multiple_rounded_values,
    find_repeated_values,
    find_saturated_runs,
)


def _validate_non_generic(value: Any, expected: Any) -> bool:
    """
    Validate a non-generic type (str, int, float, etc.).

    Parameters
    ----------
    value : Any
        The value to validate.
    expected : Any
        The expected type.

    Returns
    -------
    bool
        True if `value` matches `expected`, False otherwise.
    """
    if isinstance(expected, type):
        return isinstance(value, expected)
    return False


def _validate_mapping(value: Mapping[Any, Any], origin: type, args: tuple[Any, ...]) -> bool:
    """
    Validate a mapping type (dict, Mapping).

    Parameters
    ----------
    value : Mapping[Any, Any]
        The value to validate.
    origin : type
        The mapping type (e.g., dict).
    args : tuple[Any, ...]
        Expected key and value types.

    Returns
    -------
    bool
        True if `value` matches the mapping type and key/value types, False otherwise.
    """
    if not isinstance(value, origin):
        return False
    if not args:
        return True
    key_type, val_type = args
    return all(validate_type(k, key_type) and validate_type(v, val_type) for k, v in value.items())


def _validate_iterable(value: Iterable[Any], origin: type, args: tuple[Any, ...]) -> bool:
    """
    Validate an iterable type (list, set, frozenset).

    Parameters
    ----------
    value : Any
        The value to validate.
    origin : type
        The iterable type.
    args : tuple[Any, ...]
        Expected element types.

    Returns
    -------
    bool
        True if all elements match the expected type, False otherwise.
    """
    if not isinstance(value, origin):
        return False
    if not args:
        return True
    elem_type = args[0]
    return all(validate_type(v, elem_type) for v in value)


def _validate_sequence(value: Any, args: tuple[Any, ...]) -> bool:
    """
    Validate a generic sequence type (e.g., Sequence[int]).

    Parameters
    ----------
    value : Any
        The value to validate.
    args : tuple[Any, ...]
        Expected element types.

    Returns
    -------
    bool
        True if all elements match the expected type, False otherwise.
    """
    if not isinstance(value, abc.Sequence) or isinstance(value, (str, bytes)):
        return False
    if not args:
        return True
    elem_type = args[0]
    return all(validate_type(v, elem_type) for v in value)


def _validate_tuple(value: Any, args: tuple[Any, ...]) -> bool:
    """
    Validate a tuple type (fixed-length or homogeneous).

    Parameters
    ----------
    value : Any
        The value to validate.
    args : tuple[Any, ...]
        Expected element types.

    Returns
    -------
    bool
        True if the tuple matches the expected types and length, False otherwise.
    """
    if not isinstance(value, abc.Sequence) or isinstance(value, (str, bytes)):
        return False
    if not args:
        return True
    if len(args) == 2 and args[1] is Ellipsis:
        return all(validate_type(v, args[0]) for v in value)
    if len(args) != len(value):
        return False
    return all(validate_type(v, t) for v, t in zip(value, args, strict=False))


def _validate_ndarray(value: Any, args: tuple[Any, ...]) -> bool:
    """
    Validate a numpy ndarray type, optionally checking dtype.

    Parameters
    ----------
    value : Any
        The value to validate.
    args : tuple[Any, ...]
        Expected dtype (first argument may be `Any` or unspecified).

    Returns
    -------
    bool
        True if `value` is an ndarray and matches expected dtype, False otherwise.
    """
    if not isinstance(value, np.ndarray):
        return False

    if not args:
        return True

    if len(args) < 2:
        return True

    expected_dtype = args[1]

    inner = get_args(expected_dtype)
    if inner:
        expected_dtype = inner[0]

    if expected_dtype in (Any, None):
        return True

    try:
        return np.issubdtype(value.dtype, expected_dtype)
    except TypeError:
        return False


def _safe_isinstance(value: Any, origin: Any) -> bool:
    """
    Safely check if value is an instance of a type, avoiding TypeError for weird generics.

    Parameters
    ----------
    value : Any
        Value to check.
    origin : Any
        Type or generic to check against.

    Returns
    -------
    bool
        True if `value` is an instance of `origin`, False otherwise.
    """
    try:
        return isinstance(value, origin)
    except TypeError:
        return False


[docs] def validate_type(value: Any, expected: Any) -> bool: """ Recursively validate that a value matches the expected type hint. Parameters ---------- value : Any The value to validate. expected : Any The expected value type for validation. Returns ------- bool - True if type of `value` does match `expected`. - False if type of `value` does not match `expected`. """ if expected is Any: return True origin = get_origin(expected) args = get_args(expected) if origin is Annotated: return validate_type(value, args[0]) if origin is Literal: return value in args if origin in (Union, UnionType): return any(validate_type(value, t) for t in args) if origin is abc.Callable: return callable(value) if origin is tuple: return _validate_tuple(value, args) if origin in (np.ndarray, npt.NDArray): return _validate_ndarray(value, args) if isinstance(expected, type) and issubclass(expected, (pd.DataFrame, pd.Series)): return isinstance(value, expected) if isinstance(origin, type): if issubclass(origin, abc.Mapping): return _validate_mapping(value, origin, args) if issubclass(origin, (list, set, frozenset)): return _validate_iterable(value, origin, args) if issubclass(origin, abc.Sequence): return _validate_sequence(value, args) if origin is None: return _validate_non_generic(value, expected) return _safe_isinstance(value, origin)
def validate_arg(
    key: str,
    value: Any,
    func_name: str,
    parameters: Mapping[str, inspect.Parameter],
    type_hints: Mapping[str, Any],
    reserved_keys: set[str],
    has_arguments: bool,
) -> None:
    """
    Validate a single argument against a function's signature, taking decorators into account.

    Parameters
    ----------
    key : str
        The name of the argument to validate.
    value : Any
        The value of the argument to validate.
    func_name : str
        The name of the function (used in error messages).
    parameters : Mapping[str, inspect.Parameter]
        A mapping of parameter names to `inspect.Parameter` objects,
        typically from `inspect.signature(func).parameters`.
    type_hints : Mapping[str, Any]
        A mapping of parameter names to expected types,
        typically from `typing.get_type_hints(func)`.
    reserved_keys : set[str]
        Argument names that are considered reserved and should not raise errors.
    has_arguments : bool
        Whether the function accepts arbitrary arguments. If True, no
        validation is performed at all.

    Raises
    ------
    ValueError
        If `key` is not a parameter of the function.
    TypeError
        If a type hint exists for `key` and `value` does not match it.
    """
    skip = has_arguments or key in reserved_keys
    if skip:
        return

    if key not in parameters:
        raise ValueError(f"Parameter '{key}' is not a valid parameter of function '{func_name}'.")

    # Nothing to check when the parameter carries no usable type hint.
    hint = type_hints.get(key)
    if not hint or hint is inspect.Parameter.empty:
        return

    if validate_type(value, hint):
        return

    raise TypeError(f"Parameter '{key}' does not match expected type {hint!r}. Got value {value!r} of type {type(value).__name__}.")
def validate_args(
    func: Callable[..., Any],
    args: Sequence[Any] | None = None,
    kwargs: Mapping[str, Any] | None = None,
) -> None:
    """
    Validate positional and keyword arguments against a function's signature, taking decorators into account.

    This function checks that:
    - No more positional arguments are given than the function can take
      (unless it accepts ``*args``).
    - All provided keyword arguments correspond to valid parameters of the
      given function (unless it accepts ``**kwargs`` or the key is reserved
      by one of its decorator handlers).
    - Each checked argument matches the type hint of its parameter.
    - All required parameters of the function (i.e., parameters without
      default values) are supplied, positionally or by keyword.

    Parameters
    ----------
    func : Callable[..., Any]
        The function whose signature is used for validation.
    args : Sequence[Any], optional
        Positional arguments intended to be passed to `func`. A value that is
        neither a list nor a tuple is treated as one single positional argument.
    kwargs : Mapping[str, Any], optional
        Dictionary of keyword arguments intended to be passed to `func`.

    Raises
    ------
    ValueError
        If `kwargs` contains a key that is not a parameter of `func`.
    TypeError
        If too many positional arguments are given, if an argument does not
        match its type hint, or if a required parameter of `func` is missing.
    """

    def all_handlers(func: Callable[..., Any]) -> list[Callable[..., Any]]:
        """
        Collect all decorator handlers applied to a function.

        Parameters
        ----------
        func : Callable[..., Any]
            The function to inspect for applied decorator handlers.

        Returns
        -------
        list[Callable[..., Any]]
            A list of all decorator handlers associated with the function,
            including handlers from wrapped functions.
        """
        handlers: list[Callable[..., Any]] = []
        current: Callable[..., Any] = func
        # Follow the ``__wrapped__`` chain so handlers registered on inner
        # (decorated) functions are found as well.
        while True:
            handlers.extend(DECORATOR_HANDLERS.get(current, []))
            if not hasattr(current, "__wrapped__"):
                return handlers
            current = current.__wrapped__

    # Compare against None explicitly: a single falsy positional value such
    # as ``0`` must be kept, not replaced by an empty argument list.
    if args is None:
        args = ()
    elif not isinstance(args, (list, tuple)):
        args = (args,)

    kwargs = kwargs or {}

    # Keyword names consumed by decorator handlers are exempt from validation.
    reserved_keys: set[str] = set()
    for handler in all_handlers(func):
        reserved_keys.update(DECORATOR_KWARGS.get(handler, set()))

    sig = inspect.signature(func)
    params = list(sig.parameters.values())

    positional_params = [
        p
        for p in params
        if p.kind
        in (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
    ]
    has_args = any(p.kind == inspect.Parameter.VAR_POSITIONAL for p in params)
    has_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params)

    if len(args) > len(positional_params) and not has_args:
        raise TypeError(f"Too many positional arguments for function '{func.__name__}'.")

    # Names of the parameters that the given positional arguments bind to.
    bound_args = [p.name for p in positional_params[: len(args)]]

    type_hints = get_type_hints(func)

    # zip() stops at the shorter input, so surplus positionals that would be
    # absorbed by ``*args`` have no parameter name and are skipped instead of
    # causing an IndexError.
    for name, arg in zip(bound_args, args, strict=False):
        validate_arg(name, arg, func.__name__, sig.parameters, type_hints, reserved_keys, has_args)

    for key, value in kwargs.items():
        validate_arg(key, value, func.__name__, sig.parameters, type_hints, reserved_keys, has_kwargs)

    for param in params:
        if (
            param.default is inspect.Parameter.empty
            and param.kind
            in (
                inspect.Parameter.POSITIONAL_ONLY,
                inspect.Parameter.POSITIONAL_OR_KEYWORD,
                inspect.Parameter.KEYWORD_ONLY,
            )
            and param.name not in kwargs
            and param.name not in bound_args
        ):
            raise TypeError(f"Required parameter '{param.name}' is missing for function '{func.__name__}'.")
def validate_dict(input_dict: Mapping[str, Mapping[str, Any]]) -> None:
    """
    Validate that the input is a mapping with string keys and mapping values.

    Items are checked in iteration order; for each item the key is checked
    before the value, and the first violation raises.

    Parameters
    ----------
    input_dict : Mapping[str, Mapping[str, Any]]
        The object to validate.

    Raises
    ------
    TypeError
        If `input_dict` is not a mapping, if any key is not a string,
        or if any top-level value is not a mapping.
    """
    if isinstance(input_dict, Mapping):
        for key, entry in input_dict.items():
            if not isinstance(key, str):
                raise TypeError(f"input key {key} must be a string, not {type(key).__name__}.")
            if not isinstance(entry, Mapping):
                raise TypeError(f"value for key {key} must be a dictionary, not {type(entry).__name__}.")
        return

    raise TypeError(f"input must be a dictionary, not {type(input_dict)}.")
[docs] def is_in_data(name: str, data: pd.Series | pd.DataFrame) -> bool: """ Return True if named column or variable, name, is in data. Parameters ---------- name : str Name of variable. data : pd.Series or pd.DataFrame Pandas Series or DataFrame to be tested. Returns ------- bool Returns True if name is one of the columns or variables in data, False otherwise. Raises ------ TypeError If data type is not pd.Series or pd.DataFrame. """ if isinstance(data, pd.Series): return bool(data.name == name) if isinstance(data, pd.DataFrame): return bool(name in data.columns) raise TypeError(f"Unsupported data type: {type(data)}")
def is_func_param(func: Callable[..., Any], param: str) -> bool:
    """
    Return True if param is the name of a parameter of function func.

    Parameters
    ----------
    func : Callable
        Function whose parameters are to be inspected.
    param : str
        Name of the parameter.

    Returns
    -------
    bool
        Returns True if param is one of the function's parameters or the
        function accepts arbitrary keyword arguments (``**kwargs``, under
        any name).
    """
    parameters = inspect.signature(func).parameters
    # Detect a catch-all keyword parameter by its kind, not by the literal
    # name "kwargs": ``**options`` accepts any keyword too, while a regular
    # parameter that merely happens to be called "kwargs" does not.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values()):
        return True
    return param in parameters