Source code for marine_qc.qc_grouped_reports

"""
QC of grouped reports.

Module containing QC functions for quality control of grouped marine reports.
"""

from __future__ import annotations
import itertools
import math

import numpy as np
import pandas as pd

from .auxiliary import (
    SequenceDatetimeType,
    SequenceIntType,
    SequenceNumberType,
    convert_units,
    ensure_arrays,
    failed,
    inspect_arrays,
    isvalid,
    passed,
    post_format_return_type,
    untestable,
    untested,
)
from .buoy_tracking_qc import is_monotonic
from .external_clim import (
    ClimArgType,
    Climatology,
    inspect_climatology,
)
from .location_control import (
    mds_lat_to_yindex,
    mds_lat_to_yindex_fast,
    mds_lon_to_xindex,
    mds_lon_to_xindex_fast,
)
from .statistics import p_gross
from .time_control import (
    convert_date,
    pentad_to_month_day,
    which_pentad,
)



[docs]
def get_threshold_multiplier(total_nobs: int, nob_limits: list[int], multiplier_values: list[float]) -> float:
    """
    Find the highest value of i such that total_nobs is greater than nob_limits[i] and return multiplier_values[i].

    This routine is used by the buddy check. It's a bit niche.

    Parameters
    ----------
    total_nobs : int
        Total number of neighbour observations.
    nob_limits : list[int]
        List containing the limiting numbers of observations in ascending order first element must be zero.
    multiplier_values : list[float]
        List containing the multiplier values associated..

    Returns
    -------
    float
        The multiplier value.
    """
    if len(nob_limits) != len(multiplier_values):
        raise ValueError(f"Length mismatch: nob_limits has {len(nob_limits)}, but multiplier_values has{len(multiplier_values)}.")

    if min(multiplier_values) <= 0:
        raise ValueError("Invalid minimum multiplier_value: {min(multiplier_values)}. Must be greater than zero.")
    if nob_limits[0] != 0:
        raise ValueError(f"Invalid first nob_limits: {nob_limits[0]}. Must be zero.")
    if min(nob_limits) != 0:
        raise ValueError(f"Invalid minimum nob_limit: {min(nob_limits)}. Must be zero.")
    if not is_monotonic(nob_limits):
        raise ValueError("Invalid nob_limits: {nob_limits}. Must be in ascending order.")

    multiplier = -1.0
    if total_nobs == 0:
        multiplier = 4.0

    for i, limit in enumerate(nob_limits):
        if total_nobs > limit:
            multiplier = multiplier_values[i]

    if multiplier <= 0:
        raise ValueError(f"Invalid multiplier: {multiplier}. Must be positive.")

    return multiplier




[docs]
class SuperObsGrid:
    """Class for gridding data in buddy check, based on numpy arrays."""

    def __init__(self) -> None:
        """Initialise empty grid."""
        self.grid = np.zeros((360, 180, 73))  # type: np.ndarray
        self.buddy_mean = np.zeros((360, 180, 73))  # type: np.ndarray
        self.buddy_stdev = np.zeros((360, 180, 73))  # type: np.ndarray
        self.nobs = np.zeros((360, 180, 73))  # type: np.ndarray


[docs]
    @convert_date(["month", "day"])
    @inspect_arrays(["lat", "lon", "value", "month", "day"])
    def add_multiple_observations(
        self,
        lat: SequenceNumberType,
        lon: SequenceNumberType,
        value: SequenceNumberType,
        date: SequenceDatetimeType | None = None,
        month: SequenceIntType | None = None,
        day: SequenceIntType | None = None,
    ) -> None:
        """
        Add a series of observations to the grid and take the grid average.

        Parameters
        ----------
        lat : :py:obj:`~marine_qc.SequenceNumberType`
            1-dimensional latitude array.
        lon : :py:obj:`~marine_qc.SequenceNumberType`
            1-dimensional longitude array.
        value : :py:obj:`~marine_qc.SequenceNumberType`
            1-dimensional anomaly array.
        date : :py:obj:`~marine_qc.SequenceDatetimeType`, optional
            1-dimensional datetime array.
        month : :py:obj:`~marine_qc.SequenceIntType`, optional
            1-dimensional month array.
            Used if date is not provided.
        day : :py:obj:`~marine_qc.SequenceIntType`, optional
            1-dimensional day array.
            Used if date is not provided.

        Raises
        ------
        TypeError
            If `inspect_arrays` does not return np.ndarrays.

        Notes
        -----
        The observations should be anomalies.
        """
        lat_arr, lon_arr, value_arr, month_arr, day_arr = ensure_arrays(lat=lat, lon=lon, value=value, month=month, day=day)

        month_arr = np.where(np.isnan(month_arr), -1, month_arr).astype(int)
        day_arr = np.where(np.isnan(day_arr), -1, day_arr).astype(int)

        valid: np.ndarray = (
            np.atleast_1d(isvalid(lat_arr))
            & np.atleast_1d(isvalid(lon_arr))
            & np.atleast_1d(isvalid(month_arr))
            & np.atleast_1d(isvalid(day_arr))
            & np.atleast_1d(isvalid(value_arr))
        )
        valid &= (month_arr >= 1) & (month_arr <= 12)

        if not np.any(valid):
            return

        y_index: np.ndarray = mds_lat_to_yindex_fast(lat_arr[valid], res=1)
        x_index: np.ndarray = mds_lon_to_xindex_fast(lon_arr[valid], res=1)
        t_index: np.ndarray = Climatology.get_t_index(month_arr[valid], day_arr[valid], 73)

        unique_index: np.ndarray = t_index * 1000000 + y_index * 1000 + x_index

        df: pd.DataFrame = pd.DataFrame(
            {
                "uid": unique_index,
                "value": value_arr[valid],
                "x": x_index,
                "y": y_index,
                "t": t_index,
            }
        )

        grouped = df.groupby("uid")
        means = grouped["value"].mean().values
        nobs = grouped["value"].count().values
        x: np.ndarray = df.groupby("uid")["x"].first().values
        y: np.ndarray = df.groupby("uid")["y"].first().values
        t: np.ndarray = df.groupby("uid")["t"].first().values

        self.grid[x, y, t] = means
        self.nobs[x, y, t] = nobs



[docs]
    def add_single_observation(self, lat: float, lon: float, month: int, day: int, anom: float) -> None:
        """
        Add an anomaly to the grid from specified lat lon and date.

        Parameters
        ----------
        lat : float
            Latitude of the observation in degrees.
        lon : float
            Longitude of the observation in degrees.
        month : int
            Month of the observation.
        day : int
            Day of the observation.
        anom : float
            Value to be added to the grid.

        Returns
        -------
        None
            The function performs its operations in-place and does not return anything.
        """
        xindex = mds_lon_to_xindex(lon, res=1.0)
        yindex = mds_lat_to_yindex(lat, res=1.0)
        pindex = which_pentad(month, day) - 1

        if not (0 <= xindex < 360):
            raise ValueError(f"Invalid xindex: {xindex}. Must be between 0 and 360.")
        if not (0 <= yindex < 180):
            raise ValueError(f"Invalid yindex: {yindex}. Must be between 0 and 180.")
        if not (0 <= pindex < 73):
            raise ValueError(f"Invalid pindex: {pindex}. Must be between 0 and 72.")

        if anom is not None:
            self.grid[xindex, yindex, pindex] += anom
            self.nobs[xindex, yindex, pindex] += 1



[docs]
    def take_average(self) -> None:
        """Take the average of a grid to which reps have been added using add_rep."""
        nonmiss = np.nonzero(self.nobs)
        self.grid[nonmiss] = self.grid[nonmiss] / self.nobs[nonmiss]



[docs]
    def get_neighbour_anomalies(
        self,
        search_radius: list[int],
        xindex: int,
        yindex: int,
        pindex: int,
    ) -> tuple[list[float], list[float]]:
        """
        Search within a specified search radius of the given point and extract the neighbours for buddy check.

        Parameters
        ----------
        search_radius : list[int]
            Three element array search radius [lon, lat, time].
        xindex : int
            The xindex of the gridcell to start from.
        yindex : int
            The yindex of the gridcell to start from.
        pindex : int
            The pindex of the gridcell to start from.

        Returns
        -------
        tuple of list of float
            Anomalies and numbers of observations in two lists.
        """
        if len(search_radius) != 3:
            raise ValueError(f"Length mismatch (search_radius): {len(search_radius)}. Must be 3.")

        radcon = np.pi / 180.0
        latitude_approx = 89.5 - yindex

        xspan = search_radius[0]
        full_xspan = int(xspan / math.cos(latitude_approx * radcon))
        yspan = search_radius[1]
        pspan = search_radius[2]

        temp_anom: list[float] = []
        temp_nobs: list[float] = []

        for xpt, ypt, ppt in itertools.product(
            range(-1 * full_xspan, full_xspan + 1),
            range(-1 * yspan, yspan + 1),
            range(-1 * pspan, pspan + 1),
        ):
            # Skip the central grid cell, as we don't want to buddy check it against itself
            if xpt == 0 and ypt == 0 and ppt == 0:
                continue

            # Wrap at grid boundaries
            thisxx = (xindex + xpt) % 360
            thisyy = (yindex + ypt) % 180
            thispp = (pindex + ppt) % 73

            if self.nobs[thisxx, thisyy, thispp] != 0:
                temp_anom.append(self.grid[thisxx, thisyy, thispp])
                temp_nobs.append(self.nobs[thisxx, thisyy, thispp])

        return temp_anom, temp_nobs



[docs]
    def get_buddy_limits_with_parameters(
        self,
        pentad_stdev: Climatology,
        limits: list[list[int]],
        number_of_obs_thresholds: list[list[int]],
        multipliers: list[list[float]],
    ) -> None:
        """
        Get buddy limits with parameters.

        Parameters
        ----------
        pentad_stdev : :py:class:`~marine_qc.Climatology`
            Climatology containing the 3-dimensional latitude array containing the standard deviations.
        limits : list[list[int]]
            List of the limits.
        number_of_obs_thresholds : list[list[int]]
            List containing the number of obs thresholds.
        multipliers : list[list[float]]
            List containing the multipliers to be applied.

        Returns
        -------
        None
            The function performs its operations in-place and does not return anything.
        """
        nonmiss = np.nonzero(self.nobs)
        for i in range(len(nonmiss[0])):
            xindex = nonmiss[0][i]
            yindex = nonmiss[1][i]
            pindex = nonmiss[2][i]
            m, d = pentad_to_month_day(pindex + 1)

            # Originally get_value_mds_style - note: might be a mismatch
            stdev = pentad_stdev.get_value_fast(lat=89.5 - yindex, lon=-179.5 + xindex, month=m, day=d)

            if stdev is None or stdev < 0.0 or np.isnan(stdev):
                stdev = 1.0

            match_not_found = True

            for j, limit in enumerate(limits):
                temp_anom, temp_nobs = self.get_neighbour_anomalies(limit, xindex, yindex, pindex)

                if len(temp_anom) > 0 and match_not_found:
                    self.buddy_mean[xindex, yindex, pindex] = np.mean(temp_anom)
                    total_nobs = int(np.sum(temp_nobs))

                    self.buddy_stdev[xindex, yindex, pindex] = (
                        get_threshold_multiplier(total_nobs, number_of_obs_thresholds[j], multipliers[j]) * stdev
                    )

                    match_not_found = False

            if match_not_found:
                self.buddy_mean[xindex, yindex, pindex] = 0.0
                self.buddy_stdev[xindex, yindex, pindex] = 500.0



[docs]
    def get_new_buddy_limits(
        self,
        stdev1: Climatology,
        stdev2: Climatology,
        stdev3: Climatology,
        limits: list[int],
        sigma_m: float,
        noise_scaling: float,
    ) -> None:
        """
        Get buddy limits for new bayesian buddy check.

        Parameters
        ----------
        stdev1 : :py:class:`~marine_qc.Climatology`
            Field of standard deviations representing standard deviation of difference between target
            gridcell and complete neighbour average (grid area to neighbourhood difference).
        stdev2 : :py:class:`~marine_qc.Climatology`
            Field of standard deviations representing standard deviation of difference between a single
            observation and the target gridcell average (point to grid area difference).
        stdev3 : :py:class:`~marine_qc.Climatology`
            Field of standard deviations representing standard deviation of difference between random
            neighbour gridcell and full neighbour average (uncertainty in neighbour average).
        limits : list[int, int, int]
            Three membered list of number of degrees in latitude and longitude and number of pentads.
        sigma_m : float
            Estimated measurement error uncertainty.
        noise_scaling : float
            Scale noise by a factor of noise_scaling used to match observed variability.

        Returns
        -------
        None
            The function performs its operations in-place and does not return anything.

        Notes
        -----
        The original default values for limits, sigma_m, and noise_scaling originally defaulted to:

        * limits = (2, 2, 4)
        * sigma_m = 1.0
        * noise_scaling = 3.0
        """
        nonmiss = np.nonzero(self.nobs)

        for i in range(len(nonmiss[0])):
            xindex = nonmiss[0][i]
            yindex = nonmiss[1][i]
            pindex = nonmiss[2][i]

            m, d = pentad_to_month_day(pindex + 1)

            stdev1_ex = stdev1.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d)
            stdev2_ex = stdev2.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d)
            stdev3_ex = stdev3.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d)

            if stdev1_ex is None or stdev1_ex < 0.0 or np.isnan(stdev1_ex):
                stdev1_ex = 1.0
            if stdev2_ex is None or stdev2_ex < 0.0 or np.isnan(stdev2_ex):
                stdev2_ex = 1.0
            if stdev3_ex is None or stdev3_ex < 0.0 or np.isnan(stdev3_ex):
                stdev3_ex = 1.0

            # if there is neighbour in that range then calculate a mean
            temp_anom, temp_nobs = self.get_neighbour_anomalies(limits, xindex, yindex, pindex)

            if len(temp_anom) > 0:
                self.buddy_mean[xindex, yindex, pindex] = np.mean(temp_anom)

                tot = 0.0
                ntot = 0.0
                for n_obs in temp_nobs:
                    # measurement error for each 1x1x5day cell
                    tot += sigma_m**2.0 / n_obs
                    # sampling error for each 1x1x5day cell
                    # multiply by three to match observed stdev
                    tot += noise_scaling * (stdev2_ex**2.0 / n_obs)
                    ntot += 1.0

                sigma_buddy = tot / (ntot**2.0)
                sigma_buddy += stdev3_ex**2.0 / ntot

                self.buddy_stdev[xindex, yindex, pindex] = math.sqrt(sigma_m**2.0 + stdev1_ex**2.0 + noise_scaling * stdev2_ex**2.0 + sigma_buddy)

            else:
                self.buddy_mean[xindex, yindex, pindex] = 0.0
                self.buddy_stdev[xindex, yindex, pindex] = 500.0



[docs]
    def get_buddy_mean(self, lat: float, lon: float, month: int, day: int) -> float:
        """
        Get the buddy mean from the grid for a specified time and place.

        Parameters
        ----------
        lat : float
            Latitude of the location for which the buddy mean is desired.
        lon : float
            Longitude of the location for which the buddy mean is desired.
        month : int
            Month for which the buddy mean is desired.
        day : int
            Day for which the buddy mean is desired.

        Returns
        -------
        float
            Buddy mean at the specified location.
        """
        xindex = mds_lon_to_xindex(lon, res=1.0)
        yindex = mds_lat_to_yindex(lat, res=1.0)
        pindex = which_pentad(month, day) - 1
        return float(self.buddy_mean[xindex, yindex, pindex])



[docs]
    def get_buddy_stdev(self, lat: float, lon: float, month: int, day: int) -> float:
        """
        Get the buddy standard deviation from the grid for a specified time and place.

        Parameters
        ----------
        lat : float
            Latitude of the location for which the buddy standard deviation is desired.
        lon : float
            Longitude of the location for which the buddy standard deviation is desired.
        month : int
            Month for which the buddy standard deviation is desired.
        day : int
            Day for which the buddy standard deviation is desired.

        Returns
        -------
        float
            Buddy standard deviation at the specified location.
        """
        xindex = mds_lon_to_xindex(lon, res=1.0)
        yindex = mds_lat_to_yindex(lat, res=1.0)
        pindex = which_pentad(month, day) - 1
        return float(self.buddy_stdev[xindex, yindex, pindex])





[docs]
@post_format_return_type(["value"])
@inspect_arrays(["lat", "lon", "date", "value", "climatology"])
@convert_units(lat="degrees", lon="degrees")
@inspect_climatology("climatology")
def do_mds_buddy_check(
    lat: SequenceNumberType,
    lon: SequenceNumberType,
    date: SequenceDatetimeType,
    value: SequenceNumberType,
    climatology: ClimArgType,
    standard_deviation: Climatology,
    limits: list[list[int]],
    number_of_obs_thresholds: list[list[int]],
    multipliers: list[list[float]],
    ignore_indexes: list[int] | None = None,
) -> SequenceIntType:
    """
    Do the old style buddy check.

    The buddy check compares an observation to the average of its near neighbours (called the buddy mean).
    Depending on how many neighbours there are and their proximity to the observation being
    tested a multiplier is set. If the difference between the observation and the buddy mean is larger than the
    multiplier times the standard deviation then the observation fails the buddy check. If no buddy observations are
    found within the specified limits, then the limits are expanded until the check runs out of specified limits or
    observations are found within the limits.

    Parameters
    ----------
    lat : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional latitude array.
    lon : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional longitude array.
    date : :py:obj:`~marine_qc.SequenceDatetimeType`
        1-dimensional date array.
    value : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional anomaly array.
    climatology : :py:obj:`~marine_qc.ClimArgType`
        The climatological average(s) used to calculate anomalies.
        Can be a scalar, sequence, a one-dimensional NumPy array, a pandas Series,
        a :py:class:`~marine_qc.Climatology`, a path-like string on disk, a xarray Dataset or a xarray DataArray.
    standard_deviation : :py:class:`~marine_qc.Climatology`
        Field of standard deviations of 1x1xpentad standard deviations.
    limits : list[list]
        Limits a list of lists. Each list member is a three-membered list specifying the longitudinal, latitudinal,
        and time range within which buddies are sought at each level of search.
    number_of_obs_thresholds : list[list]
        Number of observations corresponding to each multiplier in `multipliers`. The initial list should be
        the same length as the limits list.
    multipliers : list[list]
        Multiplier, x, used for buddy check mu +- x * sigma. The list should have the same structure as
        `number_of_obs_threshold`.
    ignore_indexes : list[int], optional
        List of row numbers to be skipped.

    Returns
    -------
    :py:obj:`~marine_qc.SequenceIntType`
        Same type as input, but with integer values

        - Returns array/sequence/Series of 1s if the MDS buddy check fails
        - Returns or array/sequence/Series of 0s otherwise.

    Raises
    ------
    TypeError
        If `inspect_arrays` does not return np.ndarrays.

    Notes
    -----
    The limits, number_of_obs_thresholds, and multipliers parameters are rather complex. The buddy check basically
    looks within a lat-lon-time range specified by the first element in limits. If there are more than zero
    observations in the search range then a multiplier is chosen based on how many observations there are.

    If the first element of limits is [1,1,2] then we first look within a distance equivalent to 1 degree
    latitude and longitude at the equator and 2 pentads in time. If there are more than zero observations then we
    calculate the buddy mean, and we consult the number_of_obs_threshold. If, for example, this is [0, 5, 15, 100]
    then we look for the first entry where the number of obs is greater than that threshold. We then look up the
    multiplier in the appropriate list (say [4, 3.5, 3.0, 2.5]). If the difference between an observation and the
    buddy mean is greater than the multiplier times the standard deviation at that point then it fails the buddy
    check. So, if there were 10 observations then the multiplier would be 3.5.

    Previous versions had default values for the parameters of:

    * limits = [[1, 1, 2], [2, 2, 2], [1, 1, 4], [2, 2, 4]]
    * number_of_obs_thresholds = [[0, 5, 15, 100], [0], [0, 5, 15, 100], [0]]
    * multipliers = [[4.0, 3.5, 3.0, 2.5], [4.0], [4.0, 3.5, 3.0, 2.5], [4.0]]
    """
    if len(limits) != len(number_of_obs_thresholds) and len(limits) != len(multipliers):
        raise ValueError("Input parameter lists are not equal length")

    for i, thresholds in enumerate(number_of_obs_thresholds):
        if len(thresholds) != len(multipliers[i]):
            raise ValueError("Number of obs thresholds and multipliers have different shapes")

    lat_arr, lon_arr, date_arr, value_arr, climatology_arr = ensure_arrays(lat=lat, lon=lon, date=date, value=value, climatology=climatology)

    anoms_arr = value_arr - climatology_arr

    # calculate superob averages and numbers of observations
    grid = SuperObsGrid()
    grid.add_multiple_observations(lat=lat_arr, lon=lon_arr, value=anoms_arr, date=date_arr)
    grid.get_buddy_limits_with_parameters(standard_deviation, limits, number_of_obs_thresholds, multipliers)

    numobs = len(lat_arr)
    qc_outcomes = np.zeros(numobs) + untested

    if ignore_indexes is None:
        ignore_indexes = []

    valid_mask: np.ndarray = np.atleast_1d(isvalid(anoms_arr))

    # finally loop over all reports and update buddy QC
    for i in range(numobs):
        if i in ignore_indexes or not valid_mask[i]:
            continue

        lat_i: float = lat_arr[i]
        lon_i: float = lon_arr[i]
        mon_i: pd.Timestamp = pd.Timestamp(date_arr[i]).month
        day_i: pd.Timestamp = pd.Timestamp(date_arr[i]).day

        # if the SST anomaly differs from the neighbour average by more than the calculated range then reject
        anom_i = anoms_arr[i]
        bm = grid.get_buddy_mean(lat_i, lon_i, mon_i, day_i)
        bsd = grid.get_buddy_stdev(lat_i, lon_i, mon_i, day_i)

        qc_outcomes[i] = passed
        if bsd == 500.0 or np.isnan(bsd):
            qc_outcomes[i] = untestable
        elif abs(anom_i - bm) >= bsd:
            qc_outcomes[i] = failed

    del grid

    return qc_outcomes




[docs]
@post_format_return_type(["value"])
@inspect_arrays(["lat", "lon", "date", "value", "climatology"])
@convert_units(lat="degrees", lon="degrees")
@inspect_climatology("climatology")
def do_bayesian_buddy_check(
    lat: SequenceNumberType,
    lon: SequenceNumberType,
    date: SequenceDatetimeType,
    value: SequenceNumberType,
    climatology: ClimArgType,
    stdev1: Climatology,
    stdev2: Climatology,
    stdev3: Climatology,
    prior_probability_of_gross_error: float,
    quantization_interval: float,
    one_sigma_measurement_uncertainty: float,
    limits: list[int],
    noise_scaling: float,
    maximum_anomaly: float,
    fail_probability: float,
    ignore_indexes: list[int] | None = None,
) -> SequenceIntType:
    """
    Do the Bayesian buddy check.

    The bayesian buddy check assigns a probability of gross error to each observation,
    which is rounded down to the tenth and then multiplied by 10 to yield a flag between 0 and 9.

    Parameters
    ----------
    lat : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional latitude array.
    lon : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional longitude array.
    date : :py:obj:`~marine_qc.SequenceDatetimeType`
        1-dimensional date array.
    value : :py:obj:`~marine_qc.SequenceNumberType`
        1-dimensional anomaly array.
    climatology : :py:obj:`~marine_qc.ClimArgType`
        The climatological average(s) used to calculate anomalies.
        Can be a scalar, sequence, a one-dimensional NumPy array, a pandas Series,
        a :py:class:`~marine_qc.Climatology`, a path-like string on disk, a xarray Dataset or a xarray DataArray.
    stdev1 : :py:class:`~marine_qc.Climatology`
        Field of standard deviations representing standard deviation of difference between
        target gridcell and complete neighbour average (grid area to neighbourhood difference).
    stdev2 : :py:class:`~marine_qc.Climatology`
        Field of standard deviations representing standard deviation of difference between
        a single observation and the target gridcell average (point to grid area difference).
    stdev3 : :py:class:`~marine_qc.Climatology`
        Field of standard deviations representing standard deviation of difference between
        random neighbour gridcell and full neighbour average (uncertainty in neighbour average).
    prior_probability_of_gross_error : float
        Prior probability of gross error, which is the background rate of gross errors.
    quantization_interval : float
        Smallest possible increment in the input values.
    one_sigma_measurement_uncertainty : float
        Estimated one sigma measurement uncertainty.
    limits : list[int]
        List with three members which specify the search range for the buddy check.
    noise_scaling : float
        Tuning parameter used to multiply stdev2. This was determined to be approximately 3.0 by comparison with
        observed point data. stdev2 was estimated from OSTIA data and typically underestimates the point to
        area-average difference by this factor.
    maximum_anomaly : float
        Largest absolute anomaly, assumes that the maximum and minimum anomalies have the same magnitude.
    fail_probability : float
        Probability of gross error that corresponds to a failed test. Anything with a probability of gross error
        greater than fail_probability will be considered failing.
    ignore_indexes : list[int], optional
        List of row numbers to be skipped.

    Returns
    -------
    :py:obj:`~marine_qc.SequenceIntType`
        Same type as input, but with integer values

        - Returns array/sequence/Series of 2s if there are no buddies in the specified limits
        - Returns array/sequence/Series of 1s if the bayesian buddy check fails
        - Returns or array/sequence/Series of 0s otherwise.

    Raises
    ------
    TypeError
        If `inspect_arrays` does not return np.ndarrays.

    Notes
    -----
    In previous versions the default values for the parameters were:

    * prior_probability_of_gross_error = 0.05
    * quantization_interval = 0.1
    * limits = [2, 2, 4]
    * noise_scaling = 3.0
    * one_sigma_measurement_uncertainty = 1.0
    * maximum_anomaly = 8.0
    * fail_probability = 0.3
    """
    lat_arr, lon_arr, date_arr, value_arr, climatology_arr = ensure_arrays(lat=lat, lon=lon, date=date, value=value, climatology=climatology)

    numobs = len(lat_arr)
    p0 = prior_probability_of_gross_error
    q = quantization_interval
    sigma_m = one_sigma_measurement_uncertainty

    # previous upper QC limits set. Ideally, this should be set based on any previous QC checks.
    # The original default was 8 because the climatology check had a range of +-8C. However, a
    # climatology plus standard deviation check might narrow that range, and it might also be
    # spatially varying. There is currently no means of expressing that here.
    r_hi = maximum_anomaly
    r_lo = -1.0 * r_hi  # previous lower QC limit set

    # Return untestable if any parameters is invalid
    if p0 < 0.0 or p0 > 1.0 or q <= 0.0 or r_hi < r_lo or sigma_m < 0.0:
        return np.zeros(numobs) + untestable

    anoms_arr = value_arr - climatology_arr

    grid = SuperObsGrid()
    grid.add_multiple_observations(lat=lat_arr, lon=lon_arr, value=anoms_arr, date=date_arr)
    grid.get_new_buddy_limits(stdev1, stdev2, stdev3, limits, sigma_m, noise_scaling)

    qc_outcomes = np.zeros(numobs) + untested

    if ignore_indexes is None:
        ignore_indexes = []

    for i in range(numobs):
        if i in ignore_indexes:
            continue

        anom_i = anoms_arr[i]

        if not isvalid(anom_i):
            continue

        lat_i = lat_arr[i]
        lon_i = lon_arr[i]
        mon_i = pd.Timestamp(date_arr[i]).month
        day_i = pd.Timestamp(date_arr[i]).day

        # Calculate the probability of gross error given the set-up
        buddy_stdev = grid.get_buddy_stdev(lat_i, lon_i, mon_i, day_i)

        try:
            ppp = p_gross(
                p0,
                q,
                r_hi,
                r_lo,
                anom_i,
                grid.get_buddy_mean(lat_i, lon_i, mon_i, day_i),
                buddy_stdev,
            )
        except (ValueError, ZeroDivisionError):
            qc_outcomes[i] = failed
            continue

        # QC outcome is based on the probability of gross error. If the probability of gross error is larger than
        # the fail_probability then it is considered a fail.
        qc_outcomes[i] = passed
        if ppp > fail_probability:
            qc_outcomes[i] = failed
        if buddy_stdev == 500.0:
            qc_outcomes[i] = untestable

    del grid

    return qc_outcomes