Source code for marine_qc.qc_grouped_reports

"""
QC of grouped reports.

Module containing QC functions for quality control of grouped marine reports.
"""

from __future__ import annotations
import itertools
import math

import numpy as np
import pandas as pd

from .auxiliary import (
    SequenceDatetimeType,
    SequenceIntType,
    SequenceNumberType,
    convert_units,
    ensure_arrays,
    failed,
    inspect_arrays,
    isvalid,
    passed,
    post_format_return_type,
    untestable,
    untested,
)
from .buoy_tracking_qc import is_monotonic
from .external_clim import (
    ClimArgType,
    Climatology,
    inspect_climatology,
)
from .location_control import (
    mds_lat_to_yindex,
    mds_lat_to_yindex_fast,
    mds_lon_to_xindex,
    mds_lon_to_xindex_fast,
)
from .statistics import p_gross
from .time_control import (
    convert_date,
    pentad_to_month_day,
    which_pentad,
)


[docs] def get_threshold_multiplier(total_nobs: int, nob_limits: list[int], multiplier_values: list[float]) -> float: """ Find the highest value of i such that total_nobs is greater than nob_limits[i] and return multiplier_values[i]. This routine is used by the buddy check. It's a bit niche. Parameters ---------- total_nobs : int Total number of neighbour observations. nob_limits : list[int] List containing the limiting numbers of observations in ascending order first element must be zero. multiplier_values : list[float] List containing the multiplier values associated.. Returns ------- float The multiplier value. """ if len(nob_limits) != len(multiplier_values): raise ValueError(f"Length mismatch: nob_limits has {len(nob_limits)}, but multiplier_values has{len(multiplier_values)}.") if min(multiplier_values) <= 0: raise ValueError("Invalid minimum multiplier_value: {min(multiplier_values)}. Must be greater than zero.") if nob_limits[0] != 0: raise ValueError(f"Invalid first nob_limits: {nob_limits[0]}. Must be zero.") if min(nob_limits) != 0: raise ValueError(f"Invalid minimum nob_limit: {min(nob_limits)}. Must be zero.") if not is_monotonic(nob_limits): raise ValueError("Invalid nob_limits: {nob_limits}. Must be in ascending order.") multiplier = -1.0 if total_nobs == 0: multiplier = 4.0 for i, limit in enumerate(nob_limits): if total_nobs > limit: multiplier = multiplier_values[i] if multiplier <= 0: raise ValueError(f"Invalid multiplier: {multiplier}. Must be positive.") return multiplier
[docs] class SuperObsGrid: """Class for gridding data in buddy check, based on numpy arrays.""" def __init__(self) -> None: """Initialise empty grid.""" self.grid = np.zeros((360, 180, 73)) # type: np.ndarray self.buddy_mean = np.zeros((360, 180, 73)) # type: np.ndarray self.buddy_stdev = np.zeros((360, 180, 73)) # type: np.ndarray self.nobs = np.zeros((360, 180, 73)) # type: np.ndarray
[docs] @convert_date(["month", "day"]) @inspect_arrays(["lat", "lon", "value", "month", "day"]) def add_multiple_observations( self, lat: SequenceNumberType, lon: SequenceNumberType, value: SequenceNumberType, date: SequenceDatetimeType | None = None, month: SequenceIntType | None = None, day: SequenceIntType | None = None, ) -> None: """ Add a series of observations to the grid and take the grid average. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array. value : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional anomaly array. date : :py:obj:`~marine_qc.SequenceDatetimeType`, optional 1-dimensional datetime array. month : :py:obj:`~marine_qc.SequenceIntType`, optional 1-dimensional month array. Used if date is not provided. day : :py:obj:`~marine_qc.SequenceIntType`, optional 1-dimensional day array. Used if date is not provided. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- The observations should be anomalies. """ lat_arr, lon_arr, value_arr, month_arr, day_arr = ensure_arrays(lat=lat, lon=lon, value=value, month=month, day=day) month_arr = np.where(np.isnan(month_arr), -1, month_arr).astype(int) day_arr = np.where(np.isnan(day_arr), -1, day_arr).astype(int) valid: np.ndarray = ( np.atleast_1d(isvalid(lat_arr)) & np.atleast_1d(isvalid(lon_arr)) & np.atleast_1d(isvalid(month_arr)) & np.atleast_1d(isvalid(day_arr)) & np.atleast_1d(isvalid(value_arr)) ) valid &= (month_arr >= 1) & (month_arr <= 12) if not np.any(valid): return y_index: np.ndarray = mds_lat_to_yindex_fast(lat_arr[valid], res=1) x_index: np.ndarray = mds_lon_to_xindex_fast(lon_arr[valid], res=1) t_index: np.ndarray = Climatology.get_t_index(month_arr[valid], day_arr[valid], 73) unique_index: np.ndarray = t_index * 1000000 + y_index * 1000 + x_index df: pd.DataFrame = pd.DataFrame( { "uid": unique_index, "value": value_arr[valid], "x": x_index, "y": y_index, "t": t_index, } ) grouped = df.groupby("uid") means = grouped["value"].mean().values nobs = grouped["value"].count().values x: np.ndarray = df.groupby("uid")["x"].first().values y: np.ndarray = df.groupby("uid")["y"].first().values t: np.ndarray = df.groupby("uid")["t"].first().values self.grid[x, y, t] = means self.nobs[x, y, t] = nobs
[docs] def add_single_observation(self, lat: float, lon: float, month: int, day: int, anom: float) -> None: """ Add an anomaly to the grid from specified lat lon and date. Parameters ---------- lat : float Latitude of the observation in degrees. lon : float Longitude of the observation in degrees. month : int Month of the observation. day : int Day of the observation. anom : float Value to be added to the grid. Returns ------- None The function performs its operations in-place and does not return anything. """ xindex = mds_lon_to_xindex(lon, res=1.0) yindex = mds_lat_to_yindex(lat, res=1.0) pindex = which_pentad(month, day) - 1 if not (0 <= xindex < 360): raise ValueError(f"Invalid xindex: {xindex}. Must be between 0 and 360.") if not (0 <= yindex < 180): raise ValueError(f"Invalid yindex: {yindex}. Must be between 0 and 180.") if not (0 <= pindex < 73): raise ValueError(f"Invalid pindex: {pindex}. Must be between 0 and 72.") if anom is not None: self.grid[xindex, yindex, pindex] += anom self.nobs[xindex, yindex, pindex] += 1
[docs] def take_average(self) -> None: """Take the average of a grid to which reps have been added using add_rep.""" nonmiss = np.nonzero(self.nobs) self.grid[nonmiss] = self.grid[nonmiss] / self.nobs[nonmiss]
[docs] def get_neighbour_anomalies( self, search_radius: list[int], xindex: int, yindex: int, pindex: int, ) -> tuple[list[float], list[float]]: """ Search within a specified search radius of the given point and extract the neighbours for buddy check. Parameters ---------- search_radius : list[int] Three element array search radius [lon, lat, time]. xindex : int The xindex of the gridcell to start from. yindex : int The yindex of the gridcell to start from. pindex : int The pindex of the gridcell to start from. Returns ------- tuple of list of float Anomalies and numbers of observations in two lists. """ if len(search_radius) != 3: raise ValueError(f"Length mismatch (search_radius): {len(search_radius)}. Must be 3.") radcon = np.pi / 180.0 latitude_approx = 89.5 - yindex xspan = search_radius[0] full_xspan = int(xspan / math.cos(latitude_approx * radcon)) yspan = search_radius[1] pspan = search_radius[2] temp_anom: list[float] = [] temp_nobs: list[float] = [] for xpt, ypt, ppt in itertools.product( range(-1 * full_xspan, full_xspan + 1), range(-1 * yspan, yspan + 1), range(-1 * pspan, pspan + 1), ): # Skip the central grid cell, as we don't want to buddy check it against itself if xpt == 0 and ypt == 0 and ppt == 0: continue # Wrap at grid boundaries thisxx = (xindex + xpt) % 360 thisyy = (yindex + ypt) % 180 thispp = (pindex + ppt) % 73 if self.nobs[thisxx, thisyy, thispp] != 0: temp_anom.append(self.grid[thisxx, thisyy, thispp]) temp_nobs.append(self.nobs[thisxx, thisyy, thispp]) return temp_anom, temp_nobs
[docs] def get_buddy_limits_with_parameters( self, pentad_stdev: Climatology, limits: list[list[int]], number_of_obs_thresholds: list[list[int]], multipliers: list[list[float]], ) -> None: """ Get buddy limits with parameters. Parameters ---------- pentad_stdev : :py:class:`~marine_qc.Climatology` Climatology containing the 3-dimensional latitude array containing the standard deviations. limits : list[list[int]] List of the limits. number_of_obs_thresholds : list[list[int]] List containing the number of obs thresholds. multipliers : list[list[float]] List containing the multipliers to be applied. Returns ------- None The function performs its operations in-place and does not return anything. """ nonmiss = np.nonzero(self.nobs) for i in range(len(nonmiss[0])): xindex = nonmiss[0][i] yindex = nonmiss[1][i] pindex = nonmiss[2][i] m, d = pentad_to_month_day(pindex + 1) # Originally get_value_mds_style - note: might be a mismatch stdev = pentad_stdev.get_value_fast(lat=89.5 - yindex, lon=-179.5 + xindex, month=m, day=d) if stdev is None or stdev < 0.0 or np.isnan(stdev): stdev = 1.0 match_not_found = True for j, limit in enumerate(limits): temp_anom, temp_nobs = self.get_neighbour_anomalies(limit, xindex, yindex, pindex) if len(temp_anom) > 0 and match_not_found: self.buddy_mean[xindex, yindex, pindex] = np.mean(temp_anom) total_nobs = int(np.sum(temp_nobs)) self.buddy_stdev[xindex, yindex, pindex] = ( get_threshold_multiplier(total_nobs, number_of_obs_thresholds[j], multipliers[j]) * stdev ) match_not_found = False if match_not_found: self.buddy_mean[xindex, yindex, pindex] = 0.0 self.buddy_stdev[xindex, yindex, pindex] = 500.0
[docs] def get_new_buddy_limits( self, stdev1: Climatology, stdev2: Climatology, stdev3: Climatology, limits: list[int], sigma_m: float, noise_scaling: float, ) -> None: """ Get buddy limits for new bayesian buddy check. Parameters ---------- stdev1 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between target gridcell and complete neighbour average (grid area to neighbourhood difference). stdev2 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between a single observation and the target gridcell average (point to grid area difference). stdev3 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between random neighbour gridcell and full neighbour average (uncertainty in neighbour average). limits : list[int, int, int] Three membered list of number of degrees in latitude and longitude and number of pentads. sigma_m : float Estimated measurement error uncertainty. noise_scaling : float Scale noise by a factor of noise_scaling used to match observed variability. Returns ------- None The function performs its operations in-place and does not return anything. Notes ----- The original default values for limits, sigma_m, and noise_scaling originally defaulted to: * limits = (2, 2, 4) * sigma_m = 1.0 * noise_scaling = 3.0 """ nonmiss = np.nonzero(self.nobs) for i in range(len(nonmiss[0])): xindex = nonmiss[0][i] yindex = nonmiss[1][i] pindex = nonmiss[2][i] m, d = pentad_to_month_day(pindex + 1) stdev1_ex = stdev1.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d) stdev2_ex = stdev2.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d) stdev3_ex = stdev3.get_value_fast(89.5 - yindex, -179.5 + xindex, month=m, day=d) if stdev1_ex is None or stdev1_ex < 0.0 or np.isnan(stdev1_ex): stdev1_ex = 1.0 if stdev2_ex is None or stdev2_ex < 0.0 or np.isnan(stdev2_ex): stdev2_ex = 1.0 if stdev3_ex is None or stdev3_ex < 0.0 or np.isnan(stdev3_ex): stdev3_ex = 1.0 # if there is neighbour in that range then calculate a mean temp_anom, temp_nobs = self.get_neighbour_anomalies(limits, xindex, yindex, pindex) if len(temp_anom) > 0: self.buddy_mean[xindex, yindex, pindex] = np.mean(temp_anom) tot = 0.0 ntot = 0.0 for n_obs in temp_nobs: # measurement error for each 1x1x5day cell tot += sigma_m**2.0 / n_obs # sampling error for each 1x1x5day cell # multiply by three to match observed stdev tot += noise_scaling * (stdev2_ex**2.0 / n_obs) ntot += 1.0 sigma_buddy = tot / (ntot**2.0) sigma_buddy += stdev3_ex**2.0 / ntot self.buddy_stdev[xindex, yindex, pindex] = math.sqrt(sigma_m**2.0 + stdev1_ex**2.0 + noise_scaling * stdev2_ex**2.0 + sigma_buddy) else: self.buddy_mean[xindex, yindex, pindex] = 0.0 self.buddy_stdev[xindex, yindex, pindex] = 500.0
[docs] def get_buddy_mean(self, lat: float, lon: float, month: int, day: int) -> float: """ Get the buddy mean from the grid for a specified time and place. Parameters ---------- lat : float Latitude of the location for which the buddy mean is desired. lon : float Longitude of the location for which the buddy mean is desired. month : int Month for which the buddy mean is desired. day : int Day for which the buddy mean is desired. Returns ------- float Buddy mean at the specified location. """ xindex = mds_lon_to_xindex(lon, res=1.0) yindex = mds_lat_to_yindex(lat, res=1.0) pindex = which_pentad(month, day) - 1 return float(self.buddy_mean[xindex, yindex, pindex])
[docs] def get_buddy_stdev(self, lat: float, lon: float, month: int, day: int) -> float: """ Get the buddy standard deviation from the grid for a specified time and place. Parameters ---------- lat : float Latitude of the location for which the buddy standard deviation is desired. lon : float Longitude of the location for which the buddy standard deviation is desired. month : int Month for which the buddy standard deviation is desired. day : int Day for which the buddy standard deviation is desired. Returns ------- float Buddy standard deviation at the specified location. """ xindex = mds_lon_to_xindex(lon, res=1.0) yindex = mds_lat_to_yindex(lat, res=1.0) pindex = which_pentad(month, day) - 1 return float(self.buddy_stdev[xindex, yindex, pindex])
[docs] @post_format_return_type(["value"]) @inspect_arrays(["lat", "lon", "date", "value", "climatology"]) @convert_units(lat="degrees", lon="degrees") @inspect_climatology("climatology") def do_mds_buddy_check( lat: SequenceNumberType, lon: SequenceNumberType, date: SequenceDatetimeType, value: SequenceNumberType, climatology: ClimArgType, standard_deviation: Climatology, limits: list[list[int]], number_of_obs_thresholds: list[list[int]], multipliers: list[list[float]], ignore_indexes: list[int] | None = None, ) -> SequenceIntType: """ Do the old style buddy check. The buddy check compares an observation to the average of its near neighbours (called the buddy mean). Depending on how many neighbours there are and their proximity to the observation being tested a multiplier is set. If the difference between the observation and the buddy mean is larger than the multiplier times the standard deviation then the observation fails the buddy check. If no buddy observations are found within the specified limits, then the limits are expanded until the check runs out of specified limits or observations are found within the limits. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array. date : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. value : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional anomaly array. climatology : :py:obj:`~marine_qc.ClimArgType` The climatological average(s) used to calculate anomalies. Can be a scalar, sequence, a one-dimensional NumPy array, a pandas Series, a :py:class:`~marine_qc.Climatology`, a path-like string on disk, a xarray Dataset or a xarray DataArray. standard_deviation : :py:class:`~marine_qc.Climatology` Field of standard deviations of 1x1xpentad standard deviations. limits : list[list] Limits a list of lists. Each list member is a three-membered list specifying the longitudinal, latitudinal, and time range within which buddies are sought at each level of search. number_of_obs_thresholds : list[list] Number of observations corresponding to each multiplier in `multipliers`. The initial list should be the same length as the limits list. multipliers : list[list] Multiplier, x, used for buddy check mu +- x * sigma. The list should have the same structure as `number_of_obs_threshold`. ignore_indexes : list[int], optional List of row numbers to be skipped. Returns ------- :py:obj:`~marine_qc.SequenceIntType` Same type as input, but with integer values - Returns array/sequence/Series of 1s if the MDS buddy check fails - Returns or array/sequence/Series of 0s otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- The limits, number_of_obs_thresholds, and multipliers parameters are rather complex. The buddy check basically looks within a lat-lon-time range specified by the first element in limits. If there are more than zero observations in the search range then a multiplier is chosen based on how many observations there are. If the first element of limits is [1,1,2] then we first look within a distance equivalent to 1 degree latitude and longitude at the equator and 2 pentads in time. If there are more than zero observations then we calculate the buddy mean, and we consult the number_of_obs_threshold. If, for example, this is [0, 5, 15, 100] then we look for the first entry where the number of obs is greater than that threshold. We then look up the multiplier in the appropriate list (say [4, 3.5, 3.0, 2.5]). If the difference between an observation and the buddy mean is greater than the multiplier times the standard deviation at that point then it fails the buddy check. So, if there were 10 observations then the multiplier would be 3.5. Previous versions had default values for the parameters of: * limits = [[1, 1, 2], [2, 2, 2], [1, 1, 4], [2, 2, 4]] * number_of_obs_thresholds = [[0, 5, 15, 100], [0], [0, 5, 15, 100], [0]] * multipliers = [[4.0, 3.5, 3.0, 2.5], [4.0], [4.0, 3.5, 3.0, 2.5], [4.0]] """ if len(limits) != len(number_of_obs_thresholds) and len(limits) != len(multipliers): raise ValueError("Input parameter lists are not equal length") for i, thresholds in enumerate(number_of_obs_thresholds): if len(thresholds) != len(multipliers[i]): raise ValueError("Number of obs thresholds and multipliers have different shapes") lat_arr, lon_arr, date_arr, value_arr, climatology_arr = ensure_arrays(lat=lat, lon=lon, date=date, value=value, climatology=climatology) anoms_arr = value_arr - climatology_arr # calculate superob averages and numbers of observations grid = SuperObsGrid() grid.add_multiple_observations(lat=lat_arr, lon=lon_arr, value=anoms_arr, date=date_arr) grid.get_buddy_limits_with_parameters(standard_deviation, limits, number_of_obs_thresholds, multipliers) numobs = len(lat_arr) qc_outcomes = np.zeros(numobs) + untested if ignore_indexes is None: ignore_indexes = [] valid_mask: np.ndarray = np.atleast_1d(isvalid(anoms_arr)) # finally loop over all reports and update buddy QC for i in range(numobs): if i in ignore_indexes or not valid_mask[i]: continue lat_i: float = lat_arr[i] lon_i: float = lon_arr[i] mon_i: pd.Timestamp = pd.Timestamp(date_arr[i]).month day_i: pd.Timestamp = pd.Timestamp(date_arr[i]).day # if the SST anomaly differs from the neighbour average by more than the calculated range then reject anom_i = anoms_arr[i] bm = grid.get_buddy_mean(lat_i, lon_i, mon_i, day_i) bsd = grid.get_buddy_stdev(lat_i, lon_i, mon_i, day_i) qc_outcomes[i] = passed if bsd == 500.0 or np.isnan(bsd): qc_outcomes[i] = untestable elif abs(anom_i - bm) >= bsd: qc_outcomes[i] = failed del grid return qc_outcomes
[docs] @post_format_return_type(["value"]) @inspect_arrays(["lat", "lon", "date", "value", "climatology"]) @convert_units(lat="degrees", lon="degrees") @inspect_climatology("climatology") def do_bayesian_buddy_check( lat: SequenceNumberType, lon: SequenceNumberType, date: SequenceDatetimeType, value: SequenceNumberType, climatology: ClimArgType, stdev1: Climatology, stdev2: Climatology, stdev3: Climatology, prior_probability_of_gross_error: float, quantization_interval: float, one_sigma_measurement_uncertainty: float, limits: list[int], noise_scaling: float, maximum_anomaly: float, fail_probability: float, ignore_indexes: list[int] | None = None, ) -> SequenceIntType: """ Do the Bayesian buddy check. The bayesian buddy check assigns a probability of gross error to each observation, which is rounded down to the tenth and then multiplied by 10 to yield a flag between 0 and 9. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array. date : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. value : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional anomaly array. climatology : :py:obj:`~marine_qc.ClimArgType` The climatological average(s) used to calculate anomalies. Can be a scalar, sequence, a one-dimensional NumPy array, a pandas Series, a :py:class:`~marine_qc.Climatology`, a path-like string on disk, a xarray Dataset or a xarray DataArray. stdev1 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between target gridcell and complete neighbour average (grid area to neighbourhood difference). stdev2 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between a single observation and the target gridcell average (point to grid area difference). stdev3 : :py:class:`~marine_qc.Climatology` Field of standard deviations representing standard deviation of difference between random neighbour gridcell and full neighbour average (uncertainty in neighbour average). prior_probability_of_gross_error : float Prior probability of gross error, which is the background rate of gross errors. quantization_interval : float Smallest possible increment in the input values. one_sigma_measurement_uncertainty : float Estimated one sigma measurement uncertainty. limits : list[int] List with three members which specify the search range for the buddy check. noise_scaling : float Tuning parameter used to multiply stdev2. This was determined to be approximately 3.0 by comparison with observed point data. stdev2 was estimated from OSTIA data and typically underestimates the point to area-average difference by this factor. maximum_anomaly : float Largest absolute anomaly, assumes that the maximum and minimum anomalies have the same magnitude. fail_probability : float Probability of gross error that corresponds to a failed test. Anything with a probability of gross error greater than fail_probability will be considered failing. ignore_indexes : list[int], optional List of row numbers to be skipped. Returns ------- :py:obj:`~marine_qc.SequenceIntType` Same type as input, but with integer values - Returns array/sequence/Series of 2s if there are no buddies in the specified limits - Returns array/sequence/Series of 1s if the bayesian buddy check fails - Returns or array/sequence/Series of 0s otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions the default values for the parameters were: * prior_probability_of_gross_error = 0.05 * quantization_interval = 0.1 * limits = [2, 2, 4] * noise_scaling = 3.0 * one_sigma_measurement_uncertainty = 1.0 * maximum_anomaly = 8.0 * fail_probability = 0.3 """ lat_arr, lon_arr, date_arr, value_arr, climatology_arr = ensure_arrays(lat=lat, lon=lon, date=date, value=value, climatology=climatology) numobs = len(lat_arr) p0 = prior_probability_of_gross_error q = quantization_interval sigma_m = one_sigma_measurement_uncertainty # previous upper QC limits set. Ideally, this should be set based on any previous QC checks. # The original default was 8 because the climatology check had a range of +-8C. However, a # climatology plus standard deviation check might narrow that range, and it might also be # spatially varying. There is currently no means of expressing that here. r_hi = maximum_anomaly r_lo = -1.0 * r_hi # previous lower QC limit set # Return untestable if any parameters is invalid if p0 < 0.0 or p0 > 1.0 or q <= 0.0 or r_hi < r_lo or sigma_m < 0.0: return np.zeros(numobs) + untestable anoms_arr = value_arr - climatology_arr grid = SuperObsGrid() grid.add_multiple_observations(lat=lat_arr, lon=lon_arr, value=anoms_arr, date=date_arr) grid.get_new_buddy_limits(stdev1, stdev2, stdev3, limits, sigma_m, noise_scaling) qc_outcomes = np.zeros(numobs) + untested if ignore_indexes is None: ignore_indexes = [] for i in range(numobs): if i in ignore_indexes: continue anom_i = anoms_arr[i] if not isvalid(anom_i): continue lat_i = lat_arr[i] lon_i = lon_arr[i] mon_i = pd.Timestamp(date_arr[i]).month day_i = pd.Timestamp(date_arr[i]).day # Calculate the probability of gross error given the set-up buddy_stdev = grid.get_buddy_stdev(lat_i, lon_i, mon_i, day_i) try: ppp = p_gross( p0, q, r_hi, r_lo, anom_i, grid.get_buddy_mean(lat_i, lon_i, mon_i, day_i), buddy_stdev, ) except (ValueError, ZeroDivisionError): qc_outcomes[i] = failed continue # QC outcome is based on the probability of gross error. If the probability of gross error is larger than # the fail_probability then it is considered a fail. qc_outcomes[i] = passed if ppp > fail_probability: qc_outcomes[i] = failed if buddy_stdev == 500.0: qc_outcomes[i] = untestable del grid return qc_outcomes