Source code for marine_qc.buoy_tracking_qc

"""
Buoy tracking QC module.

Module containing QC functions for sequential reports from a single drifting buoy.
"""

# noqa: S101

from __future__ import annotations
import math
import warnings
from collections.abc import Sequence
from datetime import datetime

import numpy as np
import pandas as pd

from .astronomical_geometry import sunangle
from .auxiliary import SequenceDatetimeType, SequenceNumberType, ensure_arrays, failed, inspect_arrays, isvalid, passed, untestable, untested
from .qc_sequential_reports import do_iquam_track_check
from .spherical_geometry import sphere_distance
from .statistics import trim_mean, trim_std
from .time_control import convert_date_to_hours, day_in_year


"""
The trackqc module contains a set of functions for performing the tracking QC
first described in Atkinson et al. [2013]. The general procedures described
in Atkinson et al. [2013] were later revised and improved for the SST CCI 2 project.
Documentation and IDL code for the revised (and original) procedures can be found
in the CMA FCM code repository. The code in this module represents a port of the
revised IDL code into the python marine QC suite. New versions of the aground
and speed checks have also been added.

These functions perform tracking QC checks on a :py:class:`ex.Voyage`.

References:

Atkinson, C.P., N.A. Rayner, J. Roberts-Jones, R.O. Smith, 2013:
Assessing the quality of sea surface temperature observations from
drifting buoys and ships on a platform-by-platform basis (doi:10.1002/jgrc.20257).
"""


[docs] def track_day_test( year: int, month: int, day: int, hour: float, lat: float, lon: float, elevdlim: float = -2.5, ) -> bool: """ Given date, time, lat and lon calculate if the sun elevation is > elevdlim. This is the "day" test used by tracking QC to decide whether an SST measurement is night or day. This is important because daytime diurnal heating can affect comparison with an SST background. It uses the function sunangle to calculate the elevation of the sun. A default solar_zenith angle of 92.5 degrees (elevation of -2.5 degrees) delimits night from day. Parameters ---------- year : int Year. month : int Month. day : int Day. hour : float Hour expressed as decimal fraction (e.g. 20.75 = 20:45 pm). lat : float Latitude in degrees. lon : float Longitude in degrees. elevdlim : float, default: -2.5 Elevation day/night delimiter in degrees above horizon. Returns ------- bool True if daytime, else False. Raises ------ ValueError If either year, month, day, hour, lat or lon is numerically invalid or None of if either month, day, hour or lat is not in valid range. """ if not isvalid(year): raise ValueError("year is missing") if not isvalid(month): raise ValueError("month is missing") if not isvalid(day): raise ValueError("day is missing") if not isvalid(hour): raise ValueError("hour is missing") if not isvalid(lat): raise ValueError("lat is missing") if not isvalid(lon): raise ValueError("lon is missing") if not (1 <= month <= 12): raise ValueError("Month not in range 1-12") if not (1 <= day <= 31): raise ValueError("Day not in range 1-31") if not (0 <= hour <= 24): raise ValueError("Hour not in range 0-24") if not (90 >= lat >= -90): raise ValueError("Latitude not in range -90 to 90") daytime = False year2 = year day2 = day_in_year(year, month, day) hour2 = math.floor(hour) minute2 = int((hour - math.floor(hour)) * 60.0) lat2 = lat lon2 = lon if lat == 0: lat2 = 0.0001 if lon == 0: lon2 = 0.0001 _, elevation, _, _, _, _ = sunangle(year2, day2, hour2, minute2, 0, 0, 0, lat2, lon2) if elevation > elevdlim: daytime = True return daytime
[docs] def is_monotonic(inarr: np.ndarray | Sequence[int | float]) -> bool: """ Test if elements in an array are increasing monotonically. I.e. each element is greater than or equal to the preceding element. Parameters ---------- inarr : array-like of datetime, shape (n,) 1-dimensional date array. Returns ------- bool True if array is increasing monotonically, False otherwise. """ for i in range(1, len(inarr)): if inarr[i] < inarr[i - 1]: return False return True
[docs] class SpeedChecker: """ Class used to carry out :py:func:`.do_speed_check`. The check identifies whether a drifter has been picked up by a ship (out of water) based on 1/100th degree precision positions. A flag is set for each input report: flag=1 for reports deemed picked up, else flag=0. A drifter is deemed picked up if it is moving faster than might be expected for a fast ocean current (a few m/s). Unreasonably fast movement is detected when speed of travel between report-pairs exceeds the chosen 'speed_limit' (speed is estimated as distance between reports divided by time separation - this 'straight line' speed between the two points is a minimum speed estimate given a less-direct path may have been followed). Positional errors introduced by lon/lat 'jitter' and data precision can be of order several km's. Reports must be separated by a suitably long period of time (the 'min_win_period') to minimise the effect of these errors when calculating speed e.g. for reports separated by 24 hours errors of several cm/s would result which are two orders of magnitude less than a fast ocean current which seems reasonable. Conversely, the period of time chosen should not be too long so as to resolve short-lived burst of speed on manoeuvring ships. Larger positional errors may also trigger the check. Because temporal sampling can be erratic the time period over which this assessment is made is specified as a range (bound by 'min_win_period' and 'max_win_period') - assessment uses the longest time separation available within this range. IMPORTANT - for optimal performance, drifter records with observations failing this check should be subsequently manually reviewed. Ships move around in all sorts of complicated ways that can readily confuse such a simple check (e.g. pausing at sea, crisscrossing its own path) and once some erroneous movement is detected it is likely a human operator can then better pick out the actual bad data. False fails caused by positional errors (particularly in fast ocean currents) will also need reinstating. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). max_win_period : float Maximum period of time in days over which position is assessed for speed estimates (this should be greater than min_win_period and allow for some erratic temporal sampling e.g. min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling). """ def __init__( self, lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, speed_limit: float, min_win_period: float, max_win_period: float, ) -> None: """ Create an object for performing the Speed Check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). max_win_period : float Maximum period of time in days over which position is assessed for speed estimates (this should be greater than min_win_period and allow for some erratic temporal sampling e.g. min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling). """ self.lon = np.asarray(lons, dtype=float) self.lat = np.asarray(lats, dtype=float) self.nreps = len(lons) dates_list: list[datetime] = pd.to_datetime(dates).tolist() self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float) self.speed_limit = speed_limit self.min_win_period = min_win_period self.max_win_period = max_win_period # Initialise QC outcomes with untested self.qc_outcomes = np.zeros(self.nreps, dtype=float) + untested
[docs] def get_qc_outcomes(self) -> np.ndarray: """ Retrieve the QC outcomes for all observations. Returns ------- np.ndarray Array of QC flags for each observation (0 = passed, 1 = failed, untested otherwise). """ return self.qc_outcomes
[docs] def valid_parameters(self) -> bool: """ Validate the QC parameters to ensure they are sensible. Checks that: - `speed_limit` is non-negative. - `min_win_period` is non-negative. - `max_win_period` is greater than or equal to `min_win_period`. Warnings are raised for any invalid parameters. Returns ------- bool True if all parameters are valid, False otherwise. """ valid = False if not (self.speed_limit >= 0): warnings.warn(UserWarning(f"Invalid speed_limit: {self.speed_limit}. Must be zero or positive."), stacklevel=2) elif not (self.min_win_period >= 0): warnings.warn(UserWarning(f"Invalid speed_limit: {self.min_win_period}. Must be zero or positive."), stacklevel=2) elif not (self.max_win_period >= self.min_win_period): warnings.warn(UserWarning("max_win_period must be greater than or equal to min_win_period."), stacklevel=2) else: valid = True return valid
[docs] def valid_arrays(self) -> bool: """ Validate the input observation arrays (longitude, latitude, and time differences). Checks for: - NaN values in longitude, latitude, or time arrays. - Monotonicity of the time array (`self.hrs`). Warnings are raised for any issues detected. Returns ------- bool True if all arrays are valid, False otherwise. """ valid = False if any(np.isnan(self.lon)): warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2) elif any(np.isnan(self.lat)): warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2) elif any(np.isnan(self.hrs)): warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2) elif not is_monotonic(self.hrs): warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2) else: valid = True return valid
[docs] def do_speed_check(self) -> None: """Perform the actual speed check.""" nrep = self.nreps min_win_period_hours = self.min_win_period * 24.0 max_win_period_hours = self.max_win_period * 24.0 if not self.valid_arrays() or not self.valid_parameters(): self.qc_outcomes = np.zeros(self.nreps) + untestable return # Default to pass self.qc_outcomes = np.zeros(nrep) + passed # loop through timeseries to see if drifter is moving too fast # and flag any occurrences index_arr = np.array(range(0, nrep)) # type: np.ndarray i = 0 time_to_end = self.hrs[-1] - self.hrs[i] while time_to_end >= min_win_period_hours: # Find all time points before current time plus the max window period f_win = self.hrs <= self.hrs[i] + max_win_period_hours # Window length is the difference between the latest time point in # the window and the current time win_len = self.hrs[f_win][-1] - self.hrs[i] # If the actual window length is shorter than the minimum window period # then go to the next time step if win_len < min_win_period_hours: i += 1 time_to_end = self.hrs[-1] - self.hrs[i] continue # If the actual window length is long enough then calculate the speed # based on the first and last points in the window displace = sphere_distance(self.lat[i], self.lon[i], self.lat[f_win][-1], self.lon[f_win][-1]) speed = displace / win_len # km per hr speed = speed * 1000.0 / (60.0 * 60) # metres per sec # If the average speed during the window is too high then set all # flags in the window to failed. if speed > self.speed_limit: self.qc_outcomes[i : index_arr[f_win][-1] + 1] = failed i += 1 time_to_end = self.hrs[-1] - self.hrs[i]
[docs] class NewSpeedChecker: """ Class used to carry out :py:func:`.do_new_speed_check`. Check to see whether a drifter has been picked up by a ship (out of water) based on 1/100th degree precision positions. A flag is set for each input report: flag=1 for reports deemed picked up, else flag=0. A drifter is deemed picked up if it is moving faster than might be expected for a fast ocean current (a few m/s). Unreasonably fast movement is detected when speed of travel between report-pairs exceeds the chosen 'speed_limit' (speed is estimated as distance between reports divided by time separation - this 'straight line' speed between the two points is a minimum speed estimate given a less-direct path may have been followed). Positional errors introduced by lon/lat 'jitter' and data precision can be of order several km's. Reports must be separated by a suitably long period of time (the 'min_win_period') to minimise the effect of these errors when calculating speed e.g. for reports separated by 9 hours errors of order 10 cm/s would result which are a few percent of fast ocean current speed. Conversely, the period of time chosen should not be too long so as to resolve short-lived burst of speed on manouvering ships. Larger positional errors may also trigger the check. For each report, speed is assessed over the shortest available period that exceeds 'min_win_period'. Prior to assessment the drifter record is screened for positional errors using the iQuam track check method (from :py:class:`ex.Voyage`). When running the iQuam check the record is treated as a ship (not a drifter) so as to avoid accidentally filtering out observations made aboard a ship (which is what we are trying to detect). This iQuam track check does not overwrite any existing iQuam track check flags. IMPORTANT - for optimal performance, drifter records with observations failing this check should be subsequently manually reviewed. Ships move around in all sorts of complicated ways that can readily confuse such a simple check (e.g. pausing at sea, crisscrossing its own path) and once some erroneous movement is detected it is likely a human operator can then better pick out the actual bad data. False fails caused by positional errors (particularly in fast ocean currents) will also need reinstating. The class has the following class attributes which can be modified using the set_parameters method. * iquam_parameters: Parameter dictionary for Voyage.iquam_track_check() function. * speed_limit: maximum allowable speed for an in situ drifting buoy (metres per second) * min_win_period: minimum period of time in days over which position is assessed for speed estimates (see description) Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). ship_speed_limit : float Ship speed limit for the IQUAM track check. delta_d : float The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km. Used in the IQUAM track check. delta_t : float The smallest increment in time that can be resolved. For hourly data expressed as a float this is 0.01 hours. Used in the IQUAM track check. n_neighbours : int Number of neighbours considered in the IQUAM track check. """ def __init__( self, lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, speed_limit: float, min_win_period: float, ship_speed_limit: float, delta_d: float, delta_t: float, n_neighbours: int, ) -> None: """ Object used to perform the new speed check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). ship_speed_limit : float Ship speed limit for the IQUAM track check. delta_d : float The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km. Used in the IQUAM track check. delta_t : float The smallest increment in time that can be resolved. For hourly data expressed as a float this is 0.01 hours. Used in the IQUAM track check. n_neighbours : int Number of neighbours considered in the IQUAM track check. """ self.lon = np.asarray(lons, dtype=float) self.lat = np.asarray(lats, dtype=float) self.nreps = len(lons) self.dates = np.asarray(dates, dtype=datetime) dates_list: list[datetime] = pd.to_datetime(dates).tolist() self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float) self.speed_limit = speed_limit self.min_win_period = min_win_period self.ship_speed_limit = ship_speed_limit self.delta_d = delta_d self.delta_t = delta_t self.n_neighbours = n_neighbours self.iquam_track_ship: np.ndarray = np.array([], dtype=int) # Initialise QC outcomes with untested self.qc_outcomes = np.zeros(self.nreps) + untested
[docs] def get_qc_outcomes(self) -> np.ndarray: """ Retrieve the QC outcomes for all observations. Returns ------- np.ndarray Array of QC flags for each observation (0 = valid, 1 = flagged, untested otherwise). """ return self.qc_outcomes
[docs] def valid_parameters(self) -> bool: """ Validate the QC parameters to ensure they are sensible. Checks that: - `speed_limit` is non-negative. - `min_win_period` is non-negative. Warnings are raised for any invalid parameters. Returns ------- bool True if all parameters are valid, False otherwise. """ valid = False if not (self.speed_limit >= 0): warnings.warn(UserWarning(f"Invalid speed_limit: {self.speed_limit}. Must be zero or positive."), stacklevel=2) elif not (self.min_win_period >= 0): warnings.warn(UserWarning(f"Invalid min_win_period: {self.min_win_period}. Must be zero or positive."), stacklevel=2) else: valid = True return valid
[docs] def valid_arrays(self) -> bool: """ Validate the input arrays (longitude, latitude, and time differences). Checks for: - NaN values in longitude, latitude, or time arrays. - Monotonicity of the time array (`self.hrs`). Warnings are raised for any issues detected. Returns ------- bool True if all arrays are valid, False otherwise. """ valid = False if any(np.isnan(self.lon)): warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2) elif any(np.isnan(self.lat)): warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2) elif any(np.isnan(self.hrs)): warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2) elif not is_monotonic(self.hrs): warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2) else: valid = True return valid
[docs] def perform_iquam_track_check(self) -> None: """ Perform iQuam track check as if reports are from a ship. A deep copy of reps is made so metadata can be safely modified ahead of iQuam check an array of qc flags (iquam_track_ship) is the result. """ self.iquam_track_ship = do_iquam_track_check( self.lat, self.lon, self.dates, self.ship_speed_limit, self.delta_d, self.delta_t, self.n_neighbours, )
[docs] def do_new_speed_check(self) -> None: """Perform the actual new speed check.""" nrep = self.nreps min_win_period_hours = self.min_win_period * 24.0 if not self.valid_arrays() or not self.valid_parameters(): self.qc_outcomes = np.zeros(self.nreps) + untestable return self.perform_iquam_track_check() # Initialise self.qc_outcomes = np.zeros(nrep) + passed # loop through timeseries to see if drifter is moving too fast and flag any occurrences index_arr = np.array(range(0, nrep)) # type: np.ndarray i = 0 time_to_end = self.hrs[-1] - self.hrs[i] while time_to_end >= min_win_period_hours: if self.iquam_track_ship[i] == failed: i += 1 time_to_end = self.hrs[-1] - self.hrs[i] continue f_win = (self.hrs >= self.hrs[i] + min_win_period_hours) & (self.iquam_track_ship == passed) if not any(f_win): i += 1 time_to_end = self.hrs[-1] - self.hrs[i] continue win_len = self.hrs[f_win][0] - self.hrs[i] displace = sphere_distance(self.lat[i], self.lon[i], self.lat[f_win][0], self.lon[f_win][0]) speed = displace / win_len # km per hr speed = speed * 1000.0 / (60.0 * 60) # metres per sec if speed > self.speed_limit: self.qc_outcomes[i : index_arr[f_win][0] + 1] = failed i += 1 time_to_end = self.hrs[-1] - self.hrs[i]
[docs] class AgroundChecker: """ Class used to carry out :py:func:`.do_aground_check`. Check to see whether a drifter has run aground based on 1/100th degree precision positions. A flag is set for each input report: flag=1 for reports deemed aground, else flag=0. Positional errors introduced by lon/lat 'jitter' and data precision can be of order several km's. Longitude and latitude timeseries are smoothed prior to assessment to reduce position 'jitter'. Some post-smoothing position 'jitter' may remain and its expected magnitude is set within the function by the 'tolerance' parameter. A drifter is deemed aground when, after a period of time, the distance between reports is less than the 'tolerance'. The minimum period of time over which this assessment is made is set by 'min_win_period'. This period must be long enough such that slow moving drifters are not falsely flagged as aground given errors in position (e.g. a buoy drifting at around 1 cm/s will travel around 1 km/day; given 'tolerance' and precision errors of a few km's the 'min_win_period' needs to be several days to ensure distance-travelled exceeds the error so that motion is reliably detected and the buoy is not falsely flagged as aground). However, min_win_period should not be longer than necessary as buoys that run aground for less than min_win_period will not be detected. Because temporal sampling can be erratic the time period over which an assessment is made is specified as a range (bound by 'min_win_period' and 'max_win_period') - assessment uses the longest time separation available within this range. If a drifter is deemed aground and subsequently starts moving (e.g. if a drifter has moved very slowly for a prolonged period) incorrectly flagged reports will be reinstated. * smooth_win: length of window (odd number) in datapoints used for smoothing lon/lat * min_win_period: minimum period of time in days over which position is assessed for no movement (see description) * max_win_period: maximum period of time in days over which position is assessed for no movement (this should be greater than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of up to 2-days in sampling). Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. smooth_win : int Length of window (odd number) in datapoints used for smoothing lon/lat. min_win_period : int Minimum period of time in days over which position is assessed for no movement (see description). max_win_period : int or None Maximum period of time in days over which position is assessed for no movement (this should be greater than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of up to 2-days in sampling). """ # displacement resulting from 1/100th deg 'position-jitter' at the equator (km) tolerance = sphere_distance(0, 0, 0.01, 0.01) def __init__( self, lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, smooth_win: int, min_win_period: int, max_win_period: int | None, ) -> None: """ Create on object for performing the Aground and New Aground checks. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. smooth_win : int Length of window (odd number) in datapoints used for smoothing lon/lat. min_win_period : int Minimum period of time in days over which position is assessed for no movement (see description). max_win_period : int or None Maximum period of time in days over which position is assessed for no movement (this should be greater than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of up to 2-days in sampling). """ self.lon = np.asarray(lons, dtype=float) self.lat = np.asarray(lats, dtype=float) self.nreps = len(lons) dates_list: list[datetime] = pd.to_datetime(dates).tolist() self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float) self.smooth_win = smooth_win self.min_win_period = min_win_period self.max_win_period = max_win_period self.lon_smooth: np.ndarray = np.array([], dtype=float) self.lat_smooth: np.ndarray = np.array([], dtype=float) self.hrs_smooth: np.ndarray = np.array([], dtype=float) # Initialise QC outcomes with untested self.qc_outcomes = np.zeros(self.nreps) + untested
[docs] def get_qc_outcomes(self) -> np.ndarray: """ Return the QC outcomes. Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. """ return self.qc_outcomes
[docs] def valid_parameters(self) -> bool: """ Check the parameters are valid. Raises a warning and returns False if not valid. Returns ------- bool True if parameter is valid, otherwise False. """ valid = False if self.smooth_win < 1: warnings.warn(UserWarning("Invalid smooth_win: {self.smooth_win}. Must be positive."), stacklevel=2) elif self.smooth_win % 2 == 0: warnings.warn(UserWarning("Invalid smooth_win: {self.smooth_win}. Must be an odd number."), stacklevel=2) elif self.min_win_period < 1: warnings.warn(UserWarning(f"Invalid min_win_period: {self.min_win_period}. Must be positive."), stacklevel=2) elif self.max_win_period is None: valid = True elif self.max_win_period < 1: warnings.warn(UserWarning("Invalid max_win_period: {self.max_win_period}. Must be positive."), stacklevel=2) elif self.max_win_period < self.min_win_period: warnings.warn("max_win_period must be greater than or equal to min_win_period.", stacklevel=2) else: valid = True return valid
[docs] def valid_arrays(self) -> bool: """ Check the input arrays are valid. Raises a warning and returns False if not valid. Returns ------- bool True if array is valid, otherwise False. """ valid = False if any(np.isnan(self.lon)): warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2) elif any(np.isnan(self.lat)): warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2) elif any(np.isnan(self.hrs)): warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2) elif not is_monotonic(self.hrs): warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2) else: valid = True return valid
[docs] def smooth_arrays(self) -> None: """Perform the preprocessing of the lat lon and time arrays.""" half_win = int((self.smooth_win - 1) / 2) # create smoothed lon/lat timeseries # length of series after smoothing nrep_smooth = self.nreps - self.smooth_win + 1 lon_smooth = np.empty(nrep_smooth) # type: np.ndarray lon_smooth[:] = np.nan lat_smooth = np.empty(nrep_smooth) # type: np.ndarray lat_smooth[:] = np.nan hrs_smooth = np.empty(nrep_smooth) # type: np.ndarray hrs_smooth[:] = np.nan for i in range(0, nrep_smooth): lon_smooth[i] = np.median(self.lon[i : i + self.smooth_win]) lat_smooth[i] = np.median(self.lat[i : i + self.smooth_win]) hrs_smooth[i] = self.hrs[i + half_win] self.lon_smooth = lon_smooth self.lat_smooth = lat_smooth self.hrs_smooth = hrs_smooth
[docs] def do_aground_check(self) -> None: """Perform the actual aground check.""" half_win = (self.smooth_win - 1) / 2 min_win_period_hours = self.min_win_period * 24.0 if self.max_win_period is not None: max_win_period_hours = self.max_win_period * 24.0 else: max_win_period_hours = None if not self.valid_parameters() or not self.valid_arrays(): self.qc_outcomes[:] = untestable return # Default to pass if self.nreps <= self.smooth_win: self.qc_outcomes[:] = passed return self.smooth_arrays() # loop through smoothed timeseries to see if drifter has run aground i = 0 is_aground = False # keeps track of whether drifter is deemed aground i_aground = np.nan # keeps track of index when drifter first ran aground time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i] while time_to_end >= min_win_period_hours: if self.max_win_period is not None: f_win = self.hrs_smooth <= self.hrs_smooth[i] + max_win_period_hours win_len = self.hrs_smooth[f_win][-1] - self.hrs_smooth[i] if win_len < min_win_period_hours: i += 1 time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i] continue displace = sphere_distance( self.lat_smooth[i], self.lon_smooth[i], self.lat_smooth[f_win][-1], self.lon_smooth[f_win][-1], ) else: displace = sphere_distance( self.lat_smooth[i], self.lon_smooth[i], self.lat_smooth[-1], self.lon_smooth[-1], ) if displace <= AgroundChecker.tolerance: if not is_aground: is_aground = True i_aground = i else: is_aground = False i_aground = np.nan i += 1 time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i] # set flags if is_aground and i_aground > 0: i_aground += half_win # this gets the first index the drifter is deemed aground for the original (un-smoothed) timeseries # n.b. if i_aground=0 then the entire drifter record is deemed aground and flagged as such self.qc_outcomes[:] = passed if is_aground: self.qc_outcomes[int(i_aground) :] = failed
[docs] class SSTTailChecker: """ Class used to carry out :py:func:`.do_sst_start_tail_check` and :py:func:`.do_sst_end_tail_check`. Check to see whether there is erroneous sea surface temperature data at the beginning or end of a drifter record (referred to as 'tails'). Flags are set for each input report: flag=1 for reports with erroneous data, else flag=0, 'drf_tail1' is used for bad data at the beginning of a record, 'drf_tail2' is used for bad data at the end of a record. The tail check makes an assessment of the quality of data at the start and end of a drifting buoy record by comparing to a background reference field. Data found to be unacceptably biased or noisy relative to the background are flagged by the check. When making the comparison an allowance is made for background error variance and also normal drifter error (both bias and random measurement error). The correlation of the background error is treated as unknown and takes on a value which maximises background error dependent on the assessment being made. A background error variance limit is also specified, beyond which the background is deemed unreliable. Observations made during the day, in icy regions or where the background value is missing are excluded from the comparison. The check proceeds in two steps; a 'long tail-check' followed by a 'short tail-check'. The idea is that the short tail-check has finer resolution but lower sensitivity than the long tail-check and may pick off noisy data not picked up by the long tail check. Only observations that pass the long tail-check are passed to the short tail-check. Both of these tail checks proceed by moving a window over the data and assessing the data in each window. Once good data are found the check stops and any bad data preceding this are flagged. If unreliable background data are encountered the check stops. The checks are run forwards and backwards over the record so as to assess data at the start and end of the record. If the whole record fails no observations are flagged as there are then no 'tails' in the data (this is left for other checks). The long tail check looks for groups of observations that are too biased or noisy as a whole. The short tail check looks for individual observations exceeding a noise limit within the window. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. long_win_len : int Length of window (in data-points) over which to make long tail-check (must be an odd number). long_err_std_n : float Number of standard deviations of combined background and drifter bias error, beyond which data fail bias check. short_win_len : int Length of window (in data-points) over which to make the short tail-check. short_err_std_n : float Number of standard deviations of combined background and drifter error, beyond which data are deemed suspicious. short_win_n_bad : int Minimum number of suspicious data points required for failure of short check window. drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared). """ def __init__( self, lat: SequenceNumberType, lon: SequenceNumberType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, dates: SequenceDatetimeType, long_win_len: int, long_err_std_n: float, short_win_len: int, short_err_std_n: float, short_win_n_bad: int, drif_inter: float, drif_intra: float, background_err_lim: float, ) -> None: """ Create SSTTailChecker object to perform the SST Tail QC Check. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. long_win_len : int Length of window (in data-points) over which to make long tail-check (must be an odd number). long_err_std_n : float Number of standard deviations of combined background and drifter bias error, beyond which data fail bias check. short_win_len : int Length of window (in data-points) over which to make the short tail-check. short_err_std_n : float Number of standard deviations of combined background and drifter error, beyond which data are deemed suspicious. short_win_n_bad : int Minimum number of suspicious data points required for failure of short check window. drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared). """ self.nreps = len(sst) self.lat = np.asarray(lat, dtype=float) self.lon = np.asarray(lon, dtype=float) self.sst = np.asarray(sst, dtype=float) self.ostia = np.asarray(ostia, dtype=float) self.ice = np.asarray(ice, dtype=float) self.bgvar = np.asarray(bgvar, dtype=float) self.dates = np.asarray(dates, dtype=datetime) dates_list: list[datetime] = pd.to_datetime(dates).tolist() self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float) self.reps_ind: np.ndarray = np.array([], dtype=float) self.sst_anom: np.ndarray = np.array([], dtype=float) self.bgerr: np.ndarray = np.array([], dtype=float) self.start_tail_ind: int self.end_tail_ind: int self.qc_outcomes = np.zeros(self.nreps) + untested self.long_win_len = long_win_len self.long_err_std_n = long_err_std_n self.short_win_len = short_win_len self.short_err_std_n = short_err_std_n self.short_win_n_bad = short_win_n_bad self.drif_inter = drif_inter self.drif_intra = drif_intra self.background_err_lim = background_err_lim
[docs] def get_qc_outcomes(self) -> np.ndarray: """ Return the QC outcomes. Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. """ return self.qc_outcomes
[docs] def valid_parameters(self) -> bool: """ Check the parameters are valid. Raises a warning and returns False if not valid. Returns ------- bool True if parameter is valid, otherwise False. """ valid = False if self.long_win_len < 1: warnings.warn(UserWarning("Invalid long_win_len: {self.long_win_len}. Must be positive."), stacklevel=2) elif not (self.long_win_len % 2 != 0): warnings.warn(UserWarning("Invalid long_win_len: {self.long_win_len}. Must be an odd number."), stacklevel=2) elif self.long_err_std_n < 0: warnings.warn(UserWarning("Invalid long_err_std_n: {self.long_err_std_n}. Must be zero or positive."), stacklevel=2) elif self.short_win_len < 1: warnings.warn(UserWarning("Invalid short_win_len: {self.short_win_len}. Must be positive."), stacklevel=2) elif self.short_err_std_n < 0: warnings.warn(UserWarning("Invalid short_err_std_n: {self.short_err_std_n}. Must be zero or positive."), stacklevel=2) elif self.short_win_n_bad < 1: warnings.warn(UserWarning("Invalid short_win_n_bad: {self.short_win_n_bad}. Must be positive."), stacklevel=2) elif self.drif_inter < 0: warnings.warn(UserWarning("Invalid drif_inter: {self.drif_inter}. Must be zero or positive."), stacklevel=2) elif self.drif_intra < 0: warnings.warn(UserWarning("Invalid drif_intra: {self.drif_intra}. Must be zero or positive."), stacklevel=2) elif self.background_err_lim < 0: warnings.warn(UserWarning("Invalid background_err_lim: {self.background_err_lim}. Must be zero or positive."), stacklevel=2) else: valid = True return valid
[docs] def do_sst_tail_check(self, start_tail: bool) -> None: """ Perform the actual SST tail check. Parameters ---------- start_tail : bool If True flag the start of the record as failed, otherwise flag the end of the record as failed. """ if not self.valid_parameters(): self.qc_outcomes[:] = untestable return if not is_monotonic(self.hrs): warnings.warn(UserWarning("Times do not increase monotonically"), stacklevel=2) self.qc_outcomes[:] = untestable return invalid_series = self._preprocess_reps() if invalid_series: self.qc_outcomes[:] = untestable return if len(self.sst_anom) == 0: self.qc_outcomes[:] = passed return self.qc_outcomes[:] = passed nrep = len(self.sst_anom) self.start_tail_ind = -1 # keeps track of index where start tail stops self.end_tail_ind = nrep # keeps track of index where end tail starts # do long tail check - records shorter than long-window length aren't evaluated if nrep >= self.long_win_len: # run forwards then backwards over timeseries self._do_long_tail_check(forward=True) self._do_long_tail_check(forward=False) # do short tail check on records that pass long tail check - whole record already failed long tail check if self.start_tail_ind < self.end_tail_ind: first_pass_ind = self.start_tail_ind + 1 # first index passing long tail check last_pass_ind = self.end_tail_ind - 1 # last index passing long tail check self._do_short_tail_check(first_pass_ind, last_pass_ind, forward=True) self._do_short_tail_check(first_pass_ind, last_pass_ind, forward=False) # now flag reps - whole record failed tail checks, don't flag if self.start_tail_ind >= self.end_tail_ind: self.start_tail_ind = -1 self.end_tail_ind = nrep if not self.start_tail_ind == -1 and start_tail: self.qc_outcomes[0 : self.reps_ind[self.start_tail_ind] + 1] = failed if not self.end_tail_ind == nrep and not start_tail: self.qc_outcomes[self.reps_ind[self.end_tail_ind] :] = failed
@staticmethod def _parse_rep( lat: float, lon: float, ostia: float, ice: float, bgvar: float, dates: datetime, ) -> tuple[float, float, float, bool, bool]: """ Process a report. Parameters ---------- lat : float Latitude. lon : float Longitude. ostia : float OSTIA value matched to this observation. ice : float Ice concentration value matched to this observation. bgvar : float Background variance value matched to this observation. dates : np.datetime Date and time of the observation. Returns ------- (float, float, float, bool) Background value, ice concentration, background variance, and a boolean variable indicating whether the report is "good". """ invalid_ob = False if not isvalid(ice): ice = 0.0 if ice < 0.0 or ice > 1.0: warnings.warn(UserWarning("Invalid ice value"), stacklevel=2) invalid_ob = True try: daytime = track_day_test( dates.year, dates.month, dates.day, dates.hour + dates.minute / 60, lat, lon, -2.5, ) except ValueError as error: warnings.warn(f"Daytime check failed with {error}", stacklevel=2) daytime = True invalid_ob = True land_match = not isvalid(ostia) ice_match = ice > 0.15 good_match = not (daytime or land_match or ice_match) return ostia, ice, bgvar, good_match, invalid_ob def _preprocess_reps(self) -> bool: """ Process the reps and calculate the values used in the QC check. Returns ------- bool True if any invalid observations or background values were encountered during preprocessing, otherwise False. """ invalid_series = False # test and filter out obs with unsuitable background matches reps_ind: list[int] = [] sst_anom: list[float] = [] bgvar: list[float] = [] for ind in range(self.nreps): bg_val, _ice_val, bgvar_val, good_match, invalid_ob = self._parse_rep( self.lat[ind], self.lon[ind], self.ostia[ind], self.ice[ind], self.bgvar[ind], self.dates[ind], ) if invalid_ob: invalid_series = True if good_match: if bg_val < -5.0 or bg_val > 45.0: warnings.warn(UserWarning("Background value is invalid"), stacklevel=2) invalid_series = True if bgvar_val < 0 or bgvar_val > 10.0: warnings.warn(UserWarning("Background variance is invalid"), stacklevel=2) invalid_series = True reps_ind.append(ind) sst_anom.append(self.sst[ind] - bg_val) bgvar.append(bgvar_val) # prepare numpy arrays and variables needed for tail checks # indices of obs suitable for assessment self.reps_ind = np.array(reps_ind, dtype=int) # ob-background differences self.sst_anom = np.array(sst_anom, dtype=float) # standard deviation of background error bgvar_arr = np.array(bgvar, dtype=float) bgvar_arr[bgvar_arr < 0.0] = float("nan") self.bgerr = np.sqrt(bgvar_arr) return invalid_series def _do_long_tail_check(self, forward: bool = True) -> None: """ Perform the long tail check. Parameters ---------- forward : bool Flag to set for a forward (True) or backward (False) pass of the long tail check. """ nrep = len(self.sst_anom) mid_win_ind = int((self.long_win_len - 1) / 2) if forward: sst_anom_temp = self.sst_anom bgerr_temp = self.bgerr else: sst_anom_temp = np.flipud(self.sst_anom) bgerr_temp = np.flipud(self.bgerr) # this is the long tail check for ix in range(0, nrep - self.long_win_len + 1): sst_anom_winvals = sst_anom_temp[ix : ix + self.long_win_len] bgerr_winvals = bgerr_temp[ix : ix + self.long_win_len] if np.any(bgerr_winvals > np.sqrt(self.background_err_lim)): break sst_anom_avg = trim_mean(sst_anom_winvals, 100) sst_anom_stdev = trim_std(sst_anom_winvals, 100) bgerr_avg = np.mean(bgerr_winvals) bgerr_rms = np.sqrt(np.mean(bgerr_winvals**2)) if (abs(sst_anom_avg) > self.long_err_std_n * np.sqrt(self.drif_inter**2 + bgerr_avg**2)) or ( sst_anom_stdev > np.sqrt(self.drif_intra**2 + bgerr_rms**2) ): if forward: self.start_tail_ind = ix + mid_win_ind else: self.end_tail_ind = (nrep - 1) - ix - mid_win_ind else: break def _do_short_tail_check(self, first_pass_ind: int, last_pass_ind: int, forward: bool = True) -> None: """ Perform the short tail check. Parameters ---------- first_pass_ind : int Index. last_pass_ind : int Index. forward : bool Flag to set for a forward (True) or backward (False) pass of the short tail check. """ npass = last_pass_ind - first_pass_ind + 1 if npass <= 0: raise ValueError(f"Invalid npass: {npass}. Must be positive.") # records shorter than short-window length aren't evaluated if npass < self.short_win_len: return if forward: sst_anom_temp = self.sst_anom[first_pass_ind : last_pass_ind + 1] bgerr_temp = self.bgerr[first_pass_ind : last_pass_ind + 1] else: sst_anom_temp = np.flipud(self.sst_anom[first_pass_ind : last_pass_ind + 1]) bgerr_temp = np.flipud(self.bgerr[first_pass_ind : last_pass_ind + 1]) # this is the short tail check for ix in range(0, npass - self.short_win_len + 1): sst_anom_winvals = sst_anom_temp[ix : ix + self.short_win_len] bgerr_winvals = bgerr_temp[ix : ix + self.short_win_len] if np.any(bgerr_winvals > np.sqrt(self.background_err_lim)): break limit = self.short_err_std_n * np.sqrt(bgerr_winvals**2 + self.drif_inter**2 + self.drif_intra**2) exceed_limit = np.logical_or(sst_anom_winvals > limit, sst_anom_winvals < -limit) if np.sum(exceed_limit) >= self.short_win_n_bad: if forward: # if all windows have failed, flag everything if ix == npass - self.short_win_len: self.start_tail_ind += self.short_win_len else: self.start_tail_ind += 1 else: # if all windows have failed, flag everything if ix == npass - self.short_win_len: self.end_tail_ind -= self.short_win_len else: self.end_tail_ind -= 1 else: break
[docs] class SSTBiasedNoisyChecker: """ Class used to perform the :py:func:`.do_sst_biased_check`, :py:func:`.do_sst_noisy_check`, and :py:func:`.do_sst_biased_noisy_short_check`. Check to see whether a drifter sea surface temperature record is unacceptably biased or noisy as a whole. The check makes an assessment of the quality of data in a drifting buoy record by comparing to a background reference field. If the record is found to be unacceptably biased or noisy relative to the background all observations are flagged by the check. For longer records the flags 'drf_bias' and 'drf_noise' are set for each input report: flag=1 for records with erroneous data, else flag=0. For shorter records 'drf_short' is set for each input report: flag=1 for reports with erroneous data, else flag=0. When making the comparison an allowance is made for background error variance and also normal drifter error (both bias and random measurement error). A background error variance limit is also specified, beyond which the background is deemed unreliable and is excluded from comparison. Observations made during the day, in icy regions or where the background value is missing are also excluded from the comparison. The check has two separate streams; a 'long-record check' and a 'short-record check'. Records with at least n_eval observations are passed to the long-record check, else they are passed to the short-record check. The long-record check looks for records that are too biased or noisy as a whole. The short record check looks for individual observations exceeding a noise limit within a record. The purpose of n_eval is to ensure records with too few observations for their bias and noise to be reliably estimated are handled separately by the short-record check. The correlation of the background error is treated as unknown and handled differently for each assessment. For the long-record noise-check and the short-record check the background error is treated as uncorrelated, which maximises the possible impact of background error on these assessments. For the long-record bias-check a limit (bias_lim) is specified beyond which the record is considered biased. The default value for this limit was chosen based on histograms of drifter-background bias. An alternative approach would be to treat the background error as entirely correlated across a long-record, which maximises its possible impact on the bias assessment. In this case the histogram approach was used as the limit could be tuned to give better results. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional sea surface temperature array in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional background field sea surface temperature array in K. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional background variance array in K^2. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional ice concentration array in range 0,1. n_eval : int The minimum number of drifter observations required to be assessed by the long-record check. bias_lim : float Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). err_std_n : float Number of standard deviations of combined background and drifter error, beyond which short-record data are deemed suspicious. n_bad : int Minimum number of suspicious data points required for failure of short-record check. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). """ def __init__( self, lat: SequenceNumberType, lon: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, bgvar: SequenceNumberType, ice: SequenceNumberType, n_eval: int, bias_lim: float, drif_intra: float, drif_inter: float, err_std_n: float, n_bad: int, background_err_lim: float, ) -> None: """ Create an object for performing the SST Biased, Noisy and Short Checks. Parameters ---------- lat : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. lon : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional sea surface temperature array in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional background field sea surface temperature array in K. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional background variance array in K^2. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional ice concentration array in range 0,1. n_eval : int The minimum number of drifter observations required to be assessed by the long-record check. bias_lim : float Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). err_std_n : float Number of standard deviations of combined background and drifter error, beyond which short-record data are deemed suspicious. n_bad : int Minimum number of suspicious data points required for failure of short-record check. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). """ self.lat = np.asarray(lat, dtype=float) self.lon = np.asarray(lon, dtype=float) self.dates = np.asarray(dates, dtype=datetime) self.sst = np.asarray(sst, dtype=float) self.ostia = np.asarray(ostia, dtype=float) self.bgvar = np.asarray(bgvar, dtype=float) self.ice = np.asarray(ice, dtype=float) self.nreps = len(lat) dates_list: list[datetime] = pd.to_datetime(dates).tolist() self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float) self.n_eval = n_eval self.bias_lim = bias_lim self.drif_intra = drif_intra self.drif_inter = drif_inter self.err_std_n = err_std_n self.n_bad = n_bad self.background_err_lim = background_err_lim self.sst_anom: np.ndarray = np.array([], dtype=float) self.bgerr: np.ndarray = np.array([], dtype=float) self.bgvar_is_masked: bool self.qc_outcomes_bias = np.zeros(self.nreps) + untested self.qc_outcomes_noise = np.zeros(self.nreps) + untested self.qc_outcomes_short = np.zeros(self.nreps) + untested
[docs] def valid_parameters(self) -> bool: """ Check the parameters are valid. Raises a warning and returns False if not valid. Returns ------- bool True if parameter is valid, otherwise False. """ valid = False if self.n_eval < 1: warnings.warn(UserWarning("Invalid n_eval: {self.n_eval}. Must be positive."), stacklevel=2) elif self.bias_lim < 0: warnings.warn(UserWarning("Invalid bias_lim: {self.bias_lim}. Must be zero or positive."), stacklevel=2) elif self.drif_inter < 0: warnings.warn(UserWarning("Invalid drif_inter: {self.drif_inter}. Must be zero or positive."), stacklevel=2) elif self.drif_intra < 0: warnings.warn(UserWarning("Invalid drif_intra: {self.drif_intra}. Must be zero or positive."), stacklevel=2) elif self.err_std_n < 0: warnings.warn(UserWarning("Invalid err_std_n: {self.err_std_n}. Must be zero or positive."), stacklevel=2) elif self.n_bad < 1: warnings.warn(UserWarning("Invalid n_bad: {self.n_bad}. Must be positive."), stacklevel=2) elif self.background_err_lim < 0: warnings.warn(UserWarning("Invalid background_err_lim: {self.background_err_lim}. Must be zero or positive."), stacklevel=2) else: valid = True return valid
[docs] def get_qc_outcomes_bias(self) -> np.ndarray: """ Return the QC outcomes for the bias check. Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. """ return self.qc_outcomes_bias
[docs] def get_qc_outcomes_noise(self) -> np.ndarray: """ Return the QC outcomes for the noisy check. Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. """ return self.qc_outcomes_noise
[docs] def get_qc_outcomes_short(self) -> np.ndarray: """ Return the QC outcomes for the short check. Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. """ return self.qc_outcomes_short
[docs] def set_all_qc_outcomes_to(self, input_state: int) -> None: """ Set all the QC outcomes to the specified input_state. Parameters ---------- input_state : int QC flag to map to the QC outcomes. """ if input_state not in [ passed, failed, untested, untestable, ]: raise ValueError(f"Invalid input_state: {input_state}. Must be one of [{passed}, {failed}, {untested}, {untestable}].") self.qc_outcomes_short[:] = input_state self.qc_outcomes_noise[:] = input_state self.qc_outcomes_bias[:] = input_state
[docs] def do_sst_biased_noisy_check(self) -> None: """Perform the bias/noise check QC.""" if not self.valid_parameters(): self.set_all_qc_outcomes_to(untestable) return invalid_series = self._preprocess_reps() if invalid_series: self.set_all_qc_outcomes_to(untestable) return long_record = not (len(self.sst_anom) < self.n_eval) self.set_all_qc_outcomes_to(passed) if long_record: self._long_record_qc() else: if not self.bgvar_is_masked: self._short_record_qc()
@staticmethod def _parse_rep( lat: float, lon: float, ostia: float, ice: float, bgvar: float, dates: datetime, background_err_lim: float, ) -> tuple[float, float, float, bool, bool, bool]: """ Extract QC-relevant variables from a marine report. Parameters ---------- lat : float Latitude of the observation to be parsed. lon : float Longitude of the observation to be parsed. ostia : float Background SST field value. ice : float Ice concentration field value. bgvar : float Background variance field value. dates : datetime Date and time of the observation to be parsed. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). Returns ------- float, float, float, bool, bool, bool Returns the background SST value, ice value, background SST variance, a flag that indicates a good match, and a flag that indicates if the background variance is valid, and a flag that indicates if the observation is valid overall. """ invalid_ob = False if not isvalid(ice): ice = 0.0 if ice < 0.0 or ice > 1.0: warnings.warn(UserWarning("Invalid ice value"), stacklevel=2) invalid_ob = True try: daytime = track_day_test( dates.year, dates.month, dates.day, dates.hour + dates.minute / 60, lat, lon, -2.5, ) except ValueError as error: warnings.warn(f"Daytime check failed with {error}", stacklevel=2) daytime = True invalid_ob = True land_match = not isvalid(ostia) ice_match = ice > 0.15 bgvar_mask = bgvar is not None and bgvar > background_err_lim good_match = not (daytime or land_match or ice_match or bgvar_mask) return ostia, ice, bgvar, good_match, bgvar_mask, invalid_ob def _preprocess_reps(self) -> bool: """ Fill SST anomalies, background errors, and flags for invalid background values. This method processes each observation to compute sea surface temperature (SST) anomalies, background error standard deviations, and flags any missing or invalid background values. It also checks whether the time series is sorted and sets a mask flag if any background variances are masked. Returns ------- bool True if any invalid observations or background values were encountered during preprocessing, otherwise False. """ invalid_series = False # test and filter out obs with unsuitable background matches sst_anom = [] bgvar = [] bgvar_is_masked = False for ind in range(self.nreps): bg_val, _ice_val, bgvar_val, good_match, bgvar_mask, invalid_ob = SSTBiasedNoisyChecker._parse_rep( self.lat[ind], self.lon[ind], self.ostia[ind], self.ice[ind], self.bgvar[ind], self.dates[ind], self.background_err_lim, ) if invalid_ob: invalid_series = True if bgvar_mask: bgvar_is_masked = True if good_match: if bg_val < -5.0 or bg_val > 45.0: warnings.warn(UserWarning("Background value is invalid"), stacklevel=2) invalid_series = True if bgvar_val < 0 or bgvar_val > 10.0: warnings.warn(UserWarning("Background variance is invalid"), stacklevel=2) invalid_series = True sst_anom.append(self.sst[ind] - bg_val) bgvar.append(bgvar_val) # prepare numpy arrays and variables needed for checks self.sst_anom = np.array(sst_anom) # ob-background differences bgvar_arr = np.array(bgvar) bgvar_arr[bgvar_arr < 0] = np.nan self.bgerr = np.sqrt(np.array(bgvar_arr)) # standard deviation of background error self.bgvar_is_masked = bgvar_is_masked if not is_monotonic(self.hrs): warnings.warn(UserWarning("Not sorted in time order"), stacklevel=2) invalid_series = True return invalid_series def _long_record_qc(self) -> None: """Perform the long record check.""" sst_anom_avg = np.mean(self.sst_anom) sst_anom_stdev = np.std(self.sst_anom) bgerr_rms = np.sqrt(np.mean(self.bgerr**2)) self.qc_outcomes_bias[:] = passed if abs(sst_anom_avg) > self.bias_lim: self.qc_outcomes_bias[:] = failed self.qc_outcomes_noise[:] = passed if sst_anom_stdev > np.sqrt(self.drif_intra**2 + bgerr_rms**2): self.qc_outcomes_noise[:] = failed def _short_record_qc(self) -> None: """Perform the short record check.""" # Calculate the limit based on the combined uncertainties (background error, drifter inter and drifter intra # error) and then multiply by the err_std_n limit = self.err_std_n * np.sqrt(self.bgerr**2 + self.drif_inter**2 + self.drif_intra**2) # If the number of obs outside the limit exceed n_bad then flag them all as bad self.qc_outcomes_short[:] = passed exceed_limit = np.logical_or(self.sst_anom > limit, self.sst_anom < -limit) if np.sum(exceed_limit) >= self.n_bad: self.qc_outcomes_short[:] = failed
[docs] @inspect_arrays(["lons", "lats", "dates"]) def do_speed_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, speed_limit: float, min_win_period: float, max_win_period: float, ) -> np.ndarray: """ Perform the Track QC speed check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). max_win_period : float Maximum period of time in days over which position is assessed for speed estimates (this should be greater than min_win_period and allow for some erratic temporal sampling e.g. min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if speed check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * speed_limit = 2.5 * min_win_period = 0.8 * max_win_perido = 1.8 """ lons_arr, lats_arr, dates_arr = ensure_arrays(lons=lons, lats=lats, dates=dates) checker = SpeedChecker(lons_arr, lats_arr, dates_arr, speed_limit, min_win_period, max_win_period) checker.do_speed_check() return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lons", "lats", "dates"]) def do_new_speed_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, speed_limit: float, min_win_period: float, ship_speed_limit: float, delta_d: float, delta_t: float, n_neighbours: int, ) -> np.ndarray: """ Perform the new speed check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. speed_limit : float Maximum allowable speed for an in situ drifting buoy (metres per second). min_win_period : float Minimum period of time in days over which position is assessed for speed estimates (see description). ship_speed_limit : float Ship speed limit for the IQUAM track check. delta_d : float The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km. Used in the IQUAM track check. delta_t : float The smallest increment in time that can be resolved. For hourly data expressed as a float this is 0.01 hours. Used in the IQUAM track check. n_neighbours : int Number of neighbours considered in the IQUAM track check. Returns ------- array-like of int, shape (n,) Array containing the QC outcomes for the new speed check. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * speed_limit = 3.0 * min_win_period = 0.375 And, for the IQUAM-specific parameters: * ship_speed_limit = 60.0 * delta_d = 1.11 * delta_t = 0.01 * n_neighbours = 5 """ lons_arr, lats_arr, dates_arr = ensure_arrays(lons=lons, lats=lats, dates=dates) checker = NewSpeedChecker( lons_arr, lats_arr, dates_arr, speed_limit, min_win_period, ship_speed_limit, delta_d, delta_t, n_neighbours, ) checker.do_new_speed_check() return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lons", "lats", "dates"]) def do_aground_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, smooth_win: int, min_win_period: int, max_win_period: int, ) -> np.ndarray: """ Perform the aground check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. smooth_win : int Length of window (odd number) in datapoints used for smoothing lon/lat. min_win_period : int Minimum period of time in days over which position is assessed for no movement (see description). max_win_period : int or None Maximum period of time in days over which position is assessed for no movement (this should be greater than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of up to 2-days in sampling). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if aground check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * smooth_win = 41 * min_win_period = 8 * max_win_period = 10 """ lats_arr, lons_arr, dates_arr = ensure_arrays(lats=lats, lons=lons, dates=dates) checker = AgroundChecker(lons_arr, lats_arr, dates_arr, smooth_win, min_win_period, max_win_period) checker.do_aground_check() return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lons", "lats", "dates"]) def do_new_aground_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, smooth_win: int, min_win_period: int, ) -> np.ndarray: """ Perform the new aground check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. smooth_win : int Length of window (odd number) in datapoints used for smoothing lon/lat. min_win_period : int Minimum period of time in days over which position is assessed for no movement (see description). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if new aground check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * smooth_win = 41 * min_win_period = 8 """ lats_arr, lons_arr, dates_arr = ensure_arrays(lats=lats, lons=lons, dates=dates) checker = AgroundChecker(lons_arr, lats_arr, dates_arr, smooth_win, min_win_period, None) checker.do_aground_check() return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lats", "lons", "sst", "ostia", "ice", "bgvar", "dates"]) def do_sst_start_tail_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, long_win_len: int, long_err_std_n: float, short_win_len: int, short_err_std_n: float, short_win_n_bad: int, drif_inter: float, drif_intra: float, background_err_lim: float, ) -> np.ndarray: """ Perform the SST Start Tail Check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. long_win_len : int Length of window (in data-points) over which to make long tail-check (must be an odd number). long_err_std_n : float Number of standard deviations of combined background and drifter bias error, beyond which data fail bias check. short_win_len : int Length of window (in data-points) over which to make the short tail-check. short_err_std_n : float Number of standard deviations of combined background and drifter error, beyond which data are deemed suspicious. short_win_n_bad : int Minimum number of suspicious data points required for failure of short check window. drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if SST start tail check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * long_win_len = 121 * long_err_std_n = 3.0 * short_win_len = 30 * short_err_std_n = 3.0 * short_win_n_bad = 2 * drif_inter = 0.29 * drif_intra = 1.00 * background_err_lim = 0.3 """ lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays( lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates ) checker = SSTTailChecker( lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr, long_win_len, long_err_std_n, short_win_len, short_err_std_n, short_win_n_bad, drif_inter, drif_intra, background_err_lim, ) checker.do_sst_tail_check(True) return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lats", "lons", "sst", "ostia", "ice", "bgvar", "dates"]) def do_sst_end_tail_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, long_win_len: int, long_err_std_n: float, short_win_len: int, short_err_std_n: float, short_win_n_bad: int, drif_inter: float, drif_intra: float, background_err_lim: float, ) -> np.ndarray: """ Perform the SST Start Tail Check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. long_win_len : int Length of window (in data-points) over which to make long tail-check (must be an odd number). long_err_std_n : float Number of standard deviations of combined background and drifter bias error, beyond which data fail bias check. short_win_len : int Length of window (in data-points) over which to make the short tail-check. short_err_std_n : float Number of standard deviations of combined background and drifter error, beyond which data are deemed suspicious. short_win_n_bad : int Minimum number of suspicious data points required for failure of short check window. drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if SST start tail check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * long_win_len = 121 * long_err_std_n = 3.0 * short_win_len = 30 * short_err_std_n = 3.0 * short_win_n_bad = 2 * drif_inter = 0.29 * drif_intra = 1.00 * background_err_lim = 0.3 """ lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays( lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates ) checker = SSTTailChecker( lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr, long_win_len, long_err_std_n, short_win_len, short_err_std_n, short_win_n_bad, drif_inter, drif_intra, background_err_lim, ) checker.do_sst_tail_check(False) return checker.get_qc_outcomes()
[docs] @inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"]) def do_sst_biased_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, n_eval: int, bias_lim: float, drif_intra: float, drif_inter: float, err_std_n: float, n_bad: int, background_err_lim: float, ) -> np.ndarray: """ Perform the SST bias check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. n_eval : int The minimum number of drifter observations required to be assessed by the long-record check. bias_lim : float Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). err_std_n : float Number of standard deviations of combined background and drifter error, beyond which short-record data are deemed suspicious. n_bad : int Minimum number of suspicious data points required for failure of short-record check. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if SST bias check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * n_eval = 30 * bias_lim = 1.10 * drif_intra = 1.0 * drif_inter = 0.29 * err_std_n = 3.0 * n_bad = 2 * background_err_lim = 0.3 """ lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays( lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates ) checker = SSTBiasedNoisyChecker( lats_arr, lons_arr, dates_arr, sst_arr, ostia_arr, bgvar_arr, ice_arr, n_eval, bias_lim, drif_intra, drif_inter, err_std_n, n_bad, background_err_lim, ) checker.do_sst_biased_noisy_check() return checker.get_qc_outcomes_bias()
[docs] @inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"]) def do_sst_noisy_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, n_eval: int, bias_lim: float, drif_intra: float, drif_inter: float, err_std_n: float, n_bad: int, background_err_lim: float, ) -> np.ndarray: """ Perform the SST noise check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. n_eval : int The minimum number of drifter observations required to be assessed by the long-record check. bias_lim : float Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). err_std_n : float Number of standard deviations of combined background and drifter error, beyond which short-record data are deemed suspicious. n_bad : int Minimum number of suspicious data points required for failure of short-record check. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if SST noise check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * n_eval = 30 * bias_lim = 1.10 * drif_intra = 1.0 * drif_inter = 0.29 * err_std_n = 3.0 * n_bad = 2 * background_err_lim = 0.3 """ lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays( lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates ) checker = SSTBiasedNoisyChecker( lats_arr, lons_arr, dates_arr, sst_arr, ostia_arr, bgvar_arr, ice_arr, n_eval, bias_lim, drif_intra, drif_inter, err_std_n, n_bad, background_err_lim, ) checker.do_sst_biased_noisy_check() return checker.get_qc_outcomes_noise()
[docs] @inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"]) def do_sst_biased_noisy_short_check( lons: SequenceNumberType, lats: SequenceNumberType, dates: SequenceDatetimeType, sst: SequenceNumberType, ostia: SequenceNumberType, ice: SequenceNumberType, bgvar: SequenceNumberType, n_eval: int, bias_lim: float, drif_intra: float, drif_inter: float, err_std_n: float, n_bad: int, background_err_lim: float, ) -> np.ndarray: """ Perform the SST short check. Parameters ---------- lons : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional longitude array in degrees. lats : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional latitude array in degrees. dates : :py:obj:`~marine_qc.SequenceDatetimeType` 1-dimensional date array. sst : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of sea surface temperatures in K. ostia : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background field sea surface temperatures in K. ice : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of ice concentrations in the range 0.0 to 1.0. bgvar : :py:obj:`~marine_qc.SequenceNumberType` 1-dimensional array of background sea surface temperature fields variances in K^2. n_eval : int The minimum number of drifter observations required to be assessed by the long-record check. bias_lim : float Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K). drif_intra : float Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K). drif_inter : float Spread of biases expected in drifter data (standard deviation, degC or K). err_std_n : float Number of standard deviations of combined background and drifter error, beyond which short-record data are deemed suspicious. n_bad : int Minimum number of suspicious data points required for failure of short-record check. background_err_lim : float Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared). Returns ------- array-like of int, shape (n,) 1-dimensional array containing QC flags. 1 if SST short check fails, 0 otherwise. Raises ------ TypeError If `inspect_arrays` does not return np.ndarrays. Notes ----- In previous versions, default values for the parameters were: * n_eval = 30 * bias_lim = 1.10 * drif_intra = 1.0 * drif_inter = 0.29 * err_std_n = 3.0 * n_bad = 2 * background_err_lim = 0.3 """ lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays( lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates ) checker = SSTBiasedNoisyChecker( lats_arr, lons_arr, dates_arr, sst_arr, ostia_arr, bgvar_arr, ice_arr, n_eval, bias_lim, drif_intra, drif_inter, err_std_n, n_bad, background_err_lim, ) checker.do_sst_biased_noisy_check() return checker.get_qc_outcomes_short()