"""
Buoy tracking QC module.
Module containing QC functions for sequential reports from a single drifting buoy.
"""
# noqa: S101
from __future__ import annotations
import math
import warnings
from collections.abc import Sequence
from datetime import datetime
import numpy as np
import pandas as pd
from .astronomical_geometry import sunangle
from .auxiliary import SequenceDatetimeType, SequenceNumberType, ensure_arrays, failed, inspect_arrays, isvalid, passed, untestable, untested
from .qc_sequential_reports import do_iquam_track_check
from .spherical_geometry import sphere_distance
from .statistics import trim_mean, trim_std
from .time_control import convert_date_to_hours, day_in_year
"""
The trackqc module contains a set of functions for performing the tracking QC
first described in Atkinson et al. [2013]. The general procedures described
in Atkinson et al. [2013] were later revised and improved for the SST CCI 2 project.
Documentation and IDL code for the revised (and original) procedures can be found
in the CMA FCM code repository. The code in this module represents a port of the
revised IDL code into the python marine QC suite. New versions of the aground
and speed checks have also been added.
These functions perform tracking QC checks on a :py:class:`ex.Voyage`.
References:
Atkinson, C.P., N.A. Rayner, J. Roberts-Jones, R.O. Smith, 2013:
Assessing the quality of sea surface temperature observations from
drifting buoys and ships on a platform-by-platform basis (doi:10.1002/jgrc.20257).
"""
[docs]
def track_day_test(
year: int,
month: int,
day: int,
hour: float,
lat: float,
lon: float,
elevdlim: float = -2.5,
) -> bool:
"""
Given date, time, lat and lon calculate if the sun elevation is > elevdlim.
This is the "day" test used by tracking QC to decide whether an SST measurement is night or day.
This is important because daytime diurnal heating can affect comparison with an SST background.
It uses the function sunangle to calculate the elevation of the sun. A default solar_zenith angle
of 92.5 degrees (elevation of -2.5 degrees) delimits night from day.
Parameters
----------
year : int
Year.
month : int
Month.
day : int
Day.
hour : float
Hour expressed as decimal fraction (e.g. 20.75 = 20:45 pm).
lat : float
Latitude in degrees.
lon : float
Longitude in degrees.
elevdlim : float, default: -2.5
Elevation day/night delimiter in degrees above horizon.
Returns
-------
bool
True if daytime, else False.
Raises
------
ValueError
If either year, month, day, hour, lat or lon is numerically invalid or None
of if either month, day, hour or lat is not in valid range.
"""
if not isvalid(year):
raise ValueError("year is missing")
if not isvalid(month):
raise ValueError("month is missing")
if not isvalid(day):
raise ValueError("day is missing")
if not isvalid(hour):
raise ValueError("hour is missing")
if not isvalid(lat):
raise ValueError("lat is missing")
if not isvalid(lon):
raise ValueError("lon is missing")
if not (1 <= month <= 12):
raise ValueError("Month not in range 1-12")
if not (1 <= day <= 31):
raise ValueError("Day not in range 1-31")
if not (0 <= hour <= 24):
raise ValueError("Hour not in range 0-24")
if not (90 >= lat >= -90):
raise ValueError("Latitude not in range -90 to 90")
daytime = False
year2 = year
day2 = day_in_year(year, month, day)
hour2 = math.floor(hour)
minute2 = int((hour - math.floor(hour)) * 60.0)
lat2 = lat
lon2 = lon
if lat == 0:
lat2 = 0.0001
if lon == 0:
lon2 = 0.0001
_, elevation, _, _, _, _ = sunangle(year2, day2, hour2, minute2, 0, 0, 0, lat2, lon2)
if elevation > elevdlim:
daytime = True
return daytime
[docs]
def is_monotonic(inarr: np.ndarray | Sequence[int | float]) -> bool:
"""
Test if elements in an array are increasing monotonically.
I.e. each element is greater than or equal to the preceding element.
Parameters
----------
inarr : array-like of datetime, shape (n,)
1-dimensional date array.
Returns
-------
bool
True if array is increasing monotonically, False otherwise.
"""
for i in range(1, len(inarr)):
if inarr[i] < inarr[i - 1]:
return False
return True
[docs]
class SpeedChecker:
"""
Class used to carry out :py:func:`.do_speed_check`.
The check identifies whether a drifter has been picked up by a ship (out of water) based on 1/100th degree
precision positions. A flag is set for each input report: flag=1 for reports deemed picked up,
else flag=0.
A drifter is deemed picked up if it is moving faster than might be expected for a fast ocean current
(a few m/s). Unreasonably fast movement is detected when speed of travel between report-pairs exceeds
the chosen 'speed_limit' (speed is estimated as distance between reports divided by time separation -
this 'straight line' speed between the two points is a minimum speed estimate given a less-direct
path may have been followed). Positional errors introduced by lon/lat 'jitter' and data precision
can be of order several km's. Reports must be separated by a suitably long period of time (the 'min_win_period')
to minimise the effect of these errors when calculating speed e.g. for reports separated by 24 hours
errors of several cm/s would result which are two orders of magnitude less than a fast ocean current
which seems reasonable. Conversely, the period of time chosen should not be too long so as to resolve
short-lived burst of speed on manoeuvring ships. Larger positional errors may also trigger the check.
Because temporal sampling can be erratic the time period over which this assessment is made is specified
as a range (bound by 'min_win_period' and 'max_win_period') - assessment uses the longest time separation
available within this range.
IMPORTANT - for optimal performance, drifter records with observations failing this check should be
subsequently manually reviewed. Ships move around in all sorts of complicated ways that can readily
confuse such a simple check (e.g. pausing at sea, crisscrossing its own path) and once some erroneous
movement is detected it is likely a human operator can then better pick out the actual bad data. False
fails caused by positional errors (particularly in fast ocean currents) will also need reinstating.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see
description).
max_win_period : float
Maximum period of time in days over which position is assessed for speed estimates
(this should be greater than min_win_period and allow for some erratic temporal sampling e.g.
min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling).
"""
def __init__(
self,
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
speed_limit: float,
min_win_period: float,
max_win_period: float,
) -> None:
"""
Create an object for performing the Speed Check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see
description).
max_win_period : float
Maximum period of time in days over which position is assessed for speed estimates
(this should be greater than min_win_period and allow for some erratic temporal sampling e.g.
min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling).
"""
self.lon = np.asarray(lons, dtype=float)
self.lat = np.asarray(lats, dtype=float)
self.nreps = len(lons)
dates_list: list[datetime] = pd.to_datetime(dates).tolist()
self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float)
self.speed_limit = speed_limit
self.min_win_period = min_win_period
self.max_win_period = max_win_period
# Initialise QC outcomes with untested
self.qc_outcomes = np.zeros(self.nreps, dtype=float) + untested
[docs]
def get_qc_outcomes(self) -> np.ndarray:
"""
Retrieve the QC outcomes for all observations.
Returns
-------
np.ndarray
Array of QC flags for each observation (0 = passed, 1 = failed, untested otherwise).
"""
return self.qc_outcomes
[docs]
def valid_parameters(self) -> bool:
"""
Validate the QC parameters to ensure they are sensible.
Checks that:
- `speed_limit` is non-negative.
- `min_win_period` is non-negative.
- `max_win_period` is greater than or equal to `min_win_period`.
Warnings are raised for any invalid parameters.
Returns
-------
bool
True if all parameters are valid, False otherwise.
"""
valid = False
if not (self.speed_limit >= 0):
warnings.warn(UserWarning(f"Invalid speed_limit: {self.speed_limit}. Must be zero or positive."), stacklevel=2)
elif not (self.min_win_period >= 0):
warnings.warn(UserWarning(f"Invalid speed_limit: {self.min_win_period}. Must be zero or positive."), stacklevel=2)
elif not (self.max_win_period >= self.min_win_period):
warnings.warn(UserWarning("max_win_period must be greater than or equal to min_win_period."), stacklevel=2)
else:
valid = True
return valid
[docs]
def valid_arrays(self) -> bool:
"""
Validate the input observation arrays (longitude, latitude, and time differences).
Checks for:
- NaN values in longitude, latitude, or time arrays.
- Monotonicity of the time array (`self.hrs`).
Warnings are raised for any issues detected.
Returns
-------
bool
True if all arrays are valid, False otherwise.
"""
valid = False
if any(np.isnan(self.lon)):
warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2)
elif any(np.isnan(self.lat)):
warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2)
elif any(np.isnan(self.hrs)):
warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2)
elif not is_monotonic(self.hrs):
warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2)
else:
valid = True
return valid
[docs]
def do_speed_check(self) -> None:
"""Perform the actual speed check."""
nrep = self.nreps
min_win_period_hours = self.min_win_period * 24.0
max_win_period_hours = self.max_win_period * 24.0
if not self.valid_arrays() or not self.valid_parameters():
self.qc_outcomes = np.zeros(self.nreps) + untestable
return
# Default to pass
self.qc_outcomes = np.zeros(nrep) + passed
# loop through timeseries to see if drifter is moving too fast
# and flag any occurrences
index_arr = np.array(range(0, nrep)) # type: np.ndarray
i = 0
time_to_end = self.hrs[-1] - self.hrs[i]
while time_to_end >= min_win_period_hours:
# Find all time points before current time plus the max window period
f_win = self.hrs <= self.hrs[i] + max_win_period_hours
# Window length is the difference between the latest time point in
# the window and the current time
win_len = self.hrs[f_win][-1] - self.hrs[i]
# If the actual window length is shorter than the minimum window period
# then go to the next time step
if win_len < min_win_period_hours:
i += 1
time_to_end = self.hrs[-1] - self.hrs[i]
continue
# If the actual window length is long enough then calculate the speed
# based on the first and last points in the window
displace = sphere_distance(self.lat[i], self.lon[i], self.lat[f_win][-1], self.lon[f_win][-1])
speed = displace / win_len # km per hr
speed = speed * 1000.0 / (60.0 * 60) # metres per sec
# If the average speed during the window is too high then set all
# flags in the window to failed.
if speed > self.speed_limit:
self.qc_outcomes[i : index_arr[f_win][-1] + 1] = failed
i += 1
time_to_end = self.hrs[-1] - self.hrs[i]
[docs]
class NewSpeedChecker:
"""
Class used to carry out :py:func:`.do_new_speed_check`.
Check to see whether a drifter has been picked up by a ship (out of water) based on 1/100th degree
precision positions. A flag is set for each input report: flag=1 for reports deemed picked up,
else flag=0.
A drifter is deemed picked up if it is moving faster than might be expected for a fast ocean current
(a few m/s). Unreasonably fast movement is detected when speed of travel between report-pairs exceeds
the chosen 'speed_limit' (speed is estimated as distance between reports divided by time separation -
this 'straight line' speed between the two points is a minimum speed estimate given a less-direct
path may have been followed). Positional errors introduced by lon/lat 'jitter' and data precision
can be of order several km's. Reports must be separated by a suitably long period of time (the 'min_win_period')
to minimise the effect of these errors when calculating speed e.g. for reports separated by 9 hours
errors of order 10 cm/s would result which are a few percent of fast ocean current speed. Conversely,
the period of time chosen should not be too long so as to resolve short-lived burst of speed on
manouvering ships. Larger positional errors may also trigger the check.
For each report, speed is assessed over the shortest available period that exceeds 'min_win_period'.
Prior to assessment the drifter record is screened for positional errors using the iQuam track check
method (from :py:class:`ex.Voyage`). When running the iQuam check the record is treated as a ship (not a
drifter) so as to avoid accidentally filtering out observations made aboard a ship (which is what we
are trying to detect). This iQuam track check does not overwrite any existing iQuam track check flags.
IMPORTANT - for optimal performance, drifter records with observations failing this check should be
subsequently manually reviewed. Ships move around in all sorts of complicated ways that can readily
confuse such a simple check (e.g. pausing at sea, crisscrossing its own path) and once some erroneous
movement is detected it is likely a human operator can then better pick out the actual bad data. False
fails caused by positional errors (particularly in fast ocean currents) will also need reinstating.
The class has the following class attributes which can be modified using the set_parameters method.
* iquam_parameters: Parameter dictionary for Voyage.iquam_track_check() function.
* speed_limit: maximum allowable speed for an in situ drifting buoy (metres per second)
* min_win_period: minimum period of time in days over which position is assessed for speed estimates (see
description)
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see description).
ship_speed_limit : float
Ship speed limit for the IQUAM track check.
delta_d : float
The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km.
Used in the IQUAM track check.
delta_t : float
The smallest increment in time that can be resolved. For hourly data expressed as a float this is 0.01
hours. Used in the IQUAM track check.
n_neighbours : int
Number of neighbours considered in the IQUAM track check.
"""
def __init__(
self,
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
speed_limit: float,
min_win_period: float,
ship_speed_limit: float,
delta_d: float,
delta_t: float,
n_neighbours: int,
) -> None:
"""
Object used to perform the new speed check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see description).
ship_speed_limit : float
Ship speed limit for the IQUAM track check.
delta_d : float
The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km.
Used in the IQUAM track check.
delta_t : float
The smallest increment in time that can be resolved.
For hourly data expressed as a float this is 0.01 hours.
Used in the IQUAM track check.
n_neighbours : int
Number of neighbours considered in the IQUAM track check.
"""
self.lon = np.asarray(lons, dtype=float)
self.lat = np.asarray(lats, dtype=float)
self.nreps = len(lons)
self.dates = np.asarray(dates, dtype=datetime)
dates_list: list[datetime] = pd.to_datetime(dates).tolist()
self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float)
self.speed_limit = speed_limit
self.min_win_period = min_win_period
self.ship_speed_limit = ship_speed_limit
self.delta_d = delta_d
self.delta_t = delta_t
self.n_neighbours = n_neighbours
self.iquam_track_ship: np.ndarray = np.array([], dtype=int)
# Initialise QC outcomes with untested
self.qc_outcomes = np.zeros(self.nreps) + untested
[docs]
def get_qc_outcomes(self) -> np.ndarray:
"""
Retrieve the QC outcomes for all observations.
Returns
-------
np.ndarray
Array of QC flags for each observation (0 = valid, 1 = flagged, untested otherwise).
"""
return self.qc_outcomes
[docs]
def valid_parameters(self) -> bool:
"""
Validate the QC parameters to ensure they are sensible.
Checks that:
- `speed_limit` is non-negative.
- `min_win_period` is non-negative.
Warnings are raised for any invalid parameters.
Returns
-------
bool
True if all parameters are valid, False otherwise.
"""
valid = False
if not (self.speed_limit >= 0):
warnings.warn(UserWarning(f"Invalid speed_limit: {self.speed_limit}. Must be zero or positive."), stacklevel=2)
elif not (self.min_win_period >= 0):
warnings.warn(UserWarning(f"Invalid min_win_period: {self.min_win_period}. Must be zero or positive."), stacklevel=2)
else:
valid = True
return valid
[docs]
def valid_arrays(self) -> bool:
"""
Validate the input arrays (longitude, latitude, and time differences).
Checks for:
- NaN values in longitude, latitude, or time arrays.
- Monotonicity of the time array (`self.hrs`).
Warnings are raised for any issues detected.
Returns
-------
bool
True if all arrays are valid, False otherwise.
"""
valid = False
if any(np.isnan(self.lon)):
warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2)
elif any(np.isnan(self.lat)):
warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2)
elif any(np.isnan(self.hrs)):
warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2)
elif not is_monotonic(self.hrs):
warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2)
else:
valid = True
return valid
[docs]
def do_new_speed_check(self) -> None:
"""Perform the actual new speed check."""
nrep = self.nreps
min_win_period_hours = self.min_win_period * 24.0
if not self.valid_arrays() or not self.valid_parameters():
self.qc_outcomes = np.zeros(self.nreps) + untestable
return
self.perform_iquam_track_check()
# Initialise
self.qc_outcomes = np.zeros(nrep) + passed
# loop through timeseries to see if drifter is moving too fast and flag any occurrences
index_arr = np.array(range(0, nrep)) # type: np.ndarray
i = 0
time_to_end = self.hrs[-1] - self.hrs[i]
while time_to_end >= min_win_period_hours:
if self.iquam_track_ship[i] == failed:
i += 1
time_to_end = self.hrs[-1] - self.hrs[i]
continue
f_win = (self.hrs >= self.hrs[i] + min_win_period_hours) & (self.iquam_track_ship == passed)
if not any(f_win):
i += 1
time_to_end = self.hrs[-1] - self.hrs[i]
continue
win_len = self.hrs[f_win][0] - self.hrs[i]
displace = sphere_distance(self.lat[i], self.lon[i], self.lat[f_win][0], self.lon[f_win][0])
speed = displace / win_len # km per hr
speed = speed * 1000.0 / (60.0 * 60) # metres per sec
if speed > self.speed_limit:
self.qc_outcomes[i : index_arr[f_win][0] + 1] = failed
i += 1
time_to_end = self.hrs[-1] - self.hrs[i]
[docs]
class AgroundChecker:
"""
Class used to carry out :py:func:`.do_aground_check`.
Check to see whether a drifter has run aground based on 1/100th degree precision positions.
A flag is set for each input report: flag=1 for reports deemed aground, else flag=0.
Positional errors introduced by lon/lat 'jitter' and data precision can be of order several km's.
Longitude and latitude timeseries are smoothed prior to assessment to reduce position 'jitter'.
Some post-smoothing position 'jitter' may remain and its expected magnitude is set within the
function by the 'tolerance' parameter. A drifter is deemed aground when, after a period of time,
the distance between reports is less than the 'tolerance'. The minimum period of time over which this
assessment is made is set by 'min_win_period'. This period must be long enough such that slow moving
drifters are not falsely flagged as aground given errors in position (e.g. a buoy drifting at around
1 cm/s will travel around 1 km/day; given 'tolerance' and precision errors of a few km's the 'min_win_period'
needs to be several days to ensure distance-travelled exceeds the error so that motion is reliably
detected and the buoy is not falsely flagged as aground). However, min_win_period should not be longer
than necessary as buoys that run aground for less than min_win_period will not be detected.
Because temporal sampling can be erratic the time period over which an assessment is made is specified
as a range (bound by 'min_win_period' and 'max_win_period') - assessment uses the longest time separation
available within this range. If a drifter is deemed aground and subsequently starts moving (e.g. if a drifter
has moved very slowly for a prolonged period) incorrectly flagged reports will be reinstated.
* smooth_win: length of window (odd number) in datapoints used for smoothing lon/lat
* min_win_period: minimum period of time in days over which position is assessed for no movement (see description)
* max_win_period: maximum period of time in days over which position is assessed for no movement (this should be
greater than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of
up to 2-days in sampling).
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
smooth_win : int
Length of window (odd number) in datapoints used for smoothing lon/lat.
min_win_period : int
Minimum period of time in days over which position is assessed for no movement (see description).
max_win_period : int or None
Maximum period of time in days over which position is assessed for no movement (this should be greater
than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of
up to 2-days in sampling).
"""
# displacement resulting from 1/100th deg 'position-jitter' at the equator (km)
tolerance = sphere_distance(0, 0, 0.01, 0.01)
def __init__(
self,
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
smooth_win: int,
min_win_period: int,
max_win_period: int | None,
) -> None:
"""
Create on object for performing the Aground and New Aground checks.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
smooth_win : int
Length of window (odd number) in datapoints used for smoothing lon/lat.
min_win_period : int
Minimum period of time in days over which position is assessed for no movement (see description).
max_win_period : int or None
Maximum period of time in days over which position is assessed for no movement (this should be greater
than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of
up to 2-days in sampling).
"""
self.lon = np.asarray(lons, dtype=float)
self.lat = np.asarray(lats, dtype=float)
self.nreps = len(lons)
dates_list: list[datetime] = pd.to_datetime(dates).tolist()
self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float)
self.smooth_win = smooth_win
self.min_win_period = min_win_period
self.max_win_period = max_win_period
self.lon_smooth: np.ndarray = np.array([], dtype=float)
self.lat_smooth: np.ndarray = np.array([], dtype=float)
self.hrs_smooth: np.ndarray = np.array([], dtype=float)
# Initialise QC outcomes with untested
self.qc_outcomes = np.zeros(self.nreps) + untested
[docs]
def get_qc_outcomes(self) -> np.ndarray:
"""
Return the QC outcomes.
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
"""
return self.qc_outcomes
[docs]
def valid_parameters(self) -> bool:
"""
Check the parameters are valid. Raises a warning and returns False if not valid.
Returns
-------
bool
True if parameter is valid, otherwise False.
"""
valid = False
if self.smooth_win < 1:
warnings.warn(UserWarning("Invalid smooth_win: {self.smooth_win}. Must be positive."), stacklevel=2)
elif self.smooth_win % 2 == 0:
warnings.warn(UserWarning("Invalid smooth_win: {self.smooth_win}. Must be an odd number."), stacklevel=2)
elif self.min_win_period < 1:
warnings.warn(UserWarning(f"Invalid min_win_period: {self.min_win_period}. Must be positive."), stacklevel=2)
elif self.max_win_period is None:
valid = True
elif self.max_win_period < 1:
warnings.warn(UserWarning("Invalid max_win_period: {self.max_win_period}. Must be positive."), stacklevel=2)
elif self.max_win_period < self.min_win_period:
warnings.warn("max_win_period must be greater than or equal to min_win_period.", stacklevel=2)
else:
valid = True
return valid
[docs]
def valid_arrays(self) -> bool:
"""
Check the input arrays are valid. Raises a warning and returns False if not valid.
Returns
-------
bool
True if array is valid, otherwise False.
"""
valid = False
if any(np.isnan(self.lon)):
warnings.warn(UserWarning("Nan(s) found in longitude."), stacklevel=2)
elif any(np.isnan(self.lat)):
warnings.warn(UserWarning("Nan(s) found in latitudes."), stacklevel=2)
elif any(np.isnan(self.hrs)):
warnings.warn(UserWarning("Nan(s) found in time differences."), stacklevel=2)
elif not is_monotonic(self.hrs):
warnings.warn(UserWarning("times are not sorted: {self.hrs}"), stacklevel=2)
else:
valid = True
return valid
[docs]
def smooth_arrays(self) -> None:
"""Perform the preprocessing of the lat lon and time arrays."""
half_win = int((self.smooth_win - 1) / 2)
# create smoothed lon/lat timeseries # length of series after smoothing
nrep_smooth = self.nreps - self.smooth_win + 1
lon_smooth = np.empty(nrep_smooth) # type: np.ndarray
lon_smooth[:] = np.nan
lat_smooth = np.empty(nrep_smooth) # type: np.ndarray
lat_smooth[:] = np.nan
hrs_smooth = np.empty(nrep_smooth) # type: np.ndarray
hrs_smooth[:] = np.nan
for i in range(0, nrep_smooth):
lon_smooth[i] = np.median(self.lon[i : i + self.smooth_win])
lat_smooth[i] = np.median(self.lat[i : i + self.smooth_win])
hrs_smooth[i] = self.hrs[i + half_win]
self.lon_smooth = lon_smooth
self.lat_smooth = lat_smooth
self.hrs_smooth = hrs_smooth
[docs]
def do_aground_check(self) -> None:
"""Perform the actual aground check."""
half_win = (self.smooth_win - 1) / 2
min_win_period_hours = self.min_win_period * 24.0
if self.max_win_period is not None:
max_win_period_hours = self.max_win_period * 24.0
else:
max_win_period_hours = None
if not self.valid_parameters() or not self.valid_arrays():
self.qc_outcomes[:] = untestable
return
# Default to pass
if self.nreps <= self.smooth_win:
self.qc_outcomes[:] = passed
return
self.smooth_arrays()
# loop through smoothed timeseries to see if drifter has run aground
i = 0
is_aground = False # keeps track of whether drifter is deemed aground
i_aground = np.nan # keeps track of index when drifter first ran aground
time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i]
while time_to_end >= min_win_period_hours:
if self.max_win_period is not None:
f_win = self.hrs_smooth <= self.hrs_smooth[i] + max_win_period_hours
win_len = self.hrs_smooth[f_win][-1] - self.hrs_smooth[i]
if win_len < min_win_period_hours:
i += 1
time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i]
continue
displace = sphere_distance(
self.lat_smooth[i],
self.lon_smooth[i],
self.lat_smooth[f_win][-1],
self.lon_smooth[f_win][-1],
)
else:
displace = sphere_distance(
self.lat_smooth[i],
self.lon_smooth[i],
self.lat_smooth[-1],
self.lon_smooth[-1],
)
if displace <= AgroundChecker.tolerance:
if not is_aground:
is_aground = True
i_aground = i
else:
is_aground = False
i_aground = np.nan
i += 1
time_to_end = self.hrs_smooth[-1] - self.hrs_smooth[i]
# set flags
if is_aground and i_aground > 0:
i_aground += half_win
# this gets the first index the drifter is deemed aground for the original (un-smoothed) timeseries
# n.b. if i_aground=0 then the entire drifter record is deemed aground and flagged as such
self.qc_outcomes[:] = passed
if is_aground:
self.qc_outcomes[int(i_aground) :] = failed
[docs]
class SSTTailChecker:
"""
Class used to carry out :py:func:`.do_sst_start_tail_check` and :py:func:`.do_sst_end_tail_check`.
Check to see whether there is erroneous sea surface temperature data at the beginning or end of a drifter record
(referred to as 'tails'). Flags are set for each input report: flag=1 for reports
with erroneous data, else flag=0, 'drf_tail1' is used for bad data at the beginning of a record, 'drf_tail2' is
used for bad data at the end of a record.
The tail check makes an assessment of the quality of data at the start and end of a drifting buoy record by
comparing to a background reference field. Data found to be unacceptably biased or noisy relative to the
background are flagged by the check. When making the comparison an allowance is made for background error
variance and also normal drifter error (both bias and random measurement error). The correlation of the
background error is treated as unknown and takes on a value which maximises background error dependent on the
assessment being made. A background error variance limit is also specified, beyond which the background is deemed
unreliable. Observations made during the day, in icy regions or where the background value is missing are
excluded from the comparison.
The check proceeds in two steps; a 'long tail-check' followed by a 'short tail-check'. The idea is that the short
tail-check has finer resolution but lower sensitivity than the long tail-check and may pick off noisy data not
picked up by the long tail check. Only observations that pass the long tail-check are passed to the short
tail-check. Both of these tail checks proceed by moving a window over the data and assessing the data in each
window. Once good data are found the check stops and any bad data preceding this are flagged. If unreliable
background data are encountered the check stops. The checks are run forwards and backwards over the record so as
to assess data at the start and end of the record. If the whole record fails no observations are flagged as there
are then no 'tails' in the data (this is left for other checks). The long tail check looks for groups of
observations that are too biased or noisy as a whole. The short tail check looks for individual observations
exceeding a noise limit within the window.
Parameters
----------
lat : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
lon : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
long_win_len : int
Length of window (in data-points) over which to make long tail-check (must be an odd number).
long_err_std_n : float
Number of standard deviations of combined background and drifter bias error, beyond which
data fail bias check.
short_win_len : int
Length of window (in data-points) over which to make the short tail-check.
short_err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which data
are deemed suspicious.
short_win_n_bad : int
Minimum number of suspicious data points required for failure of short check window.
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation,
degC or K).
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared).
"""
def __init__(
self,
lat: SequenceNumberType,
lon: SequenceNumberType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
dates: SequenceDatetimeType,
long_win_len: int,
long_err_std_n: float,
short_win_len: int,
short_err_std_n: float,
short_win_n_bad: int,
drif_inter: float,
drif_intra: float,
background_err_lim: float,
) -> None:
"""
Create SSTTailChecker object to perform the SST Tail QC Check.
Parameters
----------
lat : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
lon : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
long_win_len : int
Length of window (in data-points) over which to make long tail-check (must be an odd number).
long_err_std_n : float
Number of standard deviations of combined background and drifter bias error, beyond which
data fail bias check.
short_win_len : int
Length of window (in data-points) over which to make the short tail-check.
short_err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which data
are deemed suspicious.
short_win_n_bad : int
Minimum number of suspicious data points required for failure of short check window.
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation,
degC or K).
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC
squared).
"""
self.nreps = len(sst)
self.lat = np.asarray(lat, dtype=float)
self.lon = np.asarray(lon, dtype=float)
self.sst = np.asarray(sst, dtype=float)
self.ostia = np.asarray(ostia, dtype=float)
self.ice = np.asarray(ice, dtype=float)
self.bgvar = np.asarray(bgvar, dtype=float)
self.dates = np.asarray(dates, dtype=datetime)
dates_list: list[datetime] = pd.to_datetime(dates).tolist()
self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float)
self.reps_ind: np.ndarray = np.array([], dtype=float)
self.sst_anom: np.ndarray = np.array([], dtype=float)
self.bgerr: np.ndarray = np.array([], dtype=float)
self.start_tail_ind: int
self.end_tail_ind: int
self.qc_outcomes = np.zeros(self.nreps) + untested
self.long_win_len = long_win_len
self.long_err_std_n = long_err_std_n
self.short_win_len = short_win_len
self.short_err_std_n = short_err_std_n
self.short_win_n_bad = short_win_n_bad
self.drif_inter = drif_inter
self.drif_intra = drif_intra
self.background_err_lim = background_err_lim
[docs]
def get_qc_outcomes(self) -> np.ndarray:
"""
Return the QC outcomes.
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
"""
return self.qc_outcomes
[docs]
def valid_parameters(self) -> bool:
"""
Check the parameters are valid. Raises a warning and returns False if not valid.
Returns
-------
bool
True if parameter is valid, otherwise False.
"""
valid = False
if self.long_win_len < 1:
warnings.warn(UserWarning("Invalid long_win_len: {self.long_win_len}. Must be positive."), stacklevel=2)
elif not (self.long_win_len % 2 != 0):
warnings.warn(UserWarning("Invalid long_win_len: {self.long_win_len}. Must be an odd number."), stacklevel=2)
elif self.long_err_std_n < 0:
warnings.warn(UserWarning("Invalid long_err_std_n: {self.long_err_std_n}. Must be zero or positive."), stacklevel=2)
elif self.short_win_len < 1:
warnings.warn(UserWarning("Invalid short_win_len: {self.short_win_len}. Must be positive."), stacklevel=2)
elif self.short_err_std_n < 0:
warnings.warn(UserWarning("Invalid short_err_std_n: {self.short_err_std_n}. Must be zero or positive."), stacklevel=2)
elif self.short_win_n_bad < 1:
warnings.warn(UserWarning("Invalid short_win_n_bad: {self.short_win_n_bad}. Must be positive."), stacklevel=2)
elif self.drif_inter < 0:
warnings.warn(UserWarning("Invalid drif_inter: {self.drif_inter}. Must be zero or positive."), stacklevel=2)
elif self.drif_intra < 0:
warnings.warn(UserWarning("Invalid drif_intra: {self.drif_intra}. Must be zero or positive."), stacklevel=2)
elif self.background_err_lim < 0:
warnings.warn(UserWarning("Invalid background_err_lim: {self.background_err_lim}. Must be zero or positive."), stacklevel=2)
else:
valid = True
return valid
[docs]
def do_sst_tail_check(self, start_tail: bool) -> None:
"""
Perform the actual SST tail check.
Parameters
----------
start_tail : bool
If True flag the start of the record as failed,
otherwise flag the end of the record as failed.
"""
if not self.valid_parameters():
self.qc_outcomes[:] = untestable
return
if not is_monotonic(self.hrs):
warnings.warn(UserWarning("Times do not increase monotonically"), stacklevel=2)
self.qc_outcomes[:] = untestable
return
invalid_series = self._preprocess_reps()
if invalid_series:
self.qc_outcomes[:] = untestable
return
if len(self.sst_anom) == 0:
self.qc_outcomes[:] = passed
return
self.qc_outcomes[:] = passed
nrep = len(self.sst_anom)
self.start_tail_ind = -1 # keeps track of index where start tail stops
self.end_tail_ind = nrep # keeps track of index where end tail starts
# do long tail check - records shorter than long-window length aren't evaluated
if nrep >= self.long_win_len:
# run forwards then backwards over timeseries
self._do_long_tail_check(forward=True)
self._do_long_tail_check(forward=False)
# do short tail check on records that pass long tail check - whole record already failed long tail check
if self.start_tail_ind < self.end_tail_ind:
first_pass_ind = self.start_tail_ind + 1 # first index passing long tail check
last_pass_ind = self.end_tail_ind - 1 # last index passing long tail check
self._do_short_tail_check(first_pass_ind, last_pass_ind, forward=True)
self._do_short_tail_check(first_pass_ind, last_pass_ind, forward=False)
# now flag reps - whole record failed tail checks, don't flag
if self.start_tail_ind >= self.end_tail_ind:
self.start_tail_ind = -1
self.end_tail_ind = nrep
if not self.start_tail_ind == -1 and start_tail:
self.qc_outcomes[0 : self.reps_ind[self.start_tail_ind] + 1] = failed
if not self.end_tail_ind == nrep and not start_tail:
self.qc_outcomes[self.reps_ind[self.end_tail_ind] :] = failed
@staticmethod
def _parse_rep(
lat: float,
lon: float,
ostia: float,
ice: float,
bgvar: float,
dates: datetime,
) -> tuple[float, float, float, bool, bool]:
"""
Process a report.
Parameters
----------
lat : float
Latitude.
lon : float
Longitude.
ostia : float
OSTIA value matched to this observation.
ice : float
Ice concentration value matched to this observation.
bgvar : float
Background variance value matched to this observation.
dates : np.datetime
Date and time of the observation.
Returns
-------
(float, float, float, bool)
Background value, ice concentration, background variance, and a boolean variable indicating whether the
report is "good".
"""
invalid_ob = False
if not isvalid(ice):
ice = 0.0
if ice < 0.0 or ice > 1.0:
warnings.warn(UserWarning("Invalid ice value"), stacklevel=2)
invalid_ob = True
try:
daytime = track_day_test(
dates.year,
dates.month,
dates.day,
dates.hour + dates.minute / 60,
lat,
lon,
-2.5,
)
except ValueError as error:
warnings.warn(f"Daytime check failed with {error}", stacklevel=2)
daytime = True
invalid_ob = True
land_match = not isvalid(ostia)
ice_match = ice > 0.15
good_match = not (daytime or land_match or ice_match)
return ostia, ice, bgvar, good_match, invalid_ob
def _preprocess_reps(self) -> bool:
"""
Process the reps and calculate the values used in the QC check.
Returns
-------
bool
True if any invalid observations or background values were encountered
during preprocessing, otherwise False.
"""
invalid_series = False
# test and filter out obs with unsuitable background matches
reps_ind: list[int] = []
sst_anom: list[float] = []
bgvar: list[float] = []
for ind in range(self.nreps):
bg_val, _ice_val, bgvar_val, good_match, invalid_ob = self._parse_rep(
self.lat[ind],
self.lon[ind],
self.ostia[ind],
self.ice[ind],
self.bgvar[ind],
self.dates[ind],
)
if invalid_ob:
invalid_series = True
if good_match:
if bg_val < -5.0 or bg_val > 45.0:
warnings.warn(UserWarning("Background value is invalid"), stacklevel=2)
invalid_series = True
if bgvar_val < 0 or bgvar_val > 10.0:
warnings.warn(UserWarning("Background variance is invalid"), stacklevel=2)
invalid_series = True
reps_ind.append(ind)
sst_anom.append(self.sst[ind] - bg_val)
bgvar.append(bgvar_val)
# prepare numpy arrays and variables needed for tail checks
# indices of obs suitable for assessment
self.reps_ind = np.array(reps_ind, dtype=int)
# ob-background differences
self.sst_anom = np.array(sst_anom, dtype=float)
# standard deviation of background error
bgvar_arr = np.array(bgvar, dtype=float)
bgvar_arr[bgvar_arr < 0.0] = float("nan")
self.bgerr = np.sqrt(bgvar_arr)
return invalid_series
def _do_long_tail_check(self, forward: bool = True) -> None:
"""
Perform the long tail check.
Parameters
----------
forward : bool
Flag to set for a forward (True) or backward (False) pass of the long tail check.
"""
nrep = len(self.sst_anom)
mid_win_ind = int((self.long_win_len - 1) / 2)
if forward:
sst_anom_temp = self.sst_anom
bgerr_temp = self.bgerr
else:
sst_anom_temp = np.flipud(self.sst_anom)
bgerr_temp = np.flipud(self.bgerr)
# this is the long tail check
for ix in range(0, nrep - self.long_win_len + 1):
sst_anom_winvals = sst_anom_temp[ix : ix + self.long_win_len]
bgerr_winvals = bgerr_temp[ix : ix + self.long_win_len]
if np.any(bgerr_winvals > np.sqrt(self.background_err_lim)):
break
sst_anom_avg = trim_mean(sst_anom_winvals, 100)
sst_anom_stdev = trim_std(sst_anom_winvals, 100)
bgerr_avg = np.mean(bgerr_winvals)
bgerr_rms = np.sqrt(np.mean(bgerr_winvals**2))
if (abs(sst_anom_avg) > self.long_err_std_n * np.sqrt(self.drif_inter**2 + bgerr_avg**2)) or (
sst_anom_stdev > np.sqrt(self.drif_intra**2 + bgerr_rms**2)
):
if forward:
self.start_tail_ind = ix + mid_win_ind
else:
self.end_tail_ind = (nrep - 1) - ix - mid_win_ind
else:
break
def _do_short_tail_check(self, first_pass_ind: int, last_pass_ind: int, forward: bool = True) -> None:
"""
Perform the short tail check.
Parameters
----------
first_pass_ind : int
Index.
last_pass_ind : int
Index.
forward : bool
Flag to set for a forward (True) or backward (False) pass of the short tail check.
"""
npass = last_pass_ind - first_pass_ind + 1
if npass <= 0:
raise ValueError(f"Invalid npass: {npass}. Must be positive.")
# records shorter than short-window length aren't evaluated
if npass < self.short_win_len:
return
if forward:
sst_anom_temp = self.sst_anom[first_pass_ind : last_pass_ind + 1]
bgerr_temp = self.bgerr[first_pass_ind : last_pass_ind + 1]
else:
sst_anom_temp = np.flipud(self.sst_anom[first_pass_ind : last_pass_ind + 1])
bgerr_temp = np.flipud(self.bgerr[first_pass_ind : last_pass_ind + 1])
# this is the short tail check
for ix in range(0, npass - self.short_win_len + 1):
sst_anom_winvals = sst_anom_temp[ix : ix + self.short_win_len]
bgerr_winvals = bgerr_temp[ix : ix + self.short_win_len]
if np.any(bgerr_winvals > np.sqrt(self.background_err_lim)):
break
limit = self.short_err_std_n * np.sqrt(bgerr_winvals**2 + self.drif_inter**2 + self.drif_intra**2)
exceed_limit = np.logical_or(sst_anom_winvals > limit, sst_anom_winvals < -limit)
if np.sum(exceed_limit) >= self.short_win_n_bad:
if forward:
# if all windows have failed, flag everything
if ix == npass - self.short_win_len:
self.start_tail_ind += self.short_win_len
else:
self.start_tail_ind += 1
else:
# if all windows have failed, flag everything
if ix == npass - self.short_win_len:
self.end_tail_ind -= self.short_win_len
else:
self.end_tail_ind -= 1
else:
break
[docs]
class SSTBiasedNoisyChecker:
"""
Class used to perform the :py:func:`.do_sst_biased_check`, :py:func:`.do_sst_noisy_check`, and :py:func:`.do_sst_biased_noisy_short_check`.
Check to see whether a drifter sea surface temperature record is unacceptably biased or noisy as a whole.
The check makes an assessment of the quality of data in a drifting buoy record by comparing to a background
reference field. If the record is found to be unacceptably biased or noisy relative to the background all
observations are flagged by the check. For longer records the flags 'drf_bias' and 'drf_noise' are set for each
input report: flag=1 for records with erroneous data, else flag=0. For shorter records 'drf_short' is set for
each input report: flag=1 for reports with erroneous data, else flag=0.
When making the comparison an allowance is made for background error variance and also normal drifter error (both
bias and random measurement error). A background error variance limit is also specified, beyond which the
background is deemed unreliable and is excluded from comparison. Observations made during the day, in icy regions
or where the background value is missing are also excluded from the comparison.
The check has two separate streams; a 'long-record check' and a 'short-record check'. Records with at least
n_eval observations are passed to the long-record check, else they are passed to the short-record check. The
long-record check looks for records that are too biased or noisy as a whole. The short record check looks for
individual observations exceeding a noise limit within a record. The purpose of n_eval is to ensure records with
too few observations for their bias and noise to be reliably estimated are handled separately by the short-record
check.
The correlation of the background error is treated as unknown and handled differently for each assessment. For
the long-record noise-check and the short-record check the background error is treated as uncorrelated,
which maximises the possible impact of background error on these assessments. For the long-record bias-check a
limit (bias_lim) is specified beyond which the record is considered biased. The default value for this limit was
chosen based on histograms of drifter-background bias. An alternative approach would be to treat the background
error as entirely correlated across a long-record, which maximises its possible impact on the bias assessment. In
this case the histogram approach was used as the limit could be tuned to give better results.
Parameters
----------
lat : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
lon : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional sea surface temperature array in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional background field sea surface temperature array in K.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional background variance array in K^2.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional ice concentration array in range 0,1.
n_eval : int
The minimum number of drifter observations required to be assessed by the long-record check.
bias_lim : float
Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K).
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which
short-record data are deemed suspicious.
n_bad : int
Minimum number of suspicious data points required for failure of short-record check.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
"""
def __init__(
self,
lat: SequenceNumberType,
lon: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
bgvar: SequenceNumberType,
ice: SequenceNumberType,
n_eval: int,
bias_lim: float,
drif_intra: float,
drif_inter: float,
err_std_n: float,
n_bad: int,
background_err_lim: float,
) -> None:
"""
Create an object for performing the SST Biased, Noisy and Short Checks.
Parameters
----------
lat : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
lon : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional sea surface temperature array in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional background field sea surface temperature array in K.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional background variance array in K^2.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional ice concentration array in range 0,1.
n_eval : int
The minimum number of drifter observations required to be assessed by the long-record check.
bias_lim : float
Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K).
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which
short-record data are deemed suspicious.
n_bad : int
Minimum number of suspicious data points required for failure of short-record check.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
"""
self.lat = np.asarray(lat, dtype=float)
self.lon = np.asarray(lon, dtype=float)
self.dates = np.asarray(dates, dtype=datetime)
self.sst = np.asarray(sst, dtype=float)
self.ostia = np.asarray(ostia, dtype=float)
self.bgvar = np.asarray(bgvar, dtype=float)
self.ice = np.asarray(ice, dtype=float)
self.nreps = len(lat)
dates_list: list[datetime] = pd.to_datetime(dates).tolist()
self.hrs = np.asarray(convert_date_to_hours(dates_list), dtype=float)
self.n_eval = n_eval
self.bias_lim = bias_lim
self.drif_intra = drif_intra
self.drif_inter = drif_inter
self.err_std_n = err_std_n
self.n_bad = n_bad
self.background_err_lim = background_err_lim
self.sst_anom: np.ndarray = np.array([], dtype=float)
self.bgerr: np.ndarray = np.array([], dtype=float)
self.bgvar_is_masked: bool
self.qc_outcomes_bias = np.zeros(self.nreps) + untested
self.qc_outcomes_noise = np.zeros(self.nreps) + untested
self.qc_outcomes_short = np.zeros(self.nreps) + untested
[docs]
def valid_parameters(self) -> bool:
"""
Check the parameters are valid. Raises a warning and returns False if not valid.
Returns
-------
bool
True if parameter is valid, otherwise False.
"""
valid = False
if self.n_eval < 1:
warnings.warn(UserWarning("Invalid n_eval: {self.n_eval}. Must be positive."), stacklevel=2)
elif self.bias_lim < 0:
warnings.warn(UserWarning("Invalid bias_lim: {self.bias_lim}. Must be zero or positive."), stacklevel=2)
elif self.drif_inter < 0:
warnings.warn(UserWarning("Invalid drif_inter: {self.drif_inter}. Must be zero or positive."), stacklevel=2)
elif self.drif_intra < 0:
warnings.warn(UserWarning("Invalid drif_intra: {self.drif_intra}. Must be zero or positive."), stacklevel=2)
elif self.err_std_n < 0:
warnings.warn(UserWarning("Invalid err_std_n: {self.err_std_n}. Must be zero or positive."), stacklevel=2)
elif self.n_bad < 1:
warnings.warn(UserWarning("Invalid n_bad: {self.n_bad}. Must be positive."), stacklevel=2)
elif self.background_err_lim < 0:
warnings.warn(UserWarning("Invalid background_err_lim: {self.background_err_lim}. Must be zero or positive."), stacklevel=2)
else:
valid = True
return valid
[docs]
def get_qc_outcomes_bias(self) -> np.ndarray:
"""
Return the QC outcomes for the bias check.
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
"""
return self.qc_outcomes_bias
[docs]
def get_qc_outcomes_noise(self) -> np.ndarray:
"""
Return the QC outcomes for the noisy check.
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
"""
return self.qc_outcomes_noise
[docs]
def get_qc_outcomes_short(self) -> np.ndarray:
"""
Return the QC outcomes for the short check.
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
"""
return self.qc_outcomes_short
[docs]
def set_all_qc_outcomes_to(self, input_state: int) -> None:
"""
Set all the QC outcomes to the specified input_state.
Parameters
----------
input_state : int
QC flag to map to the QC outcomes.
"""
if input_state not in [
passed,
failed,
untested,
untestable,
]:
raise ValueError(f"Invalid input_state: {input_state}. Must be one of [{passed}, {failed}, {untested}, {untestable}].")
self.qc_outcomes_short[:] = input_state
self.qc_outcomes_noise[:] = input_state
self.qc_outcomes_bias[:] = input_state
[docs]
def do_sst_biased_noisy_check(self) -> None:
"""Perform the bias/noise check QC."""
if not self.valid_parameters():
self.set_all_qc_outcomes_to(untestable)
return
invalid_series = self._preprocess_reps()
if invalid_series:
self.set_all_qc_outcomes_to(untestable)
return
long_record = not (len(self.sst_anom) < self.n_eval)
self.set_all_qc_outcomes_to(passed)
if long_record:
self._long_record_qc()
else:
if not self.bgvar_is_masked:
self._short_record_qc()
@staticmethod
def _parse_rep(
lat: float,
lon: float,
ostia: float,
ice: float,
bgvar: float,
dates: datetime,
background_err_lim: float,
) -> tuple[float, float, float, bool, bool, bool]:
"""
Extract QC-relevant variables from a marine report.
Parameters
----------
lat : float
Latitude of the observation to be parsed.
lon : float
Longitude of the observation to be parsed.
ostia : float
Background SST field value.
ice : float
Ice concentration field value.
bgvar : float
Background variance field value.
dates : datetime
Date and time of the observation to be parsed.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
Returns
-------
float, float, float, bool, bool, bool
Returns the background SST value, ice value, background SST variance, a flag that indicates a good match,
and a flag that indicates if the background variance is valid, and a flag that indicates if
the observation is valid overall.
"""
invalid_ob = False
if not isvalid(ice):
ice = 0.0
if ice < 0.0 or ice > 1.0:
warnings.warn(UserWarning("Invalid ice value"), stacklevel=2)
invalid_ob = True
try:
daytime = track_day_test(
dates.year,
dates.month,
dates.day,
dates.hour + dates.minute / 60,
lat,
lon,
-2.5,
)
except ValueError as error:
warnings.warn(f"Daytime check failed with {error}", stacklevel=2)
daytime = True
invalid_ob = True
land_match = not isvalid(ostia)
ice_match = ice > 0.15
bgvar_mask = bgvar is not None and bgvar > background_err_lim
good_match = not (daytime or land_match or ice_match or bgvar_mask)
return ostia, ice, bgvar, good_match, bgvar_mask, invalid_ob
def _preprocess_reps(self) -> bool:
"""
Fill SST anomalies, background errors, and flags for invalid background values.
This method processes each observation to compute sea surface temperature (SST)
anomalies, background error standard deviations, and flags any missing or invalid
background values. It also checks whether the time series is sorted and sets
a mask flag if any background variances are masked.
Returns
-------
bool
True if any invalid observations or background values were encountered
during preprocessing, otherwise False.
"""
invalid_series = False
# test and filter out obs with unsuitable background matches
sst_anom = []
bgvar = []
bgvar_is_masked = False
for ind in range(self.nreps):
bg_val, _ice_val, bgvar_val, good_match, bgvar_mask, invalid_ob = SSTBiasedNoisyChecker._parse_rep(
self.lat[ind],
self.lon[ind],
self.ostia[ind],
self.ice[ind],
self.bgvar[ind],
self.dates[ind],
self.background_err_lim,
)
if invalid_ob:
invalid_series = True
if bgvar_mask:
bgvar_is_masked = True
if good_match:
if bg_val < -5.0 or bg_val > 45.0:
warnings.warn(UserWarning("Background value is invalid"), stacklevel=2)
invalid_series = True
if bgvar_val < 0 or bgvar_val > 10.0:
warnings.warn(UserWarning("Background variance is invalid"), stacklevel=2)
invalid_series = True
sst_anom.append(self.sst[ind] - bg_val)
bgvar.append(bgvar_val)
# prepare numpy arrays and variables needed for checks
self.sst_anom = np.array(sst_anom) # ob-background differences
bgvar_arr = np.array(bgvar)
bgvar_arr[bgvar_arr < 0] = np.nan
self.bgerr = np.sqrt(np.array(bgvar_arr)) # standard deviation of background error
self.bgvar_is_masked = bgvar_is_masked
if not is_monotonic(self.hrs):
warnings.warn(UserWarning("Not sorted in time order"), stacklevel=2)
invalid_series = True
return invalid_series
def _long_record_qc(self) -> None:
"""Perform the long record check."""
sst_anom_avg = np.mean(self.sst_anom)
sst_anom_stdev = np.std(self.sst_anom)
bgerr_rms = np.sqrt(np.mean(self.bgerr**2))
self.qc_outcomes_bias[:] = passed
if abs(sst_anom_avg) > self.bias_lim:
self.qc_outcomes_bias[:] = failed
self.qc_outcomes_noise[:] = passed
if sst_anom_stdev > np.sqrt(self.drif_intra**2 + bgerr_rms**2):
self.qc_outcomes_noise[:] = failed
def _short_record_qc(self) -> None:
"""Perform the short record check."""
# Calculate the limit based on the combined uncertainties (background error, drifter inter and drifter intra
# error) and then multiply by the err_std_n
limit = self.err_std_n * np.sqrt(self.bgerr**2 + self.drif_inter**2 + self.drif_intra**2)
# If the number of obs outside the limit exceed n_bad then flag them all as bad
self.qc_outcomes_short[:] = passed
exceed_limit = np.logical_or(self.sst_anom > limit, self.sst_anom < -limit)
if np.sum(exceed_limit) >= self.n_bad:
self.qc_outcomes_short[:] = failed
[docs]
@inspect_arrays(["lons", "lats", "dates"])
def do_speed_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
speed_limit: float,
min_win_period: float,
max_win_period: float,
) -> np.ndarray:
"""
Perform the Track QC speed check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see
description).
max_win_period : float
Maximum period of time in days over which position is assessed for speed estimates
(this should be greater than min_win_period and allow for some erratic temporal sampling e.g.
min_win_period + 0.2 to allow for gaps of up to 0.2 - days in sampling).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if speed check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* speed_limit = 2.5
* min_win_period = 0.8
* max_win_perido = 1.8
"""
lons_arr, lats_arr, dates_arr = ensure_arrays(lons=lons, lats=lats, dates=dates)
checker = SpeedChecker(lons_arr, lats_arr, dates_arr, speed_limit, min_win_period, max_win_period)
checker.do_speed_check()
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lons", "lats", "dates"])
def do_new_speed_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
speed_limit: float,
min_win_period: float,
ship_speed_limit: float,
delta_d: float,
delta_t: float,
n_neighbours: int,
) -> np.ndarray:
"""
Perform the new speed check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
speed_limit : float
Maximum allowable speed for an in situ drifting buoy (metres per second).
min_win_period : float
Minimum period of time in days over which position is assessed for speed estimates (see description).
ship_speed_limit : float
Ship speed limit for the IQUAM track check.
delta_d : float
The smallest increment in distance that can be resolved. For 0.01 degrees of lat-lon this is 1.11 km. Used
in the IQUAM track check.
delta_t : float
The smallest increment in time that can be resolved. For hourly data expressed as a float this is 0.01 hours.
Used in the IQUAM track check.
n_neighbours : int
Number of neighbours considered in the IQUAM track check.
Returns
-------
array-like of int, shape (n,)
Array containing the QC outcomes for the new speed check.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* speed_limit = 3.0
* min_win_period = 0.375
And, for the IQUAM-specific parameters:
* ship_speed_limit = 60.0
* delta_d = 1.11
* delta_t = 0.01
* n_neighbours = 5
"""
lons_arr, lats_arr, dates_arr = ensure_arrays(lons=lons, lats=lats, dates=dates)
checker = NewSpeedChecker(
lons_arr,
lats_arr,
dates_arr,
speed_limit,
min_win_period,
ship_speed_limit,
delta_d,
delta_t,
n_neighbours,
)
checker.do_new_speed_check()
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lons", "lats", "dates"])
def do_aground_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
smooth_win: int,
min_win_period: int,
max_win_period: int,
) -> np.ndarray:
"""
Perform the aground check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
smooth_win : int
Length of window (odd number) in datapoints used for smoothing lon/lat.
min_win_period : int
Minimum period of time in days over which position is assessed for no movement (see description).
max_win_period : int or None
Maximum period of time in days over which position is assessed for no movement (this should be greater
than min_win_period and allow for erratic temporal sampling e.g. min_win_period+2 to allow for gaps of
up to 2-days in sampling).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if aground check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* smooth_win = 41
* min_win_period = 8
* max_win_period = 10
"""
lats_arr, lons_arr, dates_arr = ensure_arrays(lats=lats, lons=lons, dates=dates)
checker = AgroundChecker(lons_arr, lats_arr, dates_arr, smooth_win, min_win_period, max_win_period)
checker.do_aground_check()
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lons", "lats", "dates"])
def do_new_aground_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
smooth_win: int,
min_win_period: int,
) -> np.ndarray:
"""
Perform the new aground check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
smooth_win : int
Length of window (odd number) in datapoints used for smoothing lon/lat.
min_win_period : int
Minimum period of time in days over which position is assessed for no movement (see description).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if new aground check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* smooth_win = 41
* min_win_period = 8
"""
lats_arr, lons_arr, dates_arr = ensure_arrays(lats=lats, lons=lons, dates=dates)
checker = AgroundChecker(lons_arr, lats_arr, dates_arr, smooth_win, min_win_period, None)
checker.do_aground_check()
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lats", "lons", "sst", "ostia", "ice", "bgvar", "dates"])
def do_sst_start_tail_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
long_win_len: int,
long_err_std_n: float,
short_win_len: int,
short_err_std_n: float,
short_win_n_bad: int,
drif_inter: float,
drif_intra: float,
background_err_lim: float,
) -> np.ndarray:
"""
Perform the SST Start Tail Check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
long_win_len : int
Length of window (in data-points) over which to make long tail-check (must be an odd number).
long_err_std_n : float
Number of standard deviations of combined background and drifter bias error, beyond which
data fail bias check.
short_win_len : int
Length of window (in data-points) over which to make the short tail-check.
short_err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which data
are deemed suspicious.
short_win_n_bad : int
Minimum number of suspicious data points required for failure of short check window.
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation,
degC or K).
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC
squared).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if SST start tail check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* long_win_len = 121
* long_err_std_n = 3.0
* short_win_len = 30
* short_err_std_n = 3.0
* short_win_n_bad = 2
* drif_inter = 0.29
* drif_intra = 1.00
* background_err_lim = 0.3
"""
lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays(
lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates
)
checker = SSTTailChecker(
lats_arr,
lons_arr,
sst_arr,
ostia_arr,
ice_arr,
bgvar_arr,
dates_arr,
long_win_len,
long_err_std_n,
short_win_len,
short_err_std_n,
short_win_n_bad,
drif_inter,
drif_intra,
background_err_lim,
)
checker.do_sst_tail_check(True)
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lats", "lons", "sst", "ostia", "ice", "bgvar", "dates"])
def do_sst_end_tail_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
long_win_len: int,
long_err_std_n: float,
short_win_len: int,
short_err_std_n: float,
short_win_n_bad: int,
drif_inter: float,
drif_intra: float,
background_err_lim: float,
) -> np.ndarray:
"""
Perform the SST Start Tail Check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
long_win_len : int
Length of window (in data-points) over which to make long tail-check (must be an odd number).
long_err_std_n : float
Number of standard deviations of combined background and drifter bias error, beyond which
data fail bias check.
short_win_len : int
Length of window (in data-points) over which to make the short tail-check.
short_err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which data
are deemed suspicious.
short_win_n_bad : int
Minimum number of suspicious data points required for failure of short check window.
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation,
degC or K).
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC
squared).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if SST start tail check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* long_win_len = 121
* long_err_std_n = 3.0
* short_win_len = 30
* short_err_std_n = 3.0
* short_win_n_bad = 2
* drif_inter = 0.29
* drif_intra = 1.00
* background_err_lim = 0.3
"""
lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays(
lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates
)
checker = SSTTailChecker(
lats_arr,
lons_arr,
sst_arr,
ostia_arr,
ice_arr,
bgvar_arr,
dates_arr,
long_win_len,
long_err_std_n,
short_win_len,
short_err_std_n,
short_win_n_bad,
drif_inter,
drif_intra,
background_err_lim,
)
checker.do_sst_tail_check(False)
return checker.get_qc_outcomes()
[docs]
@inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"])
def do_sst_biased_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
n_eval: int,
bias_lim: float,
drif_intra: float,
drif_inter: float,
err_std_n: float,
n_bad: int,
background_err_lim: float,
) -> np.ndarray:
"""
Perform the SST bias check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
n_eval : int
The minimum number of drifter observations required to be assessed by the long-record check.
bias_lim : float
Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K).
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which short-record data are
deemed suspicious.
n_bad : int
Minimum number of suspicious data points required for failure of short-record check.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if SST bias check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* n_eval = 30
* bias_lim = 1.10
* drif_intra = 1.0
* drif_inter = 0.29
* err_std_n = 3.0
* n_bad = 2
* background_err_lim = 0.3
"""
lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays(
lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates
)
checker = SSTBiasedNoisyChecker(
lats_arr,
lons_arr,
dates_arr,
sst_arr,
ostia_arr,
bgvar_arr,
ice_arr,
n_eval,
bias_lim,
drif_intra,
drif_inter,
err_std_n,
n_bad,
background_err_lim,
)
checker.do_sst_biased_noisy_check()
return checker.get_qc_outcomes_bias()
[docs]
@inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"])
def do_sst_noisy_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
n_eval: int,
bias_lim: float,
drif_intra: float,
drif_inter: float,
err_std_n: float,
n_bad: int,
background_err_lim: float,
) -> np.ndarray:
"""
Perform the SST noise check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
n_eval : int
The minimum number of drifter observations required to be assessed by the long-record check.
bias_lim : float
Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K).
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which short-record data are
deemed suspicious.
n_bad : int
Minimum number of suspicious data points required for failure of short-record check.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if SST noise check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* n_eval = 30
* bias_lim = 1.10
* drif_intra = 1.0
* drif_inter = 0.29
* err_std_n = 3.0
* n_bad = 2
* background_err_lim = 0.3
"""
lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays(
lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates
)
checker = SSTBiasedNoisyChecker(
lats_arr,
lons_arr,
dates_arr,
sst_arr,
ostia_arr,
bgvar_arr,
ice_arr,
n_eval,
bias_lim,
drif_intra,
drif_inter,
err_std_n,
n_bad,
background_err_lim,
)
checker.do_sst_biased_noisy_check()
return checker.get_qc_outcomes_noise()
[docs]
@inspect_arrays(["lats", "lons", "dates", "sst", "ostia", "bgvar", "ice"])
def do_sst_biased_noisy_short_check(
lons: SequenceNumberType,
lats: SequenceNumberType,
dates: SequenceDatetimeType,
sst: SequenceNumberType,
ostia: SequenceNumberType,
ice: SequenceNumberType,
bgvar: SequenceNumberType,
n_eval: int,
bias_lim: float,
drif_intra: float,
drif_inter: float,
err_std_n: float,
n_bad: int,
background_err_lim: float,
) -> np.ndarray:
"""
Perform the SST short check.
Parameters
----------
lons : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional longitude array in degrees.
lats : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional latitude array in degrees.
dates : :py:obj:`~marine_qc.SequenceDatetimeType`
1-dimensional date array.
sst : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of sea surface temperatures in K.
ostia : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background field sea surface temperatures in K.
ice : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of ice concentrations in the range 0.0 to 1.0.
bgvar : :py:obj:`~marine_qc.SequenceNumberType`
1-dimensional array of background sea surface temperature fields variances in K^2.
n_eval : int
The minimum number of drifter observations required to be assessed by the long-record check.
bias_lim : float
Maximum allowable drifter-background bias, beyond which a record is considered biased (degC or K).
drif_intra : float
Maximum random measurement uncertainty reasonably expected in drifter data (standard deviation, degC or K).
drif_inter : float
Spread of biases expected in drifter data (standard deviation, degC or K).
err_std_n : float
Number of standard deviations of combined background and drifter error, beyond which short-record data are
deemed suspicious.
n_bad : int
Minimum number of suspicious data points required for failure of short-record check.
background_err_lim : float
Background error variance beyond which the SST background is deemed unreliable (degC squared or K squared).
Returns
-------
array-like of int, shape (n,)
1-dimensional array containing QC flags.
1 if SST short check fails, 0 otherwise.
Raises
------
TypeError
If `inspect_arrays` does not return np.ndarrays.
Notes
-----
In previous versions, default values for the parameters were:
* n_eval = 30
* bias_lim = 1.10
* drif_intra = 1.0
* drif_inter = 0.29
* err_std_n = 3.0
* n_bad = 2
* background_err_lim = 0.3
"""
lats_arr, lons_arr, sst_arr, ostia_arr, ice_arr, bgvar_arr, dates_arr = ensure_arrays(
lats=lats, lons=lons, sst=sst, ostia=ostia, ice=ice, bgvar=bgvar, dates=dates
)
checker = SSTBiasedNoisyChecker(
lats_arr,
lons_arr,
dates_arr,
sst_arr,
ostia_arr,
bgvar_arr,
ice_arr,
n_eval,
bias_lim,
drif_intra,
drif_inter,
err_std_n,
n_bad,
background_err_lim,
)
checker.do_sst_biased_noisy_check()
return checker.get_qc_outcomes_short()