"""Download and process GloFAS forecast and reforecast river discharge data."""
import logging
from datetime import date
from pathlib import Path
from typing import Union
import xarray as xr
from dateutil import rrule
from ochanticipy.config.countryconfig import CountryConfig
from ochanticipy.datasources.glofas import glofas
from ochanticipy.utils.check_file_existence import check_file_existence
from ochanticipy.utils.geoboundingbox import GeoBoundingBox
logger = logging.getLogger(__name__)
[docs]
class GlofasForecast(glofas.Glofas):
"""
Class for downloading and processing GloFAS forecast data.
The GloFAS forecast dataset is a global raster presenting river
discharge forecast from 26 May 2021 until present day (updated daily), see
`this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
for more details. While CDS does have version 3 pre-release data
from 2020-2021,
we understand that there were some small issues that were fixed
in the final version, so at this point in time this module
does not support downloading the pre-release data.
This class downloads the raw raster data
`from CDS
<https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-forecast?tab=overview>`__,
and processes it from a raster to a datasets of reporting points from the
`GloFAS interface
<https://www.globalfloods.eu/glofas-forecasting/>`_.
Due to the CDS request size limits, separate files are downloaded per
day (that contain all requested lead times).
Parameters
----------
country_config : CountryConfig
Country configuration
geo_bounding_box: GeoBoundingBox
The bounding coordinates of the area that should be included
leadtime_max: int
The maximum desired lead time D in days. All forecast data for lead
times 1 to D days are downloaded
start_date : Union[date, str], default: date(year=2021, month=5, day=26)
The starting date for the dataset. If left blank, defaults to the
earliest available date
end_date : Union[date, str], default: date.today()
The ending date for the dataset. If left blank, defaults to
the current date
model_version : int, default: 4
The version of the GloFAS model to use, can only be 3 or 4.
If in doubt, always use the latest (default).
Examples
--------
Download, process and load GloFAS forecast data for the past month,
for a lead time of 15 days.
>>> from datetime import date
>>> from ochanticipy import create_country_config, CodAB, GeoBoundingBox,
... GlofasForecast
>>>
>>> country_config = create_country_config(iso3="npl")
>>> codab = CodAB(country_config=country_config)
>>> codab.download()
>>> admin_npl = codab.load()
>>> geo_bounding_box = GeoBoundingBox.from_shape(admin_npl)
>>>
>>> glofas_forecast = GlofasForecast(
... country_config=country_config,
... geo_bounding_box=geo_bounding_box,
... leadtime_max=15,
... end_date=date(year=2022, month=10, day=22),
... start_date=date(year=2022, month=9, day=22)
... )
>>> glofas_forecast.download()
>>> glofas_forecast.process()
>>>
>>> npl_glofas_forecast_reporting_points = glofas_forecast.load()
"""
def __init__(
self,
country_config: CountryConfig,
geo_bounding_box: GeoBoundingBox,
leadtime_max: int,
start_date: Union[date, str] = None,
end_date: Union[date, str] = None,
model_version: int = glofas.DEFAULT_MODEL_VERSION,
):
super().__init__(
country_config=country_config,
geo_bounding_box=geo_bounding_box,
start_date=start_date,
end_date=end_date,
cds_name="cems-glofas-forecast",
model_version=model_version,
product_type=["control_forecast", "ensemble_perturbed_forecasts"],
date_variable_prefix="",
frequency=rrule.DAILY,
coord_names=["time", "number", "step"],
leadtime_max=leadtime_max,
start_date_min=date(year=2021, month=5, day=26),
)
@staticmethod
def _system_version_dict() -> glofas.SystemVersions:
system_versions = glofas.SystemVersions()
system_versions[3] = "version_3_1"
system_versions[4] = "operational"
return system_versions
@check_file_existence
def _load_single_file(
self, input_filepath: Path, filepath: Path, clobber: bool
) -> xr.Dataset:
return _read_in_ensemble_and_perturbed_datasets(
filepath=input_filepath
).expand_dims("time")
[docs]
class GlofasReforecast(glofas.Glofas):
"""
Class for downloading and processing GloFAS reforecast data.
The GloFAS reforecast dataset is a global raster presenting river
discharge forecasted from 1999 until 2018, see
`this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`_
for more details.
This class downloads the raw raster data
`from CDS
<https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-reforecast?tab=overview>`__,
and processes it from a raster to a datasets of reporting points from the
`GloFAS interface
<https://www.globalfloods.eu/glofas-forecasting/>`_.
Due to the CDS request size limits, separate files are downloaded per
month (that contain all requested lead times).
Parameters
----------
country_config : CountryConfig
Country configuration
geo_bounding_box: GeoBoundingBox
The bounding coordinates of the area that should be included
leadtime_max: int
The maximum desired lead time D in days. All forecast data for lead
times 1 to D days are downloaded
start_date : Union[date, str], default: date(year=1999, month=1, day=1)
The starting date for the dataset. If left blank, defaults to the
earliest available date
end_date : Union[date, str], default: date(year=2018, month=12, day=31)
The ending date for the dataset. If left blank, defaults to the
last available date
model_version : int, default: 4
The version of the GloFAS model to use, can only be 3 or 4.
If in doubt, always use the latest (default).
Examples
--------
Download, process and load all available GloFAS reforecast data
for a lead time of 15 days.
>>> from ochanticipy import create_country_config, CodAB, GeoBoundingBox,
... GlofasReforecast
>>>
>>> country_config = create_country_config(iso3="npl")
>>> codab = CodAB(country_config=country_config)
>>> codab.download()
>>> admin_npl = codab.load()
>>> geo_bounding_box = GeoBoundingBox.from_shape(admin_npl)
>>>
>>> glofas_reforecast = GlofasReforecast(
... country_config=country_config,
... geo_bounding_box=geo_bounding_box,
... leadtime_max=15
... )
>>> glofas_reforecast.download()
>>> glofas_reforecast.process()
>>>
>>> npl_glofas_reforecast_reporting_points = glofas_reforecast.load()
"""
def __init__(
self,
country_config: CountryConfig,
geo_bounding_box: GeoBoundingBox,
leadtime_max: int,
start_date: Union[date, str] = None,
end_date: Union[date, str] = None,
model_version: int = glofas.DEFAULT_MODEL_VERSION,
):
# Unfortunately start date min and max, as well as months,
# depend on the model version. If this were more complicated
# then it may make sense to use a factory, but that would change
# how the constructor is called so trying to avoid it for now.
if model_version == 3:
start_date_min = date(year=1999, month=1, day=1)
end_date_max = date(year=2018, month=12, day=31)
month_list = None
elif model_version == 4:
start_date_min = date(year=2003, month=3, day=1)
end_date_max = date(year=2022, month=8, day=31)
month_list = list(range(3, 9)) # March to August
else:
raise ValueError("Model version must be 3 or 4")
super().__init__(
country_config=country_config,
geo_bounding_box=geo_bounding_box,
start_date=start_date,
end_date=end_date,
model_version=model_version,
cds_name="cems-glofas-reforecast",
product_type=[
"control_reforecast",
"ensemble_perturbed_reforecasts",
],
date_variable_prefix="h",
frequency=rrule.MONTHLY,
coord_names=["number", "time", "step"],
leadtime_max=leadtime_max,
start_date_min=start_date_min,
end_date_max=end_date_max,
month_list=month_list,
)
@staticmethod
def _system_version_dict() -> glofas.SystemVersions:
system_versions = glofas.SystemVersions()
system_versions[3] = "version_3_1"
system_versions[4] = "version_4_0"
return system_versions
@check_file_existence
def _load_single_file(
self, input_filepath: Path, filepath: Path, clobber: bool
) -> xr.Dataset:
return _read_in_ensemble_and_perturbed_datasets(
filepath=input_filepath
)
def _read_in_ensemble_and_perturbed_datasets(filepath: Path):
"""Read in forecast and reforecast data.
The GloFAS forecast and reforecast data GRIB files contain two
separate datasets: the control member, generated from the most accurate
estimate of current conditions, and the perturbed forecast, which
contains N ensemble members created by perturbing the control forecast.
This function reads in both datasets and creates an N+1 (perturbed
+ control) ensemble.
See `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
for more details.
"""
ds_list = []
for data_type in ["cf", "pf"]:
ds = xr.load_dataset(
filepath,
engine="cfgrib",
backend_kwargs={
"indexpath": "",
"filter_by_keys": {"dataType": data_type},
},
)
# Extra processing require for control forecast
if data_type == "cf":
ds = ds.expand_dims(dim="number")
ds_list.append(ds)
return xr.combine_by_coords(ds_list, combine_attrs="drop_conflicts")