Source code for ochanticipy.datasources.glofas.forecast

"""Download and process GloFAS forecast and reforecast river discharge data."""
import logging
from datetime import date
from pathlib import Path
from typing import Union

import xarray as xr
from dateutil import rrule

from ochanticipy.config.countryconfig import CountryConfig
from ochanticipy.datasources.glofas import glofas
from ochanticipy.utils.check_file_existence import check_file_existence
from ochanticipy.utils.geoboundingbox import GeoBoundingBox

logger = logging.getLogger(__name__)



[docs]
class GlofasForecast(glofas.Glofas):
    """
    Class for downloading and processing GloFAS forecast data.

    The GloFAS forecast dataset is a global raster presenting river
    discharge forecast from 26 May 2021 until present day (updated daily), see
    `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
    for more details. While CDS does have version 3 pre-release data
    from 2020-2021,
    we understand that there were some small issues that were fixed
    in the final version, so at this point in time this module
    does not support downloading the pre-release data.

    This class downloads the raw raster data
    `from CDS
    <https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-forecast?tab=overview>`__,
    and processes it from a raster to a datasets of reporting points from the
    `GloFAS interface
    <https://www.globalfloods.eu/glofas-forecasting/>`_.
    Due to the CDS request size limits, separate files are downloaded per
    day (that contain all requested lead times).

    Parameters
    ----------
    country_config : CountryConfig
        Country configuration
    geo_bounding_box: GeoBoundingBox
        The bounding coordinates of the area that should be included
    leadtime_max: int
        The maximum desired lead time D in days. All forecast data for lead
        times 1 to D days are downloaded
    start_date : Union[date, str], default: date(year=2021, month=5, day=26)
        The starting date for the dataset. If left blank, defaults to the
        earliest available date
    end_date : Union[date, str], default: date.today()
        The ending date for the dataset. If left blank, defaults to
        the current date
    model_version : int, default: 4
        The version of the GloFAS model to use, can only be 3 or 4.
        If in doubt, always use the latest (default).

    Examples
    --------
    Download, process and load GloFAS forecast data for the past month,
    for a lead time of 15 days.

    >>> from datetime import date
    >>> from ochanticipy import create_country_config, CodAB, GeoBoundingBox,
    ... GlofasForecast
    >>>
    >>> country_config = create_country_config(iso3="npl")
    >>> codab = CodAB(country_config=country_config)
    >>> codab.download()
    >>> admin_npl = codab.load()
    >>> geo_bounding_box = GeoBoundingBox.from_shape(admin_npl)
    >>>
    >>> glofas_forecast = GlofasForecast(
    ...     country_config=country_config,
    ...     geo_bounding_box=geo_bounding_box,
    ...     leadtime_max=15,
    ...     end_date=date(year=2022, month=10, day=22),
    ...     start_date=date(year=2022, month=9, day=22)
    ... )
    >>> glofas_forecast.download()
    >>> glofas_forecast.process()
    >>>
    >>> npl_glofas_forecast_reporting_points = glofas_forecast.load()
    """

    def __init__(
        self,
        country_config: CountryConfig,
        geo_bounding_box: GeoBoundingBox,
        leadtime_max: int,
        start_date: Union[date, str] = None,
        end_date: Union[date, str] = None,
        model_version: int = glofas.DEFAULT_MODEL_VERSION,
    ):
        super().__init__(
            country_config=country_config,
            geo_bounding_box=geo_bounding_box,
            start_date=start_date,
            end_date=end_date,
            cds_name="cems-glofas-forecast",
            model_version=model_version,
            product_type=["control_forecast", "ensemble_perturbed_forecasts"],
            date_variable_prefix="",
            frequency=rrule.DAILY,
            coord_names=["time", "number", "step"],
            leadtime_max=leadtime_max,
            start_date_min=date(year=2021, month=5, day=26),
        )

    @staticmethod
    def _system_version_dict() -> glofas.SystemVersions:
        system_versions = glofas.SystemVersions()
        system_versions[3] = "version_3_1"
        system_versions[4] = "operational"
        return system_versions

    @check_file_existence
    def _load_single_file(
        self, input_filepath: Path, filepath: Path, clobber: bool
    ) -> xr.Dataset:
        return _read_in_ensemble_and_perturbed_datasets(
            filepath=input_filepath
        ).expand_dims("time")




[docs]
class GlofasReforecast(glofas.Glofas):
    """
    Class for downloading and processing GloFAS reforecast data.

    The GloFAS reforecast dataset is a global raster presenting river
    discharge forecasted from 1999 until 2018, see
    `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`_
    for more details.

    This class downloads the raw raster data
    `from CDS
    <https://cds.climate.copernicus.eu/cdsapp#!/dataset/cems-glofas-reforecast?tab=overview>`__,
    and processes it from a raster to a datasets of reporting points from the
    `GloFAS interface
    <https://www.globalfloods.eu/glofas-forecasting/>`_.
    Due to the CDS request size limits, separate files are downloaded per
    month (that contain all requested lead times).

    Parameters
    ----------
    country_config : CountryConfig
        Country configuration
    geo_bounding_box: GeoBoundingBox
        The bounding coordinates of the area that should be included
    leadtime_max: int
        The maximum desired lead time D in days. All forecast data for lead
        times 1 to D days are downloaded
    start_date : Union[date, str], default: date(year=1999, month=1, day=1)
        The starting date for the dataset. If left blank, defaults to the
        earliest available date
    end_date : Union[date, str], default: date(year=2018, month=12, day=31)
        The ending date for the dataset. If left blank, defaults to the
        last available date
    model_version : int, default: 4
        The version of the GloFAS model to use, can only be 3 or 4.
        If in doubt, always use the latest (default).

    Examples
    --------
    Download, process and load all available GloFAS reforecast data
    for a lead time of 15 days.

    >>> from ochanticipy import create_country_config, CodAB, GeoBoundingBox,
    ... GlofasReforecast
    >>>
    >>> country_config = create_country_config(iso3="npl")
    >>> codab = CodAB(country_config=country_config)
    >>> codab.download()
    >>> admin_npl = codab.load()
    >>> geo_bounding_box = GeoBoundingBox.from_shape(admin_npl)
    >>>
    >>> glofas_reforecast = GlofasReforecast(
    ...     country_config=country_config,
    ...     geo_bounding_box=geo_bounding_box,
    ...     leadtime_max=15
    ... )
    >>> glofas_reforecast.download()
    >>> glofas_reforecast.process()
    >>>
    >>> npl_glofas_reforecast_reporting_points = glofas_reforecast.load()
    """

    def __init__(
        self,
        country_config: CountryConfig,
        geo_bounding_box: GeoBoundingBox,
        leadtime_max: int,
        start_date: Union[date, str] = None,
        end_date: Union[date, str] = None,
        model_version: int = glofas.DEFAULT_MODEL_VERSION,
    ):
        # Unfortunately start date min and max, as well as months,
        # depend on the model version. If this were more complicated
        # then it may make sense to use a factory, but that would change
        # how the constructor is called so trying to avoid it for now.
        if model_version == 3:
            start_date_min = date(year=1999, month=1, day=1)
            end_date_max = date(year=2018, month=12, day=31)
            month_list = None
        elif model_version == 4:
            start_date_min = date(year=2003, month=3, day=1)
            end_date_max = date(year=2022, month=8, day=31)
            month_list = list(range(3, 9))  # March to August
        else:
            raise ValueError("Model version must be 3 or 4")
        super().__init__(
            country_config=country_config,
            geo_bounding_box=geo_bounding_box,
            start_date=start_date,
            end_date=end_date,
            model_version=model_version,
            cds_name="cems-glofas-reforecast",
            product_type=[
                "control_reforecast",
                "ensemble_perturbed_reforecasts",
            ],
            date_variable_prefix="h",
            frequency=rrule.MONTHLY,
            coord_names=["number", "time", "step"],
            leadtime_max=leadtime_max,
            start_date_min=start_date_min,
            end_date_max=end_date_max,
            month_list=month_list,
        )

    @staticmethod
    def _system_version_dict() -> glofas.SystemVersions:
        system_versions = glofas.SystemVersions()
        system_versions[3] = "version_3_1"
        system_versions[4] = "version_4_0"
        return system_versions

    @check_file_existence
    def _load_single_file(
        self, input_filepath: Path, filepath: Path, clobber: bool
    ) -> xr.Dataset:
        return _read_in_ensemble_and_perturbed_datasets(
            filepath=input_filepath
        )



def _read_in_ensemble_and_perturbed_datasets(filepath: Path):
    """Read in forecast and reforecast data.

    The GloFAS forecast and reforecast data GRIB files contain two
    separate datasets: the control member, generated from the most accurate
    estimate of current conditions, and the perturbed forecast, which
    contains N ensemble members created by perturbing the control forecast.

    This function reads in both datasets and creates an N+1 (perturbed
    + control) ensemble.
    See `this paper <https://hess.copernicus.org/preprints/hess-2020-532/>`__
    for more details.
    """
    ds_list = []
    for data_type in ["cf", "pf"]:
        ds = xr.load_dataset(
            filepath,
            engine="cfgrib",
            backend_kwargs={
                "indexpath": "",
                "filter_by_keys": {"dataType": data_type},
            },
        )
        # Extra processing require for control forecast
        if data_type == "cf":
            ds = ds.expand_dims(dim="number")
        ds_list.append(ds)
    return xr.combine_by_coords(ds_list, combine_attrs="drop_conflicts")
Source code for ochanticipy.datasources.glofas.forecast

OCHA AnticiPy

Navigation

Related Topics