Source code for ochanticipy.datasources.fewsnet.fewsnet

"""
FEWS NET processing.

Download and save the data provided by FEWS NET as provided
on <https://fews.net/>.

FEWS NET is only available in a set of countries.
Check their website to see which countries are included.
"""

import datetime
import logging
import zipfile
from enum import Enum
from pathlib import Path
from tempfile import TemporaryDirectory

import geopandas as gpd
from hdx.location.country import Country

from ochanticipy.datasources.datasource import DataSource
from ochanticipy.utils.check_file_existence import check_file_existence
from ochanticipy.utils.io import download_url, unzip

logger = logging.getLogger(__name__)
_BASE_URL_COUNTRY = (
    "https://fdw.fews.net/api/ipcpackage/"
    "?country_code={iso2}&collection_date={YYYY}-{MM}-01"
)
_BASE_URL_REGION = (
    "https://fews.net/data_portal_download/download"
    "?data_file_path=http://shapefiles.fews.net.s3.amazonaws.com/"
    "HFIC/{region_code}/{region_name}{YYYY}{MM}.zip"
)


# Use Enum such that it can function as type-checking
# as well as check if user-inputed string is valid
[docs] class ValidProjectionPeriods(Enum): """ Define Enum for Valid Projection Periods. Use Enum such that it can function as type-checking as well as check if user-inputed string is valid """ CS = "CS" ML1 = "ML1" ML2 = "ML2"
[docs] class FewsNet(DataSource): """ Base class to retrieve FewsNet data. Parameters ---------- country_config : CountryConfig Country configuration """ def __init__( self, country_config, ): super().__init__( country_config=country_config, datasource_base_dir="fewsnet", is_public=True, # FN data can be regional, and thus we save the raw data # in "glb" instead of in the iso3 folder, even though part of it is # at country level is_global_raw=True, is_global_processed=False, config_datasource_name="fewsnet", ) self._iso2 = Country.get_iso2_from_iso3(self._country_config.iso3) if self._iso2 is None: raise KeyError( "No ISO2 found for the given ISO3. Check your ISO3, currently:" f" {self._country_config.iso3}." ) # mypy will give error Signature of "download" incompatible with supertype # "DataSource" due to `pub_year` and `pub_month` not being an arg in # `DataSource`. This is however valid so ignore mypy
[docs] def download( # type: ignore self, pub_year: int, pub_month: int, clobber: bool = False, ) -> Path: """ Retrieve the raw FEWS NET data. Depending on the region and date, this data is published per region or per country. This function retrieves the country data if it exists, and else the regional data for `pub_year`-`pub_month`. Parameters ---------- pub_year: int publication year of the data that should be downloaded pub_month: int publication month of the data that should be downloaded. This commonly refers to the month of the Current Situation period clobber : bool, default = False If True, overwrites existing raw files Returns ------- Path to the downloaded file. Examples -------- >>> from ochanticipy import create_country_config, FewsNet >>> # Download FEWS NET data for ETH published in 2021-06 >>> country_config = create_country_config(iso3="eth") >>> fewsnet = FewsNet(country_config=country_config) >>> eth_fn_202106_path = fewsnet.download(pub_year=2021,pub_month=6) """ self._check_date_validity(pub_year=pub_year, pub_month=pub_month) pub_month_str = self._get_pub_month_str(pub_month) # we prefer the country data as this more nicely structured # thus first check if that is available try: return self._download_country( pub_year=pub_year, pub_month_str=pub_month_str, clobber=clobber, ) except zipfile.BadZipFile: try: return self._download_region( pub_year=pub_year, pub_month_str=pub_month_str, clobber=clobber, ) except zipfile.BadZipFile as err: raise RuntimeError( "No country or regional data found for" f" {pub_year}-{pub_month_str}. Check on the FEWS NET " "website that data for your given date and country/region " "exists." ) from err
[docs] def process(self, *args, **kwargs): """ Process FEWS NET data. Method not implemented. """ logger.info("`process()` method not yet implemented for FEWS NET.")
[docs] def load( # type: ignore self, pub_year: int, pub_month: int, projection_period: ValidProjectionPeriods, ) -> gpd.GeoDataFrame: """ Load FEWS NET data. For the given `pub_year`, `pub_month` and `projection_period`. Parameters ---------- pub_year: int publication year of the data that should be loaded pub_month: int publication month of the data that should be loaded. This refers to the first month of the Current Situation period projection_period: str The projection period to be loaded. This should be CS, ML1, or ML2. Referring to Current Situation, near term projection, and medium term projection respectively. Returns ------- Geopandas DataFrame with the specified data. Examples -------- >>> from ochanticipy import create_country_config, FewsNet >>> # Load FEWS NET data for ETH published in 2021-06 of medium-term ... projection period (ML1) >>> country_config = create_country_config(iso3="eth") >>> fewsnet = FewsNet(country_config=country_config) >>> gdf_eth_fn_202106 = fewsnet.load(pub_year=2021,pub_month=6, ... projection_period = "ML1") """ logger.warning( "load() now returns the raw data. In the future this " "will return processed data." ) projperiods = [pp.value for pp in ValidProjectionPeriods] if projection_period not in projperiods: raise ValueError( f"{projection_period} is not a valid projection" f" period. It must be one of {', '.join(projperiods)}" ) self._check_date_validity(pub_year=pub_year, pub_month=pub_month) pub_month_str = self._get_pub_month_str(pub_month) dir_path = self._find_raw_dir_date( pub_year=pub_year, pub_month_str=pub_month_str ) file_path = self._get_raw_file_projection_period( dir_path=dir_path, projection_period=projection_period ) return gpd.read_file(file_path)
@staticmethod def _check_date_validity(pub_year: int, pub_month: int): try: pub_date = datetime.datetime(year=pub_year, month=pub_month, day=1) except ValueError as err: raise ValueError( f"The combination f pub_year-pub_month, " f"{pub_year}-{pub_month}, is not a valid date." ) from err if pub_date < datetime.datetime(year=2009, month=1, day=1): raise ValueError( f"FEWSNET publishes data since 2009, so adjust your pub_year " f"to be >=2009, currently {pub_year}" ) elif pub_date > datetime.datetime.now(): raise ValueError( "There is no data published in the future. The date should " "refer to the start month-year of the Current " "situation period." ) @staticmethod def _get_pub_month_str(pub_month: int): return f"{pub_month:02d}" def _download_country( self, pub_year: int, pub_month_str: str, clobber: bool ) -> Path: """ Download fewsnet data that covers the iso2 country. Returns ------- country_data : Path if data found return the output_dir, else return None """ url_country_date = _BASE_URL_COUNTRY.format( iso2=self._iso2, YYYY=pub_year, MM=pub_month_str ) return self._download( url=url_country_date, area=self._iso2, pub_year=pub_year, pub_month_str=pub_month_str, clobber=clobber, ) def _download_region( self, pub_year: int, pub_month_str: str, clobber: bool, ) -> Path: """ Download fewsnet data that covers the region the iso3 belongs to. Returns ------- region_data : Path If region data exists, return the saved dir else return None """ url_region_date = _BASE_URL_REGION.format( region_code=self._datasource_config.region_code, region_name=self._datasource_config.region_name, YYYY=pub_year, MM=pub_month_str, ) return self._download( url=url_region_date, area=self._datasource_config.region_code, pub_year=pub_year, pub_month_str=pub_month_str, clobber=clobber, ) def _download( self, url: str, area: str, pub_year: int, pub_month_str: str, clobber: bool, ) -> Path: """ Define output names and call _download_zip. url: str URL the zip file is located area: str Identifier of which area the data covers. This is either the ISO2 or the region code pub_year: int publication year of the data that should be downloaded pub_month: str publication month of the data that should be downloaded. This commonly refers to the month of the Current Situation period """ # filenames have upper iso2/regioncode, so use that for dirs as well output_dir = self._get_raw_dir_date( area=area, pub_year=pub_year, pub_month_str=pub_month_str ) return self._download_zip( filepath=output_dir, zip_filename=self._get_zip_filename( area=area, pub_year=pub_year, pub_month_str=pub_month_str ), url=url, clobber=clobber, ) def _get_raw_dir_date(self, area: str, pub_year: int, pub_month_str: str): return self._raw_base_dir / f"{area}_{pub_year}{pub_month_str}" @staticmethod def _get_zip_filename(area, pub_year, pub_month_str): return f"{area}{pub_year}{pub_month_str}.zip" @staticmethod @check_file_existence def _download_zip( filepath: Path, zip_filename: str, url: str, clobber: bool ) -> Path: """ Download and unzip the file at the url. Parameters ---------- zip_filename : str name of the zipfile url : str url that contains the zip file to be downloaded Returns ------- output_dir : Path None if no valid file, else output_dir """ # create tempdir to write zipfile to with TemporaryDirectory() as temp_dir: zip_path = Path(temp_dir) / zip_filename download_url(url=url, save_path=zip_path) logger.info(f"Downloaded {url} to {zip_path}") try: unzip(zip_file_path=zip_path, save_dir=filepath) logger.debug(f"Unzipped to {filepath}") except zipfile.BadZipFile as err: # indicates that the url returned something that wasn't a # zip, happens often and indicates data for the given # country - year-month is not available raise zipfile.BadZipFile( f"No zip data returned from url {url} " f"check that the area and year-month publication exist." ) from err return filepath def _find_raw_dir_date(self, pub_year: int, pub_month_str: str): """ Check if a dir exists for the given `pub_year`-`pub_month`. Should either cover the iso2 or region. If exists, returns the dir path. """ country_dir = self._get_raw_dir_date( area=self._iso2, pub_year=pub_year, pub_month_str=pub_month_str ) region_dir = self._get_raw_dir_date( area=self._datasource_config.region_code, pub_year=pub_year, pub_month_str=pub_month_str, ) if country_dir.is_dir(): return country_dir elif region_dir.is_dir(): return region_dir raise FileNotFoundError( f"No data found for {pub_year}-{pub_month_str} covering " f"{self._country_config.iso3} " f"or {self._datasource_config.region_name}. " f"Please make sure the data exists and is downloaded" ) @staticmethod def _get_raw_file_projection_period( dir_path: Path, projection_period: ValidProjectionPeriods ): file_path = dir_path / f"{dir_path.name}_{projection_period}.shp" if file_path.is_file(): return file_path else: raise FileNotFoundError( f"File {file_path} not found. Make sure the projection " f"period {projection_period} exists for {dir_path.name}." )