Source code for ochanticipy.utils.hdx_api

"""Use HDX python API to download data."""
import logging
import shutil
import tempfile
from pathlib import Path

from hdx.api.configuration import Configuration
from hdx.data.dataset import Dataset

# User agent string handed to the HDX configuration created below.
USER_AGENT = "ocha-anticipy"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Set up the HDX configuration as soon as this module is imported, using the
# "prod" site in read-only mode and identifying as USER_AGENT.
# NOTE(review): this is an import-time side effect; presumably it must run
# before any Dataset call in this module -- confirm how it interacts with a
# configuration created elsewhere in the same process.
Configuration.create(
    hdx_site="prod", user_agent=USER_AGENT, hdx_read_only=True
)


def load_resource_from_hdx(
    hdx_dataset: str, hdx_resource_name: str, output_filepath: Path
) -> Path:
    """
    Use the HDX API to download a single resource of a dataset.

    The resource is first downloaded into a temporary directory and then
    copied to ``output_filepath``, so a failed download does not leave a
    partial file at the target location.

    Parameters
    ----------
    hdx_dataset : str
        The name of the HDX dataset where the resource is located. Can be
        found by taking the portion of the url after
        ``data.humdata.org/dataset/``
    hdx_resource_name : str
        Resource name on HDX. Can be found by taking the filename as it
        appears on the dataset page.
    output_filepath : Path
        Target filepath for the downloaded resource. Missing parent
        directories are created.

    Returns
    -------
    Path
        The full path of the downloaded resource

    Raises
    ------
    FileNotFoundError
        If the dataset cannot be read from HDX, or if it has no resource
        named ``hdx_resource_name``.
    """
    # Accept plain strings as well; ``.parent`` below needs a Path.
    output_filepath = Path(output_filepath)

    logger.info(f"Querying HDX API for dataset {hdx_dataset}")
    dataset = Dataset.read_from_hdx(hdx_dataset)
    if dataset is None:
        # Guard against a missing dataset so the caller gets a clear error
        # instead of an AttributeError on ``None.get_resources()``.
        raise FileNotFoundError(
            f'Dataset not found at HDX address "{hdx_dataset}".'
        )
    resources = dataset.get_resources()
    logger.debug(f"Found the following resources: {resources}")

    # First resource whose name matches exactly, if any.
    resource = next(
        (
            candidate
            for candidate in resources
            if candidate["name"] == hdx_resource_name
        ),
        None,
    )
    if resource is None:
        raise FileNotFoundError(
            f'Resource with name "{hdx_resource_name}" not found '
            f'at HDX address "{hdx_dataset}".'
        )

    logger.info(f"Downloading dataset {hdx_resource_name}")
    with tempfile.TemporaryDirectory() as tempdir:
        # download() yields a pair; only the local file path is needed.
        _, downloaded_filepath = resource.download(folder=tempdir)
        output_filepath.parent.mkdir(parents=True, exist_ok=True)
        # Copy before the temporary directory is cleaned up on exit.
        shutil.copy(downloaded_filepath, output_filepath)
    logger.info(f"Saved to {output_filepath}")
    return output_filepath