diff --git a/src/feedinlib/cds_request_tools.py b/src/feedinlib/cds_request_tools.py
index 6132ec4..1d4636c 100644
--- a/src/feedinlib/cds_request_tools.py
+++ b/src/feedinlib/cds_request_tools.py
@@ -5,6 +5,8 @@
 import cdsapi
 import numpy as np
 import xarray as xr
+import tempfile
+import zipfile
 
 logger = logging.getLogger(__name__)
 
@@ -39,9 +41,17 @@ def _get_cds_data(
     if cds_client is None:
         cds_client = cdsapi.Client()
 
+    # Fall back to NetCDF when the target filename has no known extension
+    if not target_file.endswith((".nc", ".grib")):
+        logger.info("No file format provided, assuming NetCDF format (.nc)")
+        target_file = target_file + ".nc"
+
+    # The extension (now guaranteed to be present) selects the data format
+    data_format = "grib" if target_file.endswith(".grib") else "netcdf"
+
     # Default request
     request = {
-        "format": "netcdf",
+        "data_format": data_format,
         "product_type": "reanalysis",
         "time": [
             "00:00",
@@ -81,10 +91,6 @@ def _get_cds_data(
     # Send the data request to the server
     result = cds_client.retrieve(dataset_name, request)
 
-    # Create a file in a secure way if a target filename was not provided
-    if target_file.split(".")[-1] != "nc":
-        target_file = target_file + ".nc"
-
     logger.info(
         "Downloading request for {} variables to {}".format(
             len(request["variable"]), target_file
@@ -94,6 +100,34 @@ def _get_cds_data(
     # Download the data in the target file
     result.download(target_file)
 
+    # The CDS API may deliver several NetCDF files bundled in a zip archive
+    # (e.g. one per step type); merge them into a single file
+    if zipfile.is_zipfile(target_file):
+        logger.info(
+            "The file that was downloaded seems to be zipped, "
+            "unzipping the file"
+        )
+        # Extract into a temporary directory: keeps the working directory
+        # clean and avoids relying on fixed member file names
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            with zipfile.ZipFile(target_file, "r") as zip_ref:
+                nc_files = [
+                    zip_ref.extract(member, tmp_dir)
+                    for member in zip_ref.namelist()
+                    if member.endswith(".nc")
+                ]
+            if not nc_files:
+                raise ValueError(
+                    "The downloaded archive contains no NetCDF file."
+                )
+            datasets = [xr.open_dataset(nc_file) for nc_file in nc_files]
+            try:
+                # Replace the archive by the merged NetCDF file
+                xr.merge(datasets).to_netcdf(target_file)
+            finally:
+                for dataset in datasets:
+                    dataset.close()
+
 
 def _format_cds_request_datespan(start_date, end_date):
     """
diff --git a/src/feedinlib/era5.py b/src/feedinlib/era5.py
index 0863903..609e92c 100644
--- a/src/feedinlib/era5.py
+++ b/src/feedinlib/era5.py
@@ -116,7 +116,7 @@ def format_windpowerlib(ds):
     drop_vars = [
         _
         for _ in ds_vars
-        if _ not in windpowerlib_vars + ["latitude", "longitude", "time"]
+        if _ not in windpowerlib_vars + ["latitude", "longitude", "valid_time"]
     ]
     ds = ds.drop(drop_vars)
 
@@ -126,9 +126,9 @@ def format_windpowerlib(ds):
     # the time stamp given by ERA5 for mean values (probably) corresponds to
     # the end of the valid time interval; the following sets the time stamp
     # to the middle of the valid time interval
-    df["time"] = df.time - pd.Timedelta(minutes=60)
+    df["valid_time"] = df.valid_time - pd.Timedelta(minutes=60)
 
-    df.set_index(["time", "latitude", "longitude"], inplace=True)
+    df.set_index(["valid_time", "latitude", "longitude"], inplace=True)
     df.sort_index(inplace=True)
     df = df.tz_localize("UTC", level=0)
 
@@ -201,7 +201,7 @@ def format_pvlib(ds):
     drop_vars = [
         _
         for _ in ds_vars
-        if _ not in pvlib_vars + ["latitude", "longitude", "time"]
+        if _ not in pvlib_vars + ["latitude", "longitude", "valid_time"]
     ]
     ds = ds.drop(drop_vars)
 
@@ -211,9 +211,9 @@ def format_pvlib(ds):
     # the time stamp given by ERA5 for mean values (probably) corresponds to
     # the end of the valid time interval; the following sets the time stamp
     # to the middle of the valid time interval
-    df["time"] = df.time - pd.Timedelta(minutes=30)
+    df["valid_time"] = df.valid_time - pd.Timedelta(minutes=30)
 
-    df.set_index(["time", "latitude", "longitude"], inplace=True)
+    df.set_index(["valid_time", "latitude", "longitude"], inplace=True)
     df.sort_index(inplace=True)
     df = df.tz_localize("UTC", level=0)
 
@@ -379,6 +379,11 @@ def weather_df_from_era5(
     """  # noqa: E501
     ds = xr.open_dataset(era5_netcdf_filename)
 
+    # Files from the legacy CDS API name the time coordinate "time" instead
+    # of "valid_time"; harmonise the name so that both layouts are supported
+    if "valid_time" not in ds and "time" in ds:
+        ds = ds.rename({"time": "valid_time"})
+
     if area is not None:
         if isinstance(area, list):
             ds = select_area(ds, area[0], area[1])
diff --git a/tests/resources/test_data_era5.nc b/tests/resources/test_data_era5.nc
new file mode 100644
index 0000000..b179ece
Binary files /dev/null and b/tests/resources/test_data_era5.nc differ
diff --git a/tests/test_weather_download.py b/tests/test_weather_download.py
index bcb8b4f..6451dfb 100644
--- a/tests/test_weather_download.py
+++ b/tests/test_weather_download.py
@@ -1,8 +1,12 @@
+import os
 from unittest import mock
 
 import cdsapi
+import pandas as pd
+import pytest
 
 from feedinlib import era5
+from feedinlib.era5 import weather_df_from_era5
 
 
 def test_era5_download():
@@ -12,3 +16,19 @@ def test_era5_download():
     era5.get_era5_data_from_datespan_and_position(
         "2019-01-19", "2019-01-20", "test_file.nc", "50.0", "12.0"
     )
+
+
+def test_get_weather_df_from_era5():
+    # Resolve the fixture relative to this file so that the test does not
+    # depend on the current working directory
+    era5_netcdf_filename = os.path.join(
+        os.path.dirname(__file__), "resources", "test_data_era5.nc"
+    )
+
+    df = weather_df_from_era5(era5_netcdf_filename, lib="pvlib")
+
+    expected_val = -3.808929
+    timestamp = pd.Timestamp("2004-01-01 03:30:00+00:00")
+    actual_val = df.loc[(timestamp, 53.0, 8.75), "temp_air"]
+
+    assert actual_val == pytest.approx(expected_val, rel=1e-5)