oemof · y-pohlmann-lintas · Nov 12, 2024 · Dec 2, 2024 · May 28, 2025 · p-snft
diff --git a/src/feedinlib/cds_request_tools.py b/src/feedinlib/cds_request_tools.py
@@ -5,6 +5,7 @@
 import cdsapi
 import numpy as np
 import xarray as xr
+import zipfile
 
 logger = logging.getLogger(__name__)
 
@@ -39,9 +40,18 @@ def _get_cds_data(
     if cds_client is None:
         cds_client = cdsapi.Client()
 
+    # Default to NetCDF (.nc) if the target filename has neither a .nc nor a .grib extension
+    if target_file.split(".")[-1] != "nc" and target_file.split(".")[-1] != "grib":
+        logger.info("No file format provided, assuming NetCDF format (.nc)")
+        target_file = target_file + ".nc"
+
+    data_format = "netcdf"
+    if ".grib" in target_file:
+        data_format = "grib"
+
     # Default request
     request = {
-        "format": "netcdf",
+        "data_format": data_format,
         "product_type": "reanalysis",
         "time": [
             "00:00",
@@ -81,10 +91,6 @@ def _get_cds_data(
     # Send the data request to the server
     result = cds_client.retrieve(dataset_name, request)
 
-    # Create a file in a secure way if a target filename was not provided
-    if target_file.split(".")[-1] != "nc":
-        target_file = target_file + ".nc"
-
     logger.info(
         "Downloading request for {} variables to {}".format(
             len(request["variable"]), target_file
@@ -94,6 +100,21 @@ def _get_cds_data(
     # Download the data in the target file
     result.download(target_file)
 
+    # Check if a zipped file was received, unzip the file
+    if zipfile.is_zipfile(target_file):
+        logger.info("The file that was downloaded seems to be zipped, unzipping the file")
+        with zipfile.ZipFile(target_file, 'r') as zip_ref:
+            zip_ref.extractall("")
+            # Load the two files that have been extracted
+            ds_1 = xr.open_dataset("data_stream-oper_stepType-accum.nc",
+                                   chunks=None,
+                                   decode_cf=True)
+            ds_2 = xr.open_dataset("data_stream-oper_stepType-instant.nc",
+                                   chunks=None,
+                                   decode_cf=True)
+            ds_combined = xr.merge([ds_1, ds_2])
+            ds_combined.to_netcdf(target_file)
+
 
 def _format_cds_request_datespan(start_date, end_date):
     """

diff --git a/src/feedinlib/era5.py b/src/feedinlib/era5.py
@@ -116,7 +116,7 @@ def format_windpowerlib(ds):
     drop_vars = [
         _
         for _ in ds_vars
-        if _ not in windpowerlib_vars + ["latitude", "longitude", "time"]
+        if _ not in windpowerlib_vars + ["latitude", "longitude", "valid_time"]
     ]
     ds = ds.drop(drop_vars)
 
@@ -126,9 +126,9 @@ def format_windpowerlib(ds):
     # the time stamp given by ERA5 for mean values (probably) corresponds to
     # the end of the valid time interval; the following sets the time stamp
     # to the middle of the valid time interval
-    df["time"] = df.time - pd.Timedelta(minutes=60)
+    df["valid_time"] = df.valid_time - pd.Timedelta(minutes=60)
 
-    df.set_index(["time", "latitude", "longitude"], inplace=True)
+    df.set_index(["valid_time", "latitude", "longitude"], inplace=True)
     df.sort_index(inplace=True)
     df = df.tz_localize("UTC", level=0)
 
@@ -201,7 +201,7 @@ def format_pvlib(ds):
     drop_vars = [
         _
         for _ in ds_vars
-        if _ not in pvlib_vars + ["latitude", "longitude", "time"]
+        if _ not in pvlib_vars + ["latitude", "longitude", "valid_time"]
     ]
     ds = ds.drop(drop_vars)
 
@@ -211,9 +211,9 @@ def format_pvlib(ds):
     # the time stamp given by ERA5 for mean values (probably) corresponds to
     # the end of the valid time interval; the following sets the time stamp
     # to the middle of the valid time interval
-    df["time"] = df.time - pd.Timedelta(minutes=30)
+    df["valid_time"] = df.valid_time - pd.Timedelta(minutes=30)
 
-    df.set_index(["time", "latitude", "longitude"], inplace=True)
+    df.set_index(["valid_time", "latitude", "longitude"], inplace=True)
     df.sort_index(inplace=True)
     df = df.tz_localize("UTC", level=0)
 
@@ -379,6 +379,9 @@ def weather_df_from_era5(
     """  # noqa: E501
     ds = xr.open_dataset(era5_netcdf_filename)
 
+    #if 'valid_time' in ds:
+    #    ds = ds.rename({'valid_time': 'time'})
+
     if area is not None:
         if isinstance(area, list):
             ds = select_area(ds, area[0], area[1])

diff --git a/tests/resources/test_data_era5.nc b/tests/resources/test_data_era5.nc
diff --git a/tests/test_weather_download.py b/tests/test_weather_download.py
@@ -1,8 +1,11 @@
 from unittest import mock
 
 import cdsapi
+import pandas as pd
+import pytest
 
 from feedinlib import era5
+from feedinlib.era5 import weather_df_from_era5
 
 
 def test_era5_download():
@@ -12,3 +15,14 @@ def test_era5_download():
     era5.get_era5_data_from_datespan_and_position(
         "2019-01-19", "2019-01-20", "test_file.nc", "50.0", "12.0"
     )
+
+
+def test_get_weather_df_from_era5():
+    era5_netcdf_filename = "resources/test_data_era5.nc"
+    # NOTE(review): relative path, resolved against the current working directory; presumably only found when pytest runs from tests/ - confirm
+    df = weather_df_from_era5(era5_netcdf_filename, lib='pvlib')
+    # Spot-check one "temp_air" value; index presumably (time, latitude, longitude) - TODO confirm
+    expected_val = -3.808929
+    actual_val = df.loc[(pd.Timestamp("2004-01-01 03:30:00+00:00"), 53.0, 8.75), "temp_air"]
+    # Compare with a relative tolerance instead of exact float equality
+    assert actual_val == pytest.approx(expected_val, rel=1e-5)