Skip to content
This repository was archived by the owner on Sep 16, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tox_pytests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.9]
python-version: [3.7, 3.9]

steps:
- uses: actions/checkout@v1
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,6 @@ docs/temp/*

# Mypy Cache
.mypy_cache/

# downloaded test data
tests/test_data/*
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def read(*names, **kwargs):
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
Expand All @@ -79,7 +78,7 @@ def read(*names, **kwargs):
keywords=[
# eg: 'keyword1', 'keyword2', 'keyword3',
],
python_requires=">=3.6",
python_requires=">=3.7",
install_requires=[
"cdsapi >= 0.1.4",
"geopandas",
Expand All @@ -100,7 +99,9 @@ def read(*names, **kwargs):
"punch.py",
"pytest",
"sphinx_rtd_theme",
"open_FRED-cli"
"open_FRED-cli",
"shapely",
"requests"
],
"data-sources": [
"open_FRED-cli",
Expand Down
66 changes: 54 additions & 12 deletions src/feedinlib/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def format_windpowerlib(ds):
# the time stamp given by ERA5 for mean values (probably) corresponds to
# the end of the valid time interval; the following sets the time stamp
# to the middle of the valid time interval
df['time'] = df.time - pd.Timedelta(minutes=60)
df["time"] = df.time - pd.Timedelta(minutes=60)

df.set_index(['time', 'latitude', 'longitude'], inplace=True)
df.set_index(["time", "latitude", "longitude"], inplace=True)
df.sort_index(inplace=True)
df = df.tz_localize("UTC", level=0)

Expand Down Expand Up @@ -202,9 +202,9 @@ def format_pvlib(ds):
# the time stamp given by ERA5 for mean values (probably) corresponds to
# the end of the valid time interval; the following sets the time stamp
# to the middle of the valid time interval
df['time'] = df.time - pd.Timedelta(minutes=30)
df["time"] = df.time - pd.Timedelta(minutes=30)

df.set_index(['time', 'latitude', 'longitude'], inplace=True)
df.set_index(["time", "latitude", "longitude"], inplace=True)
df.sort_index(inplace=True)
df = df.tz_localize("UTC", level=0)

Expand Down Expand Up @@ -271,6 +271,21 @@ def select_area(ds, lon, lat, g_step=0.25):
return answer


def extract_coordinates_from_era5(era5_netcdf_filename):
"""
Extract all coordinates from an ERA5 netCDF file and return them as a
geopandas.GeoSeries.
"""
ds = xr.open_dataset(era5_netcdf_filename)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to only read longitude and latitude data needed in this function? Would be good for large datasets.


# Extract all points from the netCDF-file:
points = []
for x in ds.longitude:
for y in ds.latitude:
points.append(Point(x, y))
return gpd.GeoSeries(points)


def select_geometry(ds, area):
"""
Select data for given geometry from dataset.
Expand Down Expand Up @@ -307,7 +322,12 @@ def select_geometry(ds, area):
crs = {"init": "epsg:4326"}
geo_df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)

inside_points = geo_df.within(area)
if isinstance(area, Point):
d = geo_df.apply(lambda row: area.distance(row.geometry), axis=1)
inside_points = (d == d.min())
else:
inside_points = geo_df.within(area)

# if no points lie within area, return None
if not inside_points.any():
return None
Expand All @@ -322,18 +342,23 @@ def select_geometry(ds, area):
logical_list.append(
np.logical_and((ds.longitude == lon), (ds.latitude == lat))
)

# bind all conditions from the list
cond = np.logical_or(*logical_list[:2])
for new_cond in logical_list[2:]:
cond = logical_list[0]

for new_cond in logical_list[1:]:
cond = np.logical_or(cond, new_cond)

# apply the condition to where
return ds.where(cond)


def weather_df_from_era5(
era5_netcdf_filename, lib, start=None, end=None, area=None
era5_netcdf_filename,
lib,
start=None,
end=None,
area=None,
drop_coord_levels=False,
):
"""
Gets ERA5 weather data from netcdf file and converts it to a pandas
Expand All @@ -350,14 +375,20 @@ def weather_df_from_era5(
end : None or anything `pandas.to_datetime` can convert to a timestamp
Get weather data up to this date. Defaults to None in which
case the end date is set to the last time step in the dataset.
area : shapely compatible geometry object (i.e. Polygon, Multipolygon, etc...) or list(float) or list(tuple)
area : shapely.geometry object (i.e. Polygon, Multipolygon, etc...) or list(float) or list(tuple)
Area specifies for which geographic area to return weather data. Area
can either be a single location or an area.
In case you want data for a single location provide a list in the
form [lon, lat].
If you want data for an area you can provide a shape of this area or
specify a rectangular area giving a list of the
form [(lon west, lon east), (lat south, lat north)].
lib : str
Format the weather data for a specific library. Possible values are
`windpowerlib` and `pvlib`.
drop_coord_levels : bool
Decide whether the index levels of the coordinates will be dropped. A
ValueError is raised if there is more than one coordinate.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A ValueError is raised if there is more than one coordinate.


Returns
-------
Expand Down Expand Up @@ -388,11 +419,22 @@ def weather_df_from_era5(
"It must be either 'pvlib' or 'windpowerlib'."
)

if len(df) == 0:
return pd.DataFrame()

# drop latitude and longitude from index in case a single location
# is given in parameter `area`
if area is not None and isinstance(area, list):
if np.size(area[0]) == 1 and np.size(area[1]) == 1:
if drop_coord_levels is True:
if len(df.groupby(level=[1, 2]).count()) > 1:
msg = ("You cannot drop the coordinate levels if there are more "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

..if there is more...

"than one point. You will get duplicate entries in the "
"index.")
raise ValueError(msg)
else:
lat = round(df.index.get_level_values(1)[0], 2)
lon = round(df.index.get_level_values(2)[0], 2)
df.index = df.index.droplevel(level=[1, 2])
df.index.name = (lat, lon)

if start is None:
start = df.index[0]
Expand Down
Loading