This repository was archived by the owner on Sep 16, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
Revise weather df from era5 #66
Open
uvchik
wants to merge
15
commits into
dev
Choose a base branch
from
revision/revise-weather-df-from-era5
base: dev
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
5aa50ea
Add function to extract coordinates (points) from era5 netCDF-file
uvchik c737488
Fix error if only one data set is found
uvchik ea93026
Fix error if no data set is found and return empty DataFrame
uvchik 792883a
Add parameter to drop/keep the coordinate levels of the index
uvchik b6b3e3b
Fix style checker issues
uvchik 1257be6
Add tests for era5 module
uvchik 6e5c70b
Add coordinates as index name if levels are dropped
uvchik 435ed29
Use nearest Point for Point geometry instead of `within`
uvchik 1d51301
Add tests for nearest Point including failing test
uvchik 65b9f21
Make isort happy
uvchik b82fc6d
Adapt test and create second failing test
uvchik fb4a153
Fix selection by geometry if only one element is found
uvchik 6be8a42
Reduce number of test runs
uvchik aa08508
Use 3.7 instead of 3.6 for tests
uvchik 58e88b5
Do not support 3.6 anymore
uvchik File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,3 +73,6 @@ docs/temp/* | |
|
|
||
| # Mypy Cache | ||
| .mypy_cache/ | ||
|
|
||
| # downloaded test data | ||
| tests/test_data/* | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,9 +117,9 @@ def format_windpowerlib(ds): | |
| # the time stamp given by ERA5 for mean values (probably) corresponds to | ||
| # the end of the valid time interval; the following sets the time stamp | ||
| # to the middle of the valid time interval | ||
| df['time'] = df.time - pd.Timedelta(minutes=60) | ||
| df["time"] = df.time - pd.Timedelta(minutes=60) | ||
|
|
||
| df.set_index(['time', 'latitude', 'longitude'], inplace=True) | ||
| df.set_index(["time", "latitude", "longitude"], inplace=True) | ||
| df.sort_index(inplace=True) | ||
| df = df.tz_localize("UTC", level=0) | ||
|
|
||
|
|
@@ -202,9 +202,9 @@ def format_pvlib(ds): | |
| # the time stamp given by ERA5 for mean values (probably) corresponds to | ||
| # the end of the valid time interval; the following sets the time stamp | ||
| # to the middle of the valid time interval | ||
| df['time'] = df.time - pd.Timedelta(minutes=30) | ||
| df["time"] = df.time - pd.Timedelta(minutes=30) | ||
|
|
||
| df.set_index(['time', 'latitude', 'longitude'], inplace=True) | ||
| df.set_index(["time", "latitude", "longitude"], inplace=True) | ||
| df.sort_index(inplace=True) | ||
| df = df.tz_localize("UTC", level=0) | ||
|
|
||
|
|
@@ -271,6 +271,21 @@ def select_area(ds, lon, lat, g_step=0.25): | |
| return answer | ||
|
|
||
|
|
||
| def extract_coordinates_from_era5(era5_netcdf_filename): | ||
| """ | ||
| Extract all coordinates from an ERA5 netCDF-file and return them as a | |
| geopandas.GeoSeries | |
| """ | ||
| ds = xr.open_dataset(era5_netcdf_filename) | ||
|
|
||
| # Extract all points from the netCDF-file: | ||
| points = [] | ||
| for x in ds.longitude: | ||
| for y in ds.latitude: | ||
| points.append(Point(x, y)) | ||
| return gpd.GeoSeries(points) | ||
|
|
||
|
|
||
| def select_geometry(ds, area): | ||
| """ | ||
| Select data for given geometry from dataset. | ||
|
|
@@ -307,7 +322,12 @@ def select_geometry(ds, area): | |
| crs = {"init": "epsg:4326"} | ||
| geo_df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry) | ||
|
|
||
| inside_points = geo_df.within(area) | ||
| if isinstance(area, Point): | ||
| d = geo_df.apply(lambda row: area.distance(row.geometry), axis=1) | ||
| inside_points = (d == d.min()) | ||
| else: | ||
| inside_points = geo_df.within(area) | ||
|
|
||
| # if no points lie within area, return None | ||
| if not inside_points.any(): | ||
| return None | ||
|
|
@@ -322,18 +342,23 @@ def select_geometry(ds, area): | |
| logical_list.append( | ||
| np.logical_and((ds.longitude == lon), (ds.latitude == lat)) | ||
| ) | ||
|
|
||
| # bind all conditions from the list | ||
| cond = np.logical_or(*logical_list[:2]) | ||
| for new_cond in logical_list[2:]: | ||
| cond = logical_list[0] | ||
|
|
||
| for new_cond in logical_list[1:]: | ||
| cond = np.logical_or(cond, new_cond) | ||
|
|
||
| # apply the condition to where | ||
| return ds.where(cond) | ||
|
|
||
|
|
||
| def weather_df_from_era5( | ||
| era5_netcdf_filename, lib, start=None, end=None, area=None | ||
| era5_netcdf_filename, | ||
| lib, | ||
| start=None, | ||
| end=None, | ||
| area=None, | ||
| drop_coord_levels=False, | ||
| ): | ||
| """ | ||
| Gets ERA5 weather data from netcdf file and converts it to a pandas | ||
|
|
@@ -350,14 +375,20 @@ def weather_df_from_era5( | |
| end : None or anything `pandas.to_datetime` can convert to a timestamp | ||
| Get weather data up to this date. Defaults to None in which | |
| case the end date is set to the last time step in the dataset. | ||
| area : shapely compatible geometry object (i.e. Polygon, Multipolygon, etc...) or list(float) or list(tuple) | ||
| area : shapely.geometry object (i.e. Polygon, Multipolygon, etc...) or list(float) or list(tuple) | ||
| Area specifies for which geographic area to return weather data. Area | ||
| can either be a single location or an area. | ||
| In case you want data for a single location provide a list in the | ||
| form [lon, lat]. | ||
| If you want data for an area you can provide a shape of this area or | ||
| specify a rectangular area giving a list of the | ||
| form [(lon west, lon east), (lat south, lat north)]. | ||
| lib : str | ||
| Format the weather data for a specific library. Possible values are | ||
| `windpowerlib` and `pvlib`. | ||
| drop_coord_levels : bool | ||
| Decide whether the index levels of the coordinates will be dropped. A | ||
| ValueError is raised if there is more than one coordinate. | |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A ValueError is raised if there is more than one coordinate. |
||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -388,11 +419,22 @@ def weather_df_from_era5( | |
| "It must be either 'pvlib' or 'windpowerlib'." | ||
| ) | ||
|
|
||
| if len(df) == 0: | ||
| return pd.DataFrame() | ||
|
|
||
| # drop latitude and longitude from index in case a single location | ||
| # is given in parameter `area` | ||
| if area is not None and isinstance(area, list): | ||
| if np.size(area[0]) == 1 and np.size(area[1]) == 1: | ||
| if drop_coord_levels is True: | ||
| if len(df.groupby(level=[1, 2]).count()) > 1: | ||
| msg = ("You cannot drop the coordinate levels if there are more " | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ..if there is more... |
||
| "than one point. You will get duplicate entries in the " | ||
| "index.") | ||
| raise ValueError(msg) | ||
| else: | ||
| lat = round(df.index.get_level_values(1)[0], 2) | ||
| lon = round(df.index.get_level_values(2)[0], 2) | ||
| df.index = df.index.droplevel(level=[1, 2]) | ||
| df.index.name = (lat, lon) | ||
|
|
||
| if start is None: | ||
| start = df.index[0] | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to only read longitude and latitude data needed in this function? Would be good for large datasets.