diff --git a/tests/resources/solar_projection.csv b/tests/resources/solar_projection.csv new file mode 100644 index 0000000..29050ff --- /dev/null +++ b/tests/resources/solar_projection.csv @@ -0,0 +1,25 @@ +,Elevation,Azimuth,BHI,DHI,BNI,proj_tilt_35_az_154_alb_025 +2009-07-12 00:00:00+00:00,-23.15501,358.02192,0.0,0.0,0.0,0.0 +2009-07-12 01:00:00+00:00,-22.13249,13.0521,0.0,0.0,0.0,0.0 +2009-07-12 02:00:00+00:00,-18.45929,27.35737,0.0,0.0,0.0,0.0 +2009-07-12 03:00:00+00:00,-12.53057,40.41785,0.0,0.0,0.0,0.0 +2009-07-12 04:00:00+00:00,-4.84902,52.15285,1.2421,4.9005,23.3832,4.59624 +2009-07-12 05:00:00+00:00,4.10789,62.80058,51.4441,47.5719,302.5096,59.63713 +2009-07-12 06:00:00+00:00,13.93921,72.77237,177.8372,85.0891,537.0757,234.91605 +2009-07-12 07:00:00+00:00,24.31203,82.59633,303.2679,121.5737,610.9364,428.0604 +2009-07-12 08:00:00+00:00,34.91997,92.97471,479.3531,128.3813,742.788,648.03782 +2009-07-12 09:00:00+00:00,45.40703,104.99125,455.9864,222.8379,593.7701,721.2353 +2009-07-12 10:00:00+00:00,55.20735,120.57558,182.644,373.4821,216.0035,556.59217 +2009-07-12 11:00:00+00:00,63.16619,143.12468,179.1913,430.0031,196.5999,598.58417 +2009-07-12 12:00:00+00:00,67.00673,175.30016,185.5595,437.3183,202.4893,606.8154 +2009-07-12 13:00:00+00:00,64.68676,209.27264,260.3788,402.0775,297.6538,642.69273 +2009-07-12 14:00:00+00:00,57.53711,234.30403,206.3438,368.7997,257.1175,539.49189 +2009-07-12 15:00:00+00:00,48.06597,251.31949,331.3794,227.0156,492.3087,495.07234 +2009-07-12 16:00:00+00:00,37.68394,264.03145,283.1507,152.701,521.9771,328.96706 +2009-07-12 17:00:00+00:00,27.06107,274.70758,213.5861,92.2861,573.8196,155.0263 +2009-07-12 18:00:00+00:00,16.58682,284.59487,80.4939,57.7285,383.0992,55.63311 +2009-07-12 19:00:00+00:00,6.57083,294.46803,5.5889,12.5657,68.5959,11.83986 +2009-07-12 20:00:00+00:00,-2.66638,304.8913,0.0,0.0,0.0,0.0 +2009-07-12 21:00:00+00:00,-10.74418,316.30955,0.0,0.0,0.0,0.0 +2009-07-12 22:00:00+00:00,-17.20259,329.02074,0.0,0.0,0.0,0.0 
+2009-07-12 23:00:00+00:00,-21.53409,343.04711,0.0,0.0,0.0,0.0 diff --git a/tests/test_processing.py b/tests/test_processing.py index f25df6d..ae14f64 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -30,6 +30,9 @@ Interpolate, ExpressionCombine, FillOikoMeteo, + AddOikoData, + AddSolarAngles, + ProjectSolarRadOnSurfaces, ) RESOURCES_PATH = Path(__file__).parent / "resources" @@ -695,7 +698,7 @@ def test_combiner(self): assert res.shape == (3, 6) - @patch("tide.processing.get_oikolab_df", side_effect=mock_get_oikolab_df) + @patch("tide.base.get_oikolab_df", side_effect=mock_get_oikolab_df) def test_fill_oiko_meteo(self, mock_get_oikolab): data = pd.read_csv( RESOURCES_PATH / "meteo_fill_df.csv", parse_dates=True, index_col=0 @@ -715,7 +718,7 @@ def test_fill_oiko_meteo(self, mock_get_oikolab): gaps_gte="4h", lat=-48.87667, lon=-123.39333, - param_map={ + columns_param_map={ "text__°C__outdoor": "temperature", "gh__W/m²__outdoor": "surface_solar_radiation", "rh__0-1__outdoor": "relative_humidity", @@ -728,3 +731,61 @@ def test_fill_oiko_meteo(self, mock_get_oikolab): data["gh__W/m²__outdoor"], data_gap["gh__W/m²__outdoor"] ) assert float(data_gap["text__°C__outdoor"].isnull().sum()) == 13 + + @patch("tide.base.get_oikolab_df", side_effect=mock_get_oikolab_df) + def test_add_oiko_data(self, mock_get_oikolab): + data_idx = pd.date_range( + start="2009-07-11 16:00:00+00:00", + end="2009-07-12 23:15:00+00:00", + freq="15min", + ) + data = pd.DataFrame( + {"tin__°C__Building": np.random.randn(len(data_idx))}, index=data_idx + ) + add_oiko = AddOikoData(lat=-48.87667, lon=-123.39333) + res = add_oiko.fit_transform(data) + assert not res.isnull().any().any() + assert res.shape == (126, 13) + + def test_add_solar_angles(self): + df = pd.DataFrame( + {"a": np.random.randn(24)}, + index=pd.date_range("2024-12-19", freq="h", periods=24), + ) + + sun_angle = AddSolarAngles() + sun_angle.fit(df.copy()) + assert sun_angle.get_feature_names_out() == [ + "a", 
+ "sun_el__angle_deg__OTHER__OTHER_SUB_BLOC", + "sun_az__angle_deg__OTHER__OTHER_SUB_BLOC", + ] + + res = sun_angle.transform(df.copy()) + assert res.shape == (24, 3) + + def test_processing(self): + test_df = pd.read_csv( + RESOURCES_PATH / "solar_projection.csv", index_col=0, parse_dates=True + ) + + test_df["GHI"] = test_df["BHI"] + test_df["DHI"] + + projector = ProjectSolarRadOnSurfaces( + bni_column_name="BNI", + dhi_column_name="DHI", + ghi_column_name="GHI", + lat=44.844, + lon=-0.564, + surface_azimuth_angles=[180.0, 154], + surface_tilt_angle=[90.0, 35], + albedo=0.25, + surface_name=["proj_180_90", "proj_tilt_35_az_154_alb_025"], + data_bloc="PV", + data_sub_bloc="Pyranometer", + ) + + projector.fit(test_df) + res = projector.transform(test_df.copy()) + + assert res.shape == (24, 9) diff --git a/tide/base.py b/tide/base.py index 4ef8fe2..18e17e7 100644 --- a/tide/base.py +++ b/tide/base.py @@ -1,3 +1,5 @@ +import os + import datetime as dt import typing from abc import ABC, abstractmethod @@ -15,13 +17,11 @@ validate_odd_param, process_stl_odd_args, get_data_blocks, + get_freq_delta_or_min_time_interval, + ensure_list, ) - -def _ensure_list(item): - if item is None: - return [] - return item if isinstance(item, list) else [item] +from tide.meteo import get_oikolab_df class BaseProcessing(ABC, TransformerMixin, BaseEstimator): @@ -110,8 +110,8 @@ def fit_check_features(self, X): def get_feature_names_out(self, input_features=None): check_is_fitted(self, attributes=["feature_names_in_"]) - added_columns = _ensure_list(self.added_columns) - removed_columns = _ensure_list(self.removed_columns) + added_columns = ensure_list(self.added_columns) + removed_columns = ensure_list(self.removed_columns) features_out = self.feature_names_in_.copy() + added_columns return [feature for feature in features_out if feature not in removed_columns] @@ -142,7 +142,7 @@ def _transform_implementation(self, X: pd.Series | pd.DataFrame): pass -class BaseSTL(ABC, BaseEstimator): 
+class BaseSTL(BaseEstimator): def __init__( self, period: int | str | dt.timedelta = "24h", @@ -203,3 +203,45 @@ def get_gaps_mask(self, X: pd.Series | pd.DataFrame): df_mask[col] = np.zeros_like(X.shape[0]).astype(bool) return df_mask + + +class BaseOikoMeteo: + def __init__( + self, + lat: float = 43.47, + lon: float = -1.51, + model: str = "era5", + env_oiko_api_key: str = "OIKO_API_KEY", + ): + self.lat = lat + self.lon = lon + self.model = model + self.env_oiko_api_key = env_oiko_api_key + + def get_api_key_from_env(self): + self.api_key_ = os.getenv(self.env_oiko_api_key) + + def get_meteo_at_x_freq(self, X: pd.Series | pd.DataFrame, param: list[str]): + check_is_fitted(self, attributes=["api_key_"]) + x_freq = get_freq_delta_or_min_time_interval(X) + end = ( + X.index[-1] + if X.index[-1] <= X.index[-1].replace(hour=23, minute=0) + else X.index[-1] + pd.Timedelta("1h") + ) + df = get_oikolab_df( + lat=self.lat, + lon=self.lon, + start=X.index[0], + end=end, + api_key=self.api_key_, + param=param, + model=self.model, + ) + + df = df[param] + if x_freq < pd.Timedelta("1h"): + df = df.asfreq(x_freq).interpolate("linear") + elif x_freq > pd.Timedelta("1h"): + df = df.resample(x_freq).mean() + return df.loc[X.index, :] diff --git a/tide/plumbing.py b/tide/plumbing.py index 38d8c22..3b3e9f8 100644 --- a/tide/plumbing.py +++ b/tide/plumbing.py @@ -22,7 +22,7 @@ import tide.processing as pc -def _get_pipe_from_proc_list(proc_list: list) -> Pipeline: +def _get_pipe_from_proc_list(proc_list: list, verbose: bool = False) -> Pipeline: proc_units = [ getattr(pc, proc[0])( *proc[1] if len(proc) > 1 and isinstance(proc[1], list) else (), @@ -30,11 +30,14 @@ def _get_pipe_from_proc_list(proc_list: list) -> Pipeline: ) for proc in proc_list ] - return make_pipeline(*proc_units) + return make_pipeline(*proc_units, verbose=verbose) def _get_column_wise_transformer( - proc_dict, data_columns: pd.Index | list[str], process_name: str = None + proc_dict, + data_columns: pd.Index 
| list[str], + process_name: str = None, + verbose: bool = False, ) -> ColumnTransformer | None: col_trans_list = [] for req, proc_list in proc_dict.items(): @@ -46,7 +49,7 @@ def _get_column_wise_transformer( col_trans_list.append( ( f"{process_name}->{name}" if process_name is not None else name, - _get_pipe_from_proc_list(proc_list), + _get_pipe_from_proc_list(proc_list, verbose=verbose), requested_col, ) ) @@ -58,6 +61,7 @@ def _get_column_wise_transformer( col_trans_list, remainder="passthrough", verbose_feature_names_out=False, + verbose=verbose, ).set_output(transform="pandas") @@ -70,10 +74,12 @@ def get_pipeline_from_dict( steps_list = [] for step, op_conf in pipe_dict.items(): if isinstance(op_conf, list): - operation = _get_pipe_from_proc_list(op_conf) + operation = _get_pipe_from_proc_list(op_conf, verbose=verbose) elif isinstance(op_conf, dict): - operation = _get_column_wise_transformer(op_conf, data_columns, step) + operation = _get_column_wise_transformer( + op_conf, data_columns, step, verbose + ) else: raise ValueError(f"{op_conf} is an invalid operation config") diff --git a/tide/processing.py b/tide/processing.py index ae69482..09f650b 100644 --- a/tide/processing.py +++ b/tide/processing.py @@ -1,5 +1,3 @@ -import os - import pandas as pd import numpy as np import datetime as dt @@ -9,21 +7,36 @@ from sklearn.utils.validation import check_is_fitted from scipy.ndimage import gaussian_filter1d -from tide.base import BaseProcessing, BaseFiller +from tide.base import BaseProcessing, BaseFiller, BaseOikoMeteo from tide.math import time_gradient from tide.utils import ( get_data_blocks, get_outer_timestamps, check_and_return_dt_index_df, parse_request_to_col_names, - get_freq_delta_or_min_time_interval, + ensure_list, ) from tide.regressors import SkSTLForecast from tide.classifiers import STLEDetector -from tide.meteo import get_oikolab_df +from tide.meteo import sun_position, beam_component, sky_diffuse, ground_diffuse MODEL_MAP = {"STL": 
SkSTLForecast} +OIKOLAB_DEFAULT_MAP = { + "temperature": "t_ext__°C__outdoor__meteo", + "dewpoint_temperature": "t_dp__°C__outdoor__meteo", + "mean_sea_level_pressure": "pressure__Pa__outdoor__meteo", + "wind_speed": "wind_speed__m/s__outdoor__meteo", + "100m_wind_speed": "100m_wind_speed__m/s__outdoor__meteo", + "relative_humidity": "rh__0-1RH__outdoor__meteo", + "surface_solar_radiation": "gho__w/m²__outdoor__meteo", + "direct_normal_solar_radiation": "dni__w/m²__outdoor__meteo", + "surface_diffuse_solar_radiation": "dhi__w/m²__outdoor__meteo", + "surface_thermal_radiation": "thermal_radiation__w/m²__outdoor__meteo", + "total_cloud_cover": "total_cloud_cover__0-1cover__outdoor__meteo", + "total_precipitation": "total_precipitation__mm__outdoor__meteo", +} + class Identity(BaseProcessing): """ @@ -1226,7 +1239,7 @@ def _transform_implementation(self, X: pd.Series | pd.DataFrame): return X -class FillOikoMeteo(BaseFiller, BaseProcessing): +class FillOikoMeteo(BaseFiller, BaseOikoMeteo, BaseProcessing): """ A processor that fills gaps using meteorological data from the Oikolab API. 
@@ -1276,51 +1289,238 @@ def __init__( gaps_gte: str | pd.Timedelta | dt.timedelta = None, lat: float = 43.47, lon: float = -1.51, - param_map: dict[str, str] = None, + columns_param_map: dict[str, str] = None, model: str = "era5", env_oiko_api_key: str = "OIKO_API_KEY", ): BaseFiller.__init__(self, gaps_lte, gaps_gte) + BaseOikoMeteo.__init__(self, lat, lon, model, env_oiko_api_key) BaseProcessing.__init__(self) - self.lat = lat - self.lon = lon - self.param_map = param_map - self.model = model - self.env_oiko_api_key = env_oiko_api_key + self.columns_param_map = columns_param_map def _fit_implementation(self, X, y=None): - if self.param_map is None: + if self.columns_param_map is None: # Dumb action fill everything with temperature - self.param_map = {col: "temperature" for col in X.columns} - self.api_key_ = os.getenv(self.env_oiko_api_key) + self.columns_param_map = {col: "temperature" for col in X.columns} + self.get_api_key_from_env() self.fitted_ = True return self def _transform_implementation(self, X: pd.Series | pd.DataFrame): check_is_fitted(self, attributes=["fitted_", "api_key_"]) - x_freq = get_freq_delta_or_min_time_interval(X) gaps_dict = self.get_gaps_dict_to_fill(X) for col, idx_list in gaps_dict.items(): for idx in idx_list: - end = ( - idx[-1] - if idx[-1] <= idx[-1].replace(hour=23, minute=0) - else idx[-1] + pd.Timedelta("1h") - ) - df = get_oikolab_df( - lat=self.lat, - lon=self.lon, - start=idx[0], - end=end, - api_key=self.api_key_, - param=[self.param_map[col]], - model=self.model, + df = self.get_meteo_at_x_freq(X, [self.columns_param_map[col]]) + X.loc[idx, col] = df.loc[idx, self.columns_param_map[col]] + return X + + +class AddOikoData(BaseOikoMeteo, BaseProcessing): + """ + A transformer class to fetch and integrate Oikolab meteorological data + into a given time-indexed DataFrame or Series. 
+ + It retrieves weather data such as temperature, wind speed, or humidity + at specified latitude and longitude, and adds it to the input DataFrame + under user-specified column names. + + Parameters + ---------- + lat : float, optional + Latitude of the location for which meteorological data is to be fetched. + Default is 43.47. + lon : float, optional + Longitude of the location for which meteorological data is to be fetched. + Default is -1.51. + param_columns_map : dict[str, str], optional + A mapping of meteorological parameter names (keys) to column names (values) + in the resulting DataFrame. Default is `OIKOLAB_DEFAULT_MAP`. + Example: + `{"temperature": "text__°C__meteo", "wind_speed": "wind__m/s__meteo"}` + model : str, optional + The meteorological model to use for fetching data. Default is "era5". + env_oiko_api_key : str, optional + The name of the environment variable containing the Oikolab API key. + Default is "OIKO_API_KEY". + + Methods + ------- + fit(X: pd.Series | pd.DataFrame, y=None) + Checks the input DataFrame for conflicts with target column names + and reads the Oikolab API key from the environment variable. + + transform(X: pd.Series | pd.DataFrame) + Fetches meteorological data and appends it to the input DataFrame + under the specified column names, aligned to the input index frequency. + + Notes + ----- + - This class requires access to the Oikolab API, and a valid API key must + be set as an environment variable. + - The input DataFrame must have a DatetimeIndex for fetching data at specific + time frequencies. 
+ """ + + def __init__( + self, + lat: float = 43.47, + lon: float = -1.51, + param_columns_map: dict[str, str] = OIKOLAB_DEFAULT_MAP, + model: str = "era5", + env_oiko_api_key: str = "OIKO_API_KEY", + ): + BaseOikoMeteo.__init__(self, lat, lon, model, env_oiko_api_key) + BaseProcessing.__init__(self) + self.param_columns_map = param_columns_map + self.added_columns = list(self.param_columns_map.values()) + + def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None): + mask = X.columns.isin(self.param_columns_map.values()) + if mask.any(): + raise ValueError( + f"Cannot add Oikolab meteo data. {X.columns[mask]} already in columns" + ) + self.get_api_key_from_env() + self.added_columns = list(self.param_columns_map.values()) + self.columns_check_ = True + return self + + def _transform_implementation(self, X: pd.Series | pd.DataFrame): + check_is_fitted(self, attributes=["columns_check_", "api_key_"]) + df = self.get_meteo_at_x_freq(X, list(self.param_columns_map.keys())) + X.loc[:, list(self.param_columns_map.values())] = df.to_numpy() + return X + + +class AddSolarAngles(BaseProcessing): + """ + Transformer that adds solar elevation and azimuth angle to passed DataFrame. + + Attributes: + lat (float): The latitude of the location in degrees. + lon (float): The longitude of the location in degrees. + data_bloc (str): Identifier for the tide data block. + Default to "OTHER". + data_sub_bloc (str): Identifier for the data sub-block; + Default to "OTHER_SUB_BLOC". 
+ """ + + def __init__( + self, + lat: float = 43.47, + lon: float = -1.51, + data_bloc: str = "OTHER", + data_sub_bloc: str = "OTHER_SUB_BLOC", + ): + self.lat = lat + self.lon = lon + self.data_bloc = data_bloc + self.data_sub_bloc = data_sub_bloc + BaseProcessing.__init__(self) + + def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None): + self.added_columns = [ + f"sun_el__angle_deg__{self.data_bloc}__{self.data_sub_bloc}", + f"sun_az__angle_deg__{self.data_bloc}__{self.data_sub_bloc}", + ] + + def _transform_implementation(self, X: pd.Series | pd.DataFrame): + df = pd.DataFrame( + data=np.array([sun_position(date, self.lat, self.lon) for date in X.index]), + columns=self.added_columns, + index=X.index, + ) + return pd.concat([X, df], axis=1) + + +class ProjectSolarRadOnSurfaces(BaseProcessing): + """ + Project solar radiation on various surfaces with specific orientations and tilts. + + Attributes: + bni_column_name (str): Name of the column containing beam normal irradiance + (BNI) data. + dhi_column_name (str): Name of the column containing diffuse horizontal + irradiance (DHI) data. + ghi_column_name (str): Name of the column containing global horizontal + irradiance (GHI) data. + lat (float): Latitude of the location (default is 43.47). + lon (float): Longitude of the location (default is -1.51). + surface_azimuth_angles (int | float | list[int | float]): Azimuth angles of + the surfaces in degrees east of north (default is 180.0, + which corresponds to a south-facing surface in the northern hemisphere). + surface_tilt_angle (float | list[float]): Tilt angles of the surfaces in + degrees (default is 35.0). 0 is facing ground. + albedo (float): Ground reflectivity or albedo (default is 0.25). + surface_name (str | list[str]): Names for the surfaces + (default is "az_180_tilt_35"). + data_bloc (str): Tide bloc name. Default is "OTHER". + data_sub_bloc (str): Tide sub-bloc name. Default is "OTHER_SUB_BLOC". 
+ + Raises: + ValueError: If the number of azimuth angles, tilt angles, and surface names + do not match. + """ + + def __init__( + self, + bni_column_name: str, + dhi_column_name: str, + ghi_column_name: str, + lat: float = 43.47, + lon: float = -1.51, + surface_azimuth_angles: int | float | list[int | float] = 180.0, + surface_tilt_angle: float | list[float] = 35.0, + albedo: float = 0.25, + surface_name: str | list[str] = "az_180_tilt_35", + data_bloc: str = "OTHER", + data_sub_bloc: str = "OTHER_SUB_BLOC", + ): + BaseProcessing.__init__(self) + self.bni_column_name = bni_column_name + self.dhi_column_name = dhi_column_name + self.ghi_column_name = ghi_column_name + self.lat = lat + self.lon = lon + self.surface_azimuth_angles = surface_azimuth_angles + self.surface_tilt_angle = surface_tilt_angle + self.albedo = albedo + self.surface_name = surface_name + self.data_bloc = data_bloc + self.data_sub_bloc = data_sub_bloc + + def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None): + if ( + not len(ensure_list(self.surface_azimuth_angles)) + == len(ensure_list(self.surface_tilt_angle)) + == len(ensure_list(self.surface_name)) + ): + raise ValueError("Number of surface azimuth, tilt and name does not match") + + self.required_columns = [ + self.bni_column_name, + self.dhi_column_name, + self.ghi_column_name, + ] + self.added_columns = [ + f"{name}__W/m²__{self.data_bloc}__{self.data_sub_bloc}" + for name in ensure_list(self.surface_name) + ] + + def _transform_implementation(self, X: pd.Series | pd.DataFrame): + sun_pos = np.array([sun_position(date, self.lat, self.lon) for date in X.index]) + for az, til, name in zip( + ensure_list(self.surface_azimuth_angles), + ensure_list(self.surface_tilt_angle), + self.added_columns, + ): + X[name] = ( + beam_component( + til, az, 90 - sun_pos[:, 0], sun_pos[:, 1], X[self.bni_column_name] ) + + sky_diffuse(til, X[self.dhi_column_name]) + + ground_diffuse(til, X[self.ghi_column_name], self.albedo) + ) - ts = 
df[self.param_map[col]] - if x_freq < pd.Timedelta("1h"): - ts = ts.asfreq(x_freq).interpolate("linear") - elif x_freq > pd.Timedelta("1h"): - ts = ts.resample(x_freq).mean() - X.loc[idx, col] = ts.loc[idx] return X diff --git a/tide/utils.py b/tide/utils.py index b6636ba..ccbc684 100644 --- a/tide/utils.py +++ b/tide/utils.py @@ -432,3 +432,9 @@ def process_stl_odd_args(param_name, X, stl_kwargs): if processed_value % 2 == 0: processed_value += 1 # Ensure the value is odd stl_kwargs[param_name] = processed_value + + +def ensure_list(item): + if item is None: + return [] + return item if isinstance(item, list) else [item]