diff --git a/pyproject.toml b/pyproject.toml index 2e83e4a..e0bade4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ build-backend = "setuptools.build_meta" [project.optional-dependencies] dev = [ 'pytest', + 'pytest-benchmark', 'coverage', 'poethepoet', 'types-pyyaml', diff --git a/tests/test_detrend.py b/tests/test_detrend.py index b158499..3ed3010 100644 --- a/tests/test_detrend.py +++ b/tests/test_detrend.py @@ -3,6 +3,7 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal +from pytest_benchmark.fixture import BenchmarkFixture from wind_up.detrend import apply_wsratio_v_wd_scen, calc_wsratio_v_wd_scen, check_applied_detrend from wind_up.models import WindUpConfig @@ -66,7 +67,7 @@ def test_check_applied_detrend(test_lsa_t13_config: WindUpConfig) -> None: assert detrend_post_r2_improvement == pytest.approx(0.03776561982402227) -def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None: +def test_calc_wsratio_v_wd_scen(benchmark: BenchmarkFixture, test_lsa_t13_config: WindUpConfig) -> None: # this test case borrows logic and results from check_applied_detrend where data which has already been detrended # is used to calculate the wsratio_v_wd_scen again to check it is flat cfg = test_lsa_t13_config @@ -85,7 +86,8 @@ def test_calc_wsratio_v_wd_scen(test_lsa_t13_config: WindUpConfig) -> None: expected_pre_df = pd.read_parquet( Path(__file__).parents[0] / "test_data/LSA_T13_LSA_T12_check_pre_wsratio_v_dir_scen.parquet", ) - actual_pre_df = calc_wsratio_v_wd_scen( + actual_pre_df = benchmark( + calc_wsratio_v_wd_scen, test_name=test_name, ref_name=ref_name, ref_lat=ref_lat, diff --git a/wind_up/detrend.py b/wind_up/detrend.py index 9178a2b..bfce5eb 100644 --- a/wind_up/detrend.py +++ b/wind_up/detrend.py @@ -35,40 +35,57 @@ def calc_wsratio_v_wd( # IEC says only use 4-16 m/s test_ws_ll = 4 test_ws_ul = 16 - ref_ws_ll = test_ws_ll * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean() - ref_ws_ul = test_ws_ul * detrend_df[ref_ws_col].mean() / detrend_df[test_ws_col].mean() - detrend_df = detrend_df[(detrend_df[test_ws_col] >= test_ws_ll) & (detrend_df[test_ws_col] < test_ws_ul)] - detrend_df = detrend_df[(detrend_df[ref_ws_col] >= ref_ws_ll) & (detrend_df[ref_ws_col] < ref_ws_ul)] + test_ws_mean = detrend_df[test_ws_col].mean() + ref_ws_mean = detrend_df[ref_ws_col].mean() - directions = [] - hours = [] + ref_ws_ll = test_ws_ll * ref_ws_mean / test_ws_mean + ref_ws_ul = test_ws_ul * ref_ws_mean / test_ws_mean + + detrend_df = detrend_df[ + (detrend_df[test_ws_col] >= test_ws_ll) + & (detrend_df[test_ws_col] < test_ws_ul) + & (detrend_df[ref_ws_col] >= ref_ws_ll) + & (detrend_df[ref_ws_col] < ref_ws_ul) + ] + + rows_per_hour = 3600 / timebase_s + min_count = min_hours * rows_per_hour + iec_ws_threshold = 8 + + # Vectorized circular difference calculation + directions = np.arange(0, 360, 1) + circ_diffs = np.array([circ_diff(detrend_df[ref_wd_col], d) for d in directions]) + + within_dir_bins = np.abs(circ_diffs) < dir_bin_width / 2 + + valid_directions = [] + valid_hours = [] test_rf_ws_roms = [] - for d in list(range(0, 360, 1)): - detrend_df["circ_diff_to_d"] = circ_diff(detrend_df[ref_wd_col], d) - detrend_df["within_dir_bin"] = detrend_df["circ_diff_to_d"].abs() < dir_bin_width / 2 - subsector_df = detrend_df[detrend_df["within_dir_bin"]].copy() - if len(subsector_df) > 0: - directions.append(d) - rows_per_hour = 3600 / timebase_s - hours.append(len(subsector_df) / rows_per_hour) - # 61400-12-1 requires >=24h data, >=6h above 8m/s, >= below 8m/s - min_count = min_hours * rows_per_hour - accept_sector = len(subsector_df) >= min_count - iec_ws_threshold = 8 - accept_sector = accept_sector and ((subsector_df[test_ws_col] < iec_ws_threshold).sum() >= (min_count / 4)) - accept_sector = accept_sector and ((subsector_df[test_ws_col] >= iec_ws_threshold).sum() >= (min_count / 4)) - if accept_sector: - rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean() - test_rf_ws_roms.append(rom) + for i, direction in enumerate(directions): + subsector_df = detrend_df[within_dir_bins[i]].copy() + + if (subsector_df_len := len(subsector_df)) > 0: + valid_directions.append(direction) + valid_hours.append(subsector_df_len / rows_per_hour) + + if subsector_df_len >= min_count: + below_thresh = (subsector_df[test_ws_col] < iec_ws_threshold).sum() + above_thresh = (subsector_df[test_ws_col] >= iec_ws_threshold).sum() + + if below_thresh >= (min_count / 4) and above_thresh >= (min_count / 4): + rom = subsector_df[test_ws_col].mean() / subsector_df[ref_ws_col].mean() + test_rf_ws_roms.append(rom) + else: + test_rf_ws_roms.append(np.nan) else: test_rf_ws_roms.append(np.nan) return pd.DataFrame( { - "direction": directions, - "hours": hours, + "direction": valid_directions, + "hours": valid_hours, "ws_rom": test_rf_ws_roms, - }, + } )