diff --git a/.github/workflows/nbval.yaml b/.github/workflows/nbval.yaml
index e014b494..84d99b25 100644
--- a/.github/workflows/nbval.yaml
+++ b/.github/workflows/nbval.yaml
@@ -29,7 +29,7 @@ jobs:
       - name: Run notebook and check output
         run: |
-          # --sanitize-with: pre-process text to remove irrelevant differences (e.g. warning filepaths)
-          pytest --nbval docs/${{ matrix.notebook-file }} --sanitize-with docs/nbval_sanitization_rules.cfg
+          # --nbval-sanitize-with: pre-process text to remove irrelevant differences (e.g. warning filepaths)
+          pytest --nbval --nbval-sanitize-with docs/nbval_sanitization_rules.cfg docs/${{ matrix.notebook-file }}
       - name: Run notebooks again, save files
         run: |
           pip install nbconvert[webpdf]
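For reference, the file passed to `--nbval-sanitize-with` is an INI-style list of regex/replace rules that nbval applies to cell output before comparing it against the saved notebook. The repository's actual `docs/nbval_sanitization_rules.cfg` is not shown in this diff; a minimal hypothetical rule file would look like:

    # replace environment-specific site-packages paths before output comparison
    [regex_warning_paths]
    regex: /\S+/site-packages
    replace: SITE-PACKAGES
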
diff --git a/docs/TrendAnalysis_example.ipynb b/docs/TrendAnalysis_example.ipynb
index 45744e99..14eb7955 100644
--- a/docs/TrendAnalysis_example.ipynb
+++ b/docs/TrendAnalysis_example.ipynb
@@ -62160,7 +62160,7 @@
     "# Visualize the results\n",
     "ta_new_filter.plot_degradation_summary('sensor', summary_title='Sensor-based degradation results',\n",
     "                                       scatter_ymin=0.5, scatter_ymax=1.1,\n",
-    "                                       hist_xmin=-30, hist_xmax=45);\n",
+    "                                       hist_xmin=-30, hist_xmax=45)\n",
     "plt.show()"
    ]
   },
@@ -62247,7 +62247,7 @@
     "# Visualize the results\n",
     "ta_stuck_filter.plot_degradation_summary('sensor', summary_title='Sensor-based degradation results',\n",
     "                                       scatter_ymin=0.5, scatter_ymax=1.1,\n",
-    "                                       hist_xmin=-30, hist_xmax=45);\n",
+    "                                       hist_xmin=-30, hist_xmax=45)\n",
     "plt.show()"
    ]
   },
diff --git a/docs/TrendAnalysis_example_NSRDB.ipynb b/docs/TrendAnalysis_example_NSRDB.ipynb
index 6c9f6b7d..fce1fa92 100644
--- a/docs/TrendAnalysis_example_NSRDB.ipynb
+++ b/docs/TrendAnalysis_example_NSRDB.ipynb
@@ -158,7 +158,7 @@
     "ax.plot(df.index, df.soiling, 'o', alpha=0.01)\n",
     "#ax.set_ylim(0,1500)\n",
     "fig.autofmt_xdate()\n",
-    "ax.set_ylabel('soiling signal');\n",
+    "ax.set_ylabel('soiling signal')\n",
     "df['power'] = df['power_ac'] * df['soiling']\n",
     "\n",
     "plt.show()"
diff --git a/docs/notebook_requirements.txt b/docs/notebook_requirements.txt
index f3c5dc95..b47fa02e 100644
--- a/docs/notebook_requirements.txt
+++ b/docs/notebook_requirements.txt
@@ -15,7 +15,7 @@ ipython==8.26.0
 ipython-genutils==0.2.0
 ipywidgets==8.1.3
 jedi==0.19.1
-Jinja2==3.1.5
+Jinja2==3.1.6
 jsonschema==4.23.0
 jupyter==1.0.0
 jupyter-client==8.6.2
@@ -29,7 +29,7 @@ nbclient==0.10.0
 nbconvert==7.16.4
 nbformat==5.10.4
 nest-asyncio==1.6.0
-notebook==7.2.1
+notebook==7.2.2
 numexpr==2.10.1
 pandocfilters==1.5.1
 parso==0.8.4
@@ -48,7 +48,7 @@ soupsieve==2.6
 terminado==0.18.1
 testpath==0.6.0
 tinycss2==1.3.0
-tornado==6.4.2
+tornado==6.5.1
 traitlets==5.14.3
 wcwidth==0.2.13
 webencodings==0.5.1
diff --git a/docs/sphinx/source/changelog.rst b/docs/sphinx/source/changelog.rst
index fc3d805a..8c279d97 100644
--- a/docs/sphinx/source/changelog.rst
+++ b/docs/sphinx/source/changelog.rst
@@ -1,5 +1,7 @@
 RdTools Change Log
 ==================
+.. include:: changelog/pending.rst
+.. include:: changelog/v3.0.1.rst
 .. include:: changelog/v3.0.0.rst
 .. include:: changelog/v2.1.8.rst
 .. include:: changelog/v2.1.7.rst
diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst
new file mode 100644
index 00000000..ddca3ae2
--- /dev/null
+++ b/docs/sphinx/source/changelog/pending.rst
@@ -0,0 +1,14 @@
+*************************
+v3.0.x (X, X, 2025)
+*************************
+
+Enhancements
+------------
+* :py:func:`~rdtools.degradation.degradation_year_on_year` has a new parameter ``label``
+  that controls whether the slopes in ``calc_info['YoY_values']`` are right-labeled
+  (default), left-labeled, or center-labeled. (:issue:`459`)
+
+Contributors
+------------
+* Chris Deline (:ghuser:`cdeline`)
+
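A quick sketch of the new parameter described in the changelog entry above, assuming `energy_normalized` is a normalized-energy `pandas.Series` with a `DatetimeIndex`:

    from rdtools.degradation import degradation_year_on_year

    # label='center' stamps each slope at the midpoint of its year-on-year pair;
    # 'right' (the default) and 'left' use the later and earlier timestamps
    rd, rd_ci, calc_info = degradation_year_on_year(energy_normalized, label='center')
    yoy_slopes = calc_info['YoY_values']
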
diff --git a/docs/sphinx/source/changelog/v3.0.1.rst b/docs/sphinx/source/changelog/v3.0.1.rst
new file mode 100644
index 00000000..9bfaaaa3
--- /dev/null
+++ b/docs/sphinx/source/changelog/v3.0.1.rst
@@ -0,0 +1,12 @@
+*************************
+v3.0.1 (August 21, 2025)
+*************************
+
+Requirements
+------------
+* Updated Jinja2==3.1.6 in ``notebook_requirements.txt`` (:pull:`465`)
+* Updated notebook==7.2.2 in ``notebook_requirements.txt`` (:pull:`465`)
+* Updated tornado==6.5.1 in ``notebook_requirements.txt`` (:pull:`465`)
+* Updated requests==2.32.4 in ``requirements.txt`` (:pull:`465`)
+* Updated urllib3==2.5.0 in ``requirements.txt`` (:pull:`465`)
+* Removed the scipy<1.16.0 version constraint (:pull:`465`)
\ No newline at end of file
diff --git a/docs/system_availability_example.ipynb b/docs/system_availability_example.ipynb
index 7a44ee09..0b9925d9 100644
--- a/docs/system_availability_example.ipynb
+++ b/docs/system_availability_example.ipynb
@@ -618,7 +618,8 @@
     }
    ],
    "source": [
-    "aa2.plot();"
+    "aa2.plot()\n",
+    "plt.show()"
    ]
   },
   {
diff --git a/rdtools/degradation.py b/rdtools/degradation.py
index 1698b368..b7d28435 100644
--- a/rdtools/degradation.py
+++ b/rdtools/degradation.py
@@ -179,7 +179,8 @@ def degradation_classical_decomposition(energy_normalized,
 
 def degradation_year_on_year(energy_normalized, recenter=True,
                              exceedance_prob=95, confidence_level=68.2,
-                             uncertainty_method='simple', block_length=30):
+                             uncertainty_method='simple', block_length=30,
+                             label='right'):
     '''
     Estimate the trend of a timeseries using the year-on-year decomposition
     approach and calculate a Monte Carlo-derived confidence interval of slope.
@@ -208,6 +209,8 @@
         If `uncertainty_method` is 'circular_block', `block_length` determines
         the length of the blocks used in the circular block bootstrapping
         in number of days. Must be shorter than a third of the time series.
+    label : {'right', 'center', 'left'}, default 'right'
+        Which edge of each year-on-year comparison interval labels its slope.
 
     Returns
     -------
@@ -218,7 +221,8 @@
         degradation rate estimate
 
     calc_info : dict
-        * `YoY_values` - pandas series of right-labeled year on year slopes
+        * `YoY_values` - pandas series of year on year slopes, either right,
+          left, or center labeled, depending on the `label` parameter.
         * `renormalizing_factor` - float of value used to recenter data
         * `exceedance_level` - the degradation rate that was outperformed with
          probability of `exceedance_prob`
@@ -233,6 +237,12 @@
     energy_normalized.name = 'energy'
     energy_normalized.index.name = 'dt'
 
+    if label not in {None, "right", "left", "center"}:
+        raise ValueError(f"Unsupported value {label} for `label`."
+                         " Must be 'right', 'left', or 'center'.")
+    if label is None:
+        label = "right"
+
     # Detect less than 2 years of data. This is complicated by two things:
     #   - leap days muddle the precise meaning of "two years of data".
     #   - can't just check the number of days between the first and last
@@ -276,30 +286,58 @@
     df = pd.merge_asof(energy_normalized[['dt', 'energy']],
                        energy_normalized.sort_values('dt_shifted'),
                        left_on='dt', right_on='dt_shifted',
-                       suffixes=['', '_right'],
+                       suffixes=['', '_left'],
                        tolerance=pd.Timedelta('8D')
                        )
-    df['time_diff_years'] = (df.dt - df.dt_right) / pd.Timedelta('365d')
-    df['yoy'] = 100.0 * (df.energy - df.energy_right) / (df.time_diff_years)
-    df.index = df.dt
+    df['time_diff_years'] = (df.dt - df.dt_left) / pd.Timedelta('365d')
+    df['yoy'] = 100.0 * (df.energy - df.energy_left) / (df.time_diff_years)
 
     yoy_result = df.yoy.dropna()
 
-    df_right = df.set_index(df.dt_right).drop_duplicates('dt_right')
-    df['usage_of_points'] = df.yoy.notnull().astype(int).add(
-        df_right.yoy.notnull().astype(int), fill_value=0)
-
     if not len(yoy_result):
         raise ValueError('no year-over-year aggregated data pairs found')
 
     Rd_pct = yoy_result.median()
 
+    YoY_times = df.dropna(subset=['yoy']).copy()
+
+    # tally how many YoY slopes each point contributes to (as a start or end point)
+    df_left = YoY_times.set_index(YoY_times.dt_left)
+    df_right = YoY_times.set_index(YoY_times.dt)
+    usage_of_points = df_right.yoy.notnull().astype(int).add(
+        df_left.yoy.notnull().astype(int),
+        fill_value=0).groupby(level=0).sum()
+    usage_of_points.name = 'usage_of_points'
+
+    if pd.__version__ < '2.0.0':
+        # Pandas < 2.0.0 cannot average datetime columns with each other,
+        # so use a helper function to calculate the center label
+        YoY_times['dt_center'] = _avg_timestamp_old_Pandas(YoY_times['dt'], YoY_times['dt_left'])
+    else:
+        YoY_times['dt_center'] = pd.to_datetime(YoY_times[['dt', 'dt_left']].mean(axis=1))
+
+    YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']]
+    YoY_times = YoY_times.rename(columns={'dt': 'dt_right'})
+
+    YoY_times.set_index(YoY_times[f'dt_{label}'], inplace=True)
+    YoY_times.index.name = None
+
+    # apply the right-, left-, or center-labeled index to yoy_result
+    yoy_result.index = YoY_times[f'dt_{label}']
+    yoy_result.index.name = None
+
+    # infer_objects() avoids a FutureWarning from the subsequent fillna();
+    # see https://github.com/pandas-dev/pandas/issues/57734
+    energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt',
+                                                right_index=True, left_index=False
+                                                ).infer_objects().fillna(0.0)
+
     if uncertainty_method == 'simple':
         # If we need the full results
         calc_info = {
             'YoY_values': yoy_result,
             'renormalizing_factor': renorm,
-            'usage_of_points': df['usage_of_points']
+            'usage_of_points': energy_normalized.set_index('dt')['usage_of_points']
         }
 
         # bootstrap to determine 68% CI and exceedance probability
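The `dt_center` computation above relies on pandas >= 2.0 being able to average datetime columns row-wise; in isolation, the midpoint calculation looks like this (illustrative dates only):

    import pandas as pd

    pairs = pd.DataFrame({
        'dt': pd.to_datetime(['2020-06-30', '2020-07-01']),       # right edges
        'dt_left': pd.to_datetime(['2019-07-01', '2019-07-02']),  # left edges
    })
    # row-wise mean of two datetime columns (pandas >= 2.0 only)
    center = pd.to_datetime(pairs[['dt', 'dt_left']].mean(axis=1))
    # -> 2019-12-30 12:00:00 and 2019-12-31 12:00:00
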
@@ -347,15 +386,76 @@
         calc_info = {
             'renormalizing_factor': renorm,
             'exceedance_level': exceedance_level,
-            'usage_of_points': df['usage_of_points'],
+            'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'],
             'bootstrap_rates': bootstrap_rates}
 
         return (Rd_pct, Rd_CI, calc_info)
     else:
         # If we do not need confidence intervals and exceedance level
+        # TODO: return a (Rd_pct, None, calc_info) tuple like the other cases;
+        # currently blocked by test_bootstrap_module
         return Rd_pct
 
 
+def _avg_timestamp_old_Pandas(dt, dt_left):
+    '''
+    Average two datetime Series element-wise (Pandas < 2.0.0 cannot average
+    time columns directly). From https://stackoverflow.com/questions/57812300/
+    python-pandas-to-calculate-mean-of-datetime-of-multiple-columns
+
+    Parameters
+    ----------
+    dt : pandas.Series
+        First series with datetime values.
+    dt_left : pandas.Series
+        Second series with datetime values.
+
+    Returns
+    -------
+    pandas.Series
+        Series with the element-wise average timestamp of `dt` and `dt_left`.
+    '''
+    import calendar
+
+    # strip timezones; tz_localize(None) on the frame fails for a numeric index
+    try:
+        temp_df = pd.DataFrame({'dt': dt.dt.tz_localize(None),
+                                'dt_left': dt_left.dt.tz_localize(None)
+                                }).tz_localize(None)
+    except TypeError:  # in case numeric index passed
+        temp_df = pd.DataFrame({'dt': dt.dt.tz_localize(None),
+                                'dt_left': dt_left.dt.tz_localize(None)
+                                })
+
+    # conversion from dates to seconds since epoch (unix time)
+    def to_unix(s):
+        if isinstance(s, pd.Timestamp):
+            return calendar.timegm(s.timetuple())
+        else:
+            return pd.NaT
+
+    # average the seconds since epoch and convert back to a timestamp;
+    # rows containing NaT fail the sum() and average to NaT instead
+    averages = []
+    for index, row in temp_df.iterrows():
+        unix = [to_unix(i) for i in row]
+        try:
+            average = sum(unix) / len(unix)
+            averages.append(pd.to_datetime(average, unit='s'))
+        except TypeError:
+            averages.append(pd.NaT)
+    temp_df['averages'] = averages
+
+    # restore the original timezone on the values (and on the index too,
+    # when the index is a DatetimeIndex)
+    try:
+        dt_center = (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz)
+    except TypeError:  # not a timeseries index
+        dt_center = (temp_df['averages']).dt.tz_localize(dt.dt.tz)
+
+    return dt_center
+
+
 def _mk_test(x, alpha=0.05):
     '''
     Mann-Kendall test of significance for trend (used in classical
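For pandas < 2.0, the helper above sidesteps datetime averaging by going through seconds since the epoch; the core of that trick, reduced to a single pair of timestamps:

    import calendar
    import pandas as pd

    t_left = pd.Timestamp('2019-07-01')
    t_right = pd.Timestamp('2020-06-30')
    # average the epoch seconds, then convert back to a timestamp
    avg = (calendar.timegm(t_left.timetuple()) + calendar.timegm(t_right.timetuple())) / 2
    center = pd.to_datetime(avg, unit='s')  # Timestamp('2019-12-30 12:00:00')
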
diff --git a/rdtools/plotting.py b/rdtools/plotting.py
index 93a07bac..94af0134 100644
--- a/rdtools/plotting.py
+++ b/rdtools/plotting.py
@@ -54,8 +54,8 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield,
         Include extra information in the returned figure:
 
         * Color code points by the number of times they get used in calculating
-          Rd slopes. Default color: 2 times (as a start and endpoint). Green:
-          1 time. Red: 0 times.
+          Rd slopes. Default color: an even number of times (as both a start
+          and an end point). Green: an odd number of times. Red: 0 times.
         * The number of year-on-year slopes contributing to the histogram.
 
     Note
@@ -109,7 +109,10 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield,
     renormalized_yield = normalized_yield / yoy_info['renormalizing_factor']
 
     if detailed:
-        colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 2: plot_color})
+        usage = yoy_info['usage_of_points']
+        # red: never used; green: used an odd number of times;
+        # default color: used an even (nonzero) number of times
+        colors = usage.map(lambda n: 'red' if n == 0 else 'green' if n % 2 else plot_color)
     else:
         colors = plot_color
     ax1.scatter(
@@ -483,8 +485,12 @@ def _bootstrap(x, percentile, reps):
         plot_color = 'tab:orange'
     if ci_color is None:
         ci_color = 'C0'
+    try:
+        roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//2)
+    except ValueError:  # occurs with degradation_yoy(multi_yoy=True); resample to daily mean
+        roller = results_values.resample('D').mean().rolling(f'{rolling_days}d',
+                                                             min_periods=rolling_days//2)
 
-    roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//2)
     # unfortunately it seems that you can't return multiple values in the rolling.apply() kernel.
     # TODO: figure out some workaround to return both percentiles in a single pass
     if include_ci:
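Time-based rolling windows require a monotonic index, which is what the new try/except guards against; a standalone illustration of the failure mode and the resampling fallback:

    import pandas as pd

    idx = pd.to_datetime(['2020-01-03', '2020-01-01', '2020-01-02'])  # non-monotonic
    s = pd.Series([3.0, 1.0, 2.0], index=idx)
    try:
        roller = s.rolling('2d', min_periods=1)
    except ValueError:
        # daily resampling sorts and averages duplicate days, restoring monotonicity
        roller = s.resample('D').mean().rolling('2d', min_periods=1)
    print(roller.mean())
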
diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py
index 4e92a1f1..ada944c4 100644
--- a/rdtools/test/degradation_test.py
+++ b/rdtools/test/degradation_test.py
@@ -202,6 +202,56 @@ def test_usage_of_points(self):
                 self.test_corr_energy[input_freq])
             self.assertTrue((np.sum(rd_result[2]['usage_of_points'])) == 1462)
 
+    def test_degradation_year_on_year_label_center(self):
+        ''' Test degradation_year_on_year with label="center". '''
+
+        funcName = sys._getframe().f_code.co_name
+        logging.debug('Running {}'.format(funcName))
+
+        # test YOY degradation calc with label='center'
+        input_freq = 'D'
+        rd_result = degradation_year_on_year(
+            self.test_corr_energy[input_freq], label='center')
+        self.assertAlmostEqual(rd_result[0], 100 * self.rd, places=1)
+        rd_result1 = degradation_year_on_year(
+            self.test_corr_energy[input_freq], label=None)
+        rd_result2 = degradation_year_on_year(
+            self.test_corr_energy[input_freq], label='right')
+        pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index,
+                                      rd_result2[2]['YoY_values'].index)
+        # 365/2 days difference between center and right label
+        assert (rd_result2[2]['YoY_values'].index -
+                rd_result[2]['YoY_values'].index).mean().days == \
+            pytest.approx(183, abs=1)
+
+        with pytest.raises(ValueError):
+            degradation_year_on_year(self.test_corr_energy[input_freq],
+                                     label='LEFT')
+
+    def test_avg_timestamp_old_Pandas(self):
+        """Test the _avg_timestamp_old_Pandas function for correct averaging."""
+        from rdtools.degradation import _avg_timestamp_old_Pandas
+        funcName = sys._getframe().f_code.co_name
+        logging.debug('Running {}'.format(funcName))
+        dt = pd.Series(self.get_corr_energy(0, 'D').index[-4:].tz_localize('UTC'),
+                       index=self.get_corr_energy(0, 'D').index[-4:].tz_localize('UTC'))
+        dt_right = pd.Series(self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC') +
+                             pd.Timedelta(days=365),
+                             index=self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC'))
+        # Expected result is the midpoint between each pair
+        expected = pd.Series([
+            pd.NaT,
+            pd.Timestamp("2015-06-30 12:00:00"),
+            pd.Timestamp("2015-07-01 12:00:00"),
+            pd.Timestamp("2015-07-02 12:00:00")],
+            index=self.get_corr_energy(0, 'D').index[-4:],
+            name='averages', dtype='datetime64[ns, UTC]'
+            ).tz_localize('UTC')
+
+        result = _avg_timestamp_old_Pandas(dt, dt_right).asfreq(freq='D')
+
+        pd.testing.assert_series_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "start,end,freq",
diff --git a/requirements.txt b/requirements.txt
index 387589a4..5ce3b27f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,7 +20,7 @@ python-dateutil==2.9.0
 pytz==2024.1
 arch==7.0.0
 filterpy==1.4.5
-requests==2.32.3
+requests==2.32.4
 retrying==1.3.4
 scikit-learn==1.5.1
 scipy==1.13.1
@@ -30,6 +30,6 @@ statsmodels==0.14.2
 threadpoolctl==3.5.0
 tomli==2.0.1
 typing_extensions==4.12.2
-urllib3==2.2.2
+urllib3==2.5.0
 xgboost==2.1.1
 
diff --git a/setup.py b/setup.py
index 441b16c0..f38a2bec 100755
--- a/setup.py
+++ b/setup.py
@@ -36,9 +36,7 @@
     "pytest-cov",
    "coverage",
    "flake8",
-    # nbval greater than 0.9.6 has a bug with semicolon
-    # https://github.com/computationalmodelling/nbval/issues/194
-    "nbval<=0.9.6",
+    "nbval",
     "pytest-mock",
 ]
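To reproduce the notebook check locally, the workflow's commands can be run directly; this sketch substitutes one concrete notebook for the `${{ matrix.notebook-file }}` matrix entry:

    pip install -r docs/notebook_requirements.txt pytest nbval
    pytest --nbval --nbval-sanitize-with docs/nbval_sanitization_rules.cfg docs/TrendAnalysis_example.ipynb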