From 916fedda1cf7220368927959c2cc108ad1c12b5a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 23 Jan 2025 00:51:31 +0000 Subject: [PATCH 01/36] Bump notebook from 7.2.1 to 7.2.2 in /docs Bumps [notebook](https://github.com/jupyter/notebook) from 7.2.1 to 7.2.2. - [Release notes](https://github.com/jupyter/notebook/releases) - [Changelog](https://github.com/jupyter/notebook/blob/@jupyter-notebook/tree@7.2.2/CHANGELOG.md) - [Commits](https://github.com/jupyter/notebook/compare/@jupyter-notebook/tree@7.2.1...@jupyter-notebook/tree@7.2.2) --- updated-dependencies: - dependency-name: notebook dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- docs/notebook_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebook_requirements.txt b/docs/notebook_requirements.txt index f3c5dc95..4398ec1f 100644 --- a/docs/notebook_requirements.txt +++ b/docs/notebook_requirements.txt @@ -29,7 +29,7 @@ nbclient==0.10.0 nbconvert==7.16.4 nbformat==5.10.4 nest-asyncio==1.6.0 -notebook==7.2.1 +notebook==7.2.2 numexpr==2.10.1 pandocfilters==1.5.1 parso==0.8.4 From 3e4be1997d8a1017590bbfc9dbedec46bfca7178 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 May 2025 19:34:16 +0000 Subject: [PATCH 02/36] Bump tornado from 6.4.2 to 6.5.1 in /docs Bumps [tornado](https://github.com/tornadoweb/tornado) from 6.4.2 to 6.5.1. - [Changelog](https://github.com/tornadoweb/tornado/blob/master/docs/releases.rst) - [Commits](https://github.com/tornadoweb/tornado/compare/v6.4.2...v6.5.1) --- updated-dependencies: - dependency-name: tornado dependency-version: 6.5.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- docs/notebook_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebook_requirements.txt b/docs/notebook_requirements.txt index 4398ec1f..81c4b6e2 100644 --- a/docs/notebook_requirements.txt +++ b/docs/notebook_requirements.txt @@ -48,7 +48,7 @@ soupsieve==2.6 terminado==0.18.1 testpath==0.6.0 tinycss2==1.3.0 -tornado==6.4.2 +tornado==6.5.1 traitlets==5.14.3 wcwidth==0.2.13 webencodings==0.5.1 From 0548eab705452989e84164682fa9e341f21c874f Mon Sep 17 00:00:00 2001 From: cdeline Date: Sun, 22 Jun 2025 14:37:42 -0600 Subject: [PATCH 03/36] add 'label' input option to `degradation_year_on_year`. Fixes #459 --- rdtools/degradation.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 1698b368..4c0689c0 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -179,7 +179,8 @@ def degradation_classical_decomposition(energy_normalized, def degradation_year_on_year(energy_normalized, recenter=True, exceedance_prob=95, confidence_level=68.2, - uncertainty_method='simple', block_length=30): + uncertainty_method='simple', block_length=30, + label='right'): ''' Estimate the trend of a timeseries using the year-on-year decomposition approach and calculate a Monte Carlo-derived confidence interval of slope. @@ -208,6 +209,8 @@ def degradation_year_on_year(energy_normalized, recenter=True, If `uncertainty_method` is 'circular_block', `block_length` determines the length of the blocks used in the circular block bootstrapping in number of days. Must be shorter than a third of the time series. + label : {'right', 'center'}, default 'right' + Which Year-on-Year slope edge to label. 
Returns ------- @@ -218,7 +221,8 @@ def degradation_year_on_year(energy_normalized, recenter=True, degradation rate estimate calc_info : dict - * `YoY_values` - pandas series of right-labeled year on year slopes + * `YoY_values` - pandas series of year on year slopes, either right + or center labeled, depending on the `label` parameter. * `renormalizing_factor` - float of value used to recenter data * `exceedance_level` - the degradation rate that was outperformed with probability of `exceedance_prob` @@ -233,6 +237,12 @@ def degradation_year_on_year(energy_normalized, recenter=True, energy_normalized.name = 'energy' energy_normalized.index.name = 'dt' + if label not in {None, "right", "center"}: + raise ValueError(f"Unsupported value {label} for `label`." + " Must be 'right' or 'center'.") + if label is None: + label = "right" + # Detect less than 2 years of data. This is complicated by two things: # - leap days muddle the precise meaning of "two years of data". # - can't just check the number of days between the first and last @@ -284,11 +294,15 @@ def degradation_year_on_year(energy_normalized, recenter=True, df['yoy'] = 100.0 * (df.energy - df.energy_right) / (df.time_diff_years) df.index = df.dt - yoy_result = df.yoy.dropna() - df_right = df.set_index(df.dt_right).drop_duplicates('dt_right') df['usage_of_points'] = df.yoy.notnull().astype(int).add( df_right.yoy.notnull().astype(int), fill_value=0) + df['dt_center'] = df[['dt', 'dt_right']].mean(axis=1) + if label == 'center': + df = df.set_index(df.dt_center) + df.index.name = 'dt' + + yoy_result = df.yoy.dropna() if not len(yoy_result): raise ValueError('no year-over-year aggregated data pairs found') From 9af563568c0facef64cce1da97842e0c4fb2a639 Mon Sep 17 00:00:00 2001 From: cdeline Date: Sun, 22 Jun 2025 15:55:03 -0600 Subject: [PATCH 04/36] add pytests and update changelog. --- docs/sphinx/source/changelog.rst | 1 + docs/sphinx/source/changelog/pending.rst | 16 +++++++++++++++ rdtools/test/degradation_test.py | 25 ++++++++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 docs/sphinx/source/changelog/pending.rst diff --git a/docs/sphinx/source/changelog.rst b/docs/sphinx/source/changelog.rst index fc3d805a..341cb307 100644 --- a/docs/sphinx/source/changelog.rst +++ b/docs/sphinx/source/changelog.rst @@ -1,5 +1,6 @@ RdTools Change Log ================== +.. include:: changelog/pending.rst .. include:: changelog/v3.0.0.rst .. include:: changelog/v2.1.8.rst .. include:: changelog/v2.1.7.rst diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst new file mode 100644 index 00000000..815f82e5 --- /dev/null +++ b/docs/sphinx/source/changelog/pending.rst @@ -0,0 +1,16 @@ +************************* +v3.0.x (X, X, 2025) +************************* + +Enhancements +------------ +* :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``label=`` + to return the calc_info['YoY_values'] as either right labeled (default), or center labeled. 
+ (:issue:`459`) + + + +Contributors +------------ +* Chris Deline (:ghuser:`cdeline`) + diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 4e92a1f1..8c1f5881 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -202,6 +202,31 @@ def test_usage_of_points(self): self.test_corr_energy[input_freq]) self.assertTrue((np.sum(rd_result[2]['usage_of_points'])) == 1462) + def test_degradation_year_on_year_label_center(self): + ''' Test degradation_year_on_year with label="center". ''' + + funcName = sys._getframe().f_code.co_name + logging.debug('Running {}'.format(funcName)) + + # test YOY degradation calc with label='center' + input_freq = 'D' + rd_result = degradation_year_on_year( + self.test_corr_energy[input_freq], label='center') + self.assertAlmostEqual(rd_result[0], 100 * self.rd, places=1) + rd_result1 = degradation_year_on_year( + self.test_corr_energy[input_freq], label=None) + rd_result2 = degradation_year_on_year( + self.test_corr_energy[input_freq], label='right') + pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index, + rd_result2[2]['YoY_values'].index) + # 365/2 days difference between center and right label + self.assertAlmostEqual((rd_result2[2]['YoY_values'].index - + rd_result[2]['YoY_values'].index).mean(), + pd.Timedelta('183d'), + delta=pd.Timedelta('1d')) + with pytest.raises(ValueError): + degradation_year_on_year(self.test_corr_energy[input_freq], + label='LEFT') @pytest.mark.parametrize( "start,end,freq", From 89ccbabe468315ead750f6ce009e8ad9570440b8 Mon Sep 17 00:00:00 2001 From: cdeline Date: Sun, 22 Jun 2025 15:56:36 -0600 Subject: [PATCH 05/36] flake8 grumbles --- rdtools/test/degradation_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 8c1f5881..96a216c6 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -218,16 +218,17 @@ def test_degradation_year_on_year_label_center(self): rd_result2 = degradation_year_on_year( self.test_corr_energy[input_freq], label='right') pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index, - rd_result2[2]['YoY_values'].index) + rd_result2[2]['YoY_values'].index) # 365/2 days difference between center and right label self.assertAlmostEqual((rd_result2[2]['YoY_values'].index - rd_result[2]['YoY_values'].index).mean(), pd.Timedelta('183d'), delta=pd.Timedelta('1d')) with pytest.raises(ValueError): - degradation_year_on_year(self.test_corr_energy[input_freq], + degradation_year_on_year(self.test_corr_energy[input_freq], label='LEFT') + @pytest.mark.parametrize( "start,end,freq", [ From b32211f72fdd55fbca30e81af909f475ea5e4101 Mon Sep 17 00:00:00 2001 From: cdeline Date: Sun, 22 Jun 2025 18:48:55 -0600 Subject: [PATCH 06/36] Minor updates to setup.py (constrain scipy<1.16) and refactor degradation_test --- rdtools/test/degradation_test.py | 8 ++++---- setup.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 96a216c6..2f2a6ee3 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -220,10 +220,10 @@ def test_degradation_year_on_year_label_center(self): pd.testing.assert_index_equal(rd_result1[2]['YoY_values'].index, rd_result2[2]['YoY_values'].index) # 365/2 days difference between center and right label - self.assertAlmostEqual((rd_result2[2]['YoY_values'].index - - 
rd_result[2]['YoY_values'].index).mean(), - pd.Timedelta('183d'), - delta=pd.Timedelta('1d')) + assert (rd_result2[2]['YoY_values'].index - + rd_result[2]['YoY_values'].index).mean().days == \ + pytest.approx(183, abs=1) + with pytest.raises(ValueError): degradation_year_on_year(self.test_corr_energy[input_freq], label='LEFT') diff --git a/setup.py b/setup.py index 441b16c0..75549f6a 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,8 @@ "numpy >= 1.22.4", "pandas >= 1.4.4", "statsmodels >= 0.13.5", - "scipy >= 1.8.1", + # statsmodels 0.14.4 is not able to handle the latest scipy + "scipy >= 1.8.1, <1.16.0", "h5py >= 3.7.0", "plotly>=4.0.0", "xgboost >= 1.6.0", From 8ad5dac505cc879814abb3df76cedb13cca76eb2 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 23 Jun 2025 14:35:36 -0600 Subject: [PATCH 07/36] Custom fix for Pandas < 2.0.0 which can't average two columns of timestamps. --- rdtools/degradation.py | 52 +++++++++++++++++++++++++++++++++++++++++- setup.py | 2 +- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 4c0689c0..1a4ede15 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -297,7 +297,12 @@ def degradation_year_on_year(energy_normalized, recenter=True, df_right = df.set_index(df.dt_right).drop_duplicates('dt_right') df['usage_of_points'] = df.yoy.notnull().astype(int).add( df_right.yoy.notnull().astype(int), fill_value=0) - df['dt_center'] = df[['dt', 'dt_right']].mean(axis=1) + if pd.__version__ < '2.0.0': + # For old Pandas versions < 2.0.0, time columns cannot be averaged + # with each other, so we use a custom function to calculate center label + df['dt_center'] = _avg_timestamp_old_Pandas(df.dt, df.dt_right) + else: + df['dt_center'] = pd.to_datetime(df[['dt', 'dt_right']].mean(axis=1)) if label == 'center': df = df.set_index(df.dt_center) df.index.name = 'dt' @@ -370,6 +375,51 @@ def degradation_year_on_year(energy_normalized, recenter=True, return Rd_pct +def _avg_timestamp_old_Pandas(dt, dt_right): + ''' + For old Pandas versions < 2.0.0, time columns cannot be averaged + together. From https://stackoverflow.com/questions/57812300/ + python-pandas-to-calculate-mean-of-datetime-of-multiple-columns + + Parameters + ---------- + dt : pandas.Series + First series with datetime values + dt_right : pandas.Series + Second series with datetime values. + + Returns + ------- + pandas.Series + Series with the average timestamp of df1 and df2. 
+ ''' + import time + import datetime + + temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), + 'dt_right' : dt_right.dt.tz_localize(None) + }).tz_localize(None) + + # conversion from dates to seconds since epoch (unix time) + def to_unix(s): + if type(s) is pd.Timestamp: + return time.mktime(s.date().timetuple()) + else: + return pd.NaT + + # sum the seconds since epoch, calculate average, and convert back to readable date + averages = [] + for index, row in temp_df.iterrows(): + unix = [to_unix(i) for i in row] + try: + average = sum(unix) / len(unix) + averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) + except TypeError: + averages.append(pd.NaT) + temp_df['averages'] = averages + return temp_df['averages'] + + def _mk_test(x, alpha=0.05): ''' Mann-Kendall test of significance for trend (used in classical diff --git a/setup.py b/setup.py index 75549f6a..ed56d3cc 100755 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "pandas >= 1.4.4", "statsmodels >= 0.13.5", # statsmodels 0.14.4 is not able to handle the latest scipy - "scipy >= 1.8.1, <1.16.0", + "scipy >= 1.8.1, <1.16.0", "h5py >= 3.7.0", "plotly>=4.0.0", "xgboost >= 1.6.0", From cf5ff77201ed0501f04b3f6dede27746734d7534 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 23 Jun 2025 14:39:45 -0600 Subject: [PATCH 08/36] flake8 grumbles --- rdtools/test/degradation_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 2f2a6ee3..9c00fd80 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -221,8 +221,8 @@ def test_degradation_year_on_year_label_center(self): rd_result2[2]['YoY_values'].index) # 365/2 days difference between center and right label assert (rd_result2[2]['YoY_values'].index - - rd_result[2]['YoY_values'].index).mean().days == \ - pytest.approx(183, abs=1) + rd_result[2]['YoY_values'].index).mean().days == \ + pytest.approx(183, abs=1) with pytest.raises(ValueError): degradation_year_on_year(self.test_corr_energy[input_freq], From 424fc7d991d4239dba202c812c1d26700ba1f8f4 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 23 Jun 2025 15:47:31 -0600 Subject: [PATCH 09/36] statsmodels 0.14.4 is not able to handle the latest scipy. --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 441b16c0..ed56d3cc 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,8 @@ "numpy >= 1.22.4", "pandas >= 1.4.4", "statsmodels >= 0.13.5", - "scipy >= 1.8.1", + # statsmodels 0.14.4 is not able to handle the latest scipy + "scipy >= 1.8.1, <1.16.0", "h5py >= 3.7.0", "plotly>=4.0.0", "xgboost >= 1.6.0", From ea8854e3c0ae33160c9307cb9e128825d52d58f1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:48:39 +0000 Subject: [PATCH 10/36] Bump jinja2 from 3.1.5 to 3.1.6 in /docs Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.5 to 3.1.6. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.5...3.1.6) --- updated-dependencies: - dependency-name: jinja2 dependency-version: 3.1.6 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- docs/notebook_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebook_requirements.txt b/docs/notebook_requirements.txt index 4398ec1f..0cee77e6 100644 --- a/docs/notebook_requirements.txt +++ b/docs/notebook_requirements.txt @@ -15,7 +15,7 @@ ipython==8.26.0 ipython-genutils==0.2.0 ipywidgets==8.1.3 jedi==0.19.1 -Jinja2==3.1.5 +Jinja2==3.1.6 jsonschema==4.23.0 jupyter==1.0.0 jupyter-client==8.6.2 From f4b77bb04a1444f530c9d920e122430b7b377b32 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:50:34 +0000 Subject: [PATCH 11/36] Bump requests from 2.32.3 to 2.32.4 Bumps [requests](https://github.com/psf/requests) from 2.32.3 to 2.32.4. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v2.32.3...v2.32.4) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.4 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 387589a4..d6f4a47b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ python-dateutil==2.9.0 pytz==2024.1 arch==7.0.0 filterpy==1.4.5 -requests==2.32.3 +requests==2.32.4 retrying==1.3.4 scikit-learn==1.5.1 scipy==1.13.1 From e2c387a95891c2f2d0058422024aad5f3c190a7a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:51:19 +0000 Subject: [PATCH 12/36] Bump urllib3 from 2.2.2 to 2.5.0 Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.2 to 2.5.0. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.2.2...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 387589a4..9a0ddd05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,6 @@ statsmodels==0.14.2 threadpoolctl==3.5.0 tomli==2.0.1 typing_extensions==4.12.2 -urllib3==2.2.2 +urllib3==2.5.0 xgboost==2.1.1 From 1ff743e7d6d065aaedab61b0e28dec4d2303c443 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 25 Jun 2025 14:29:58 -0600 Subject: [PATCH 13/36] keep TZ-aware timestamps. 
Update pytests to specifically test _avg_timestamp_old_Pandas --- rdtools/degradation.py | 7 ++++--- rdtools/test/degradation_test.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 1a4ede15..485c0465 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -398,7 +398,7 @@ def _avg_timestamp_old_Pandas(dt, dt_right): temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), 'dt_right' : dt_right.dt.tz_localize(None) - }).tz_localize(None) + }) # conversion from dates to seconds since epoch (unix time) def to_unix(s): @@ -413,11 +413,12 @@ def to_unix(s): unix = [to_unix(i) for i in row] try: average = sum(unix) / len(unix) - averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) + #averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) + averages.append(pd.to_datetime(average, unit='s')) except TypeError: averages.append(pd.NaT) temp_df['averages'] = averages - return temp_df['averages'] + return temp_df['averages'].dt.tz_localize(dt.dt.tz) def _mk_test(x, alpha=0.05): diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 9c00fd80..0a1291b2 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -227,6 +227,28 @@ def test_degradation_year_on_year_label_center(self): with pytest.raises(ValueError): degradation_year_on_year(self.test_corr_energy[input_freq], label='LEFT') + + def test_avg_timestamp_old_Pandas(self): + """Test the _avg_timestamp_old_Pandas function for correct averaging.""" + from rdtools.degradation import _avg_timestamp_old_Pandas + funcName = sys._getframe().f_code.co_name + logging.debug('Running {}'.format(funcName)) + dt = pd.Series(self.test_corr_energy['D'].index[-3:], index = self.test_corr_energy['D'].index[-3:]) + dt_right = pd.Series(self.test_corr_energy['D'].index[-3:]+ + pd.Timedelta(days=365), index = self.test_corr_energy['D'].index[-3:]) + # Expected result is the midpoint between each pair + expected = pd.Series([ + pd.Timestamp("2015-06-30 19:00:00"), + pd.Timestamp("2015-07-01 19:00:00"), + pd.Timestamp("2015-07-02 19:00:00")], + index = self.test_corr_energy['D'].index[-3:], + name = 'averages' + ) + + result = _avg_timestamp_old_Pandas(dt, dt_right) + print(result) + print(expected) + pd.testing.assert_series_equal(result, expected) @pytest.mark.parametrize( From bc86af61341e1099bc588759362bad21fbdc0ad8 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 25 Jun 2025 15:07:00 -0600 Subject: [PATCH 14/36] flake8 grumbles --- rdtools/degradation.py | 3 +-- rdtools/test/degradation_test.py | 13 +++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 485c0465..ea3ec9e3 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -394,7 +394,6 @@ def _avg_timestamp_old_Pandas(dt, dt_right): Series with the average timestamp of df1 and df2. 
''' import time - import datetime temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), 'dt_right' : dt_right.dt.tz_localize(None) @@ -413,7 +412,7 @@ def to_unix(s): unix = [to_unix(i) for i in row] try: average = sum(unix) / len(unix) - #averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) + # averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) averages.append(pd.to_datetime(average, unit='s')) except TypeError: averages.append(pd.NaT) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 0a1291b2..4d437c72 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -227,22 +227,23 @@ def test_degradation_year_on_year_label_center(self): with pytest.raises(ValueError): degradation_year_on_year(self.test_corr_energy[input_freq], label='LEFT') - + def test_avg_timestamp_old_Pandas(self): """Test the _avg_timestamp_old_Pandas function for correct averaging.""" from rdtools.degradation import _avg_timestamp_old_Pandas funcName = sys._getframe().f_code.co_name logging.debug('Running {}'.format(funcName)) - dt = pd.Series(self.test_corr_energy['D'].index[-3:], index = self.test_corr_energy['D'].index[-3:]) - dt_right = pd.Series(self.test_corr_energy['D'].index[-3:]+ - pd.Timedelta(days=365), index = self.test_corr_energy['D'].index[-3:]) + dt = pd.Series(self.test_corr_energy['D'].index[-3:], + index=self.test_corr_energy['D'].index[-3:]) + dt_right = pd.Series(self.test_corr_energy['D'].index[-3:] + + pd.Timedelta(days=365), index=self.test_corr_energy['D'].index[-3:]) # Expected result is the midpoint between each pair expected = pd.Series([ pd.Timestamp("2015-06-30 19:00:00"), pd.Timestamp("2015-07-01 19:00:00"), pd.Timestamp("2015-07-02 19:00:00")], - index = self.test_corr_energy['D'].index[-3:], - name = 'averages' + index=self.test_corr_energy['D'].index[-3:], + name='averages' ) result = _avg_timestamp_old_Pandas(dt, dt_right) From ae080fd8dbee3c2aa16b504ceede00965ed76eec Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 25 Jun 2025 15:41:10 -0600 Subject: [PATCH 15/36] try to UTC localize the pytest... 
--- rdtools/degradation.py | 4 ++-- rdtools/test/degradation_test.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index ea3ec9e3..aba83681 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -397,7 +397,7 @@ def _avg_timestamp_old_Pandas(dt, dt_right): temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), 'dt_right' : dt_right.dt.tz_localize(None) - }) + }).tz_localize(None) # conversion from dates to seconds since epoch (unix time) def to_unix(s): @@ -417,7 +417,7 @@ def to_unix(s): except TypeError: averages.append(pd.NaT) temp_df['averages'] = averages - return temp_df['averages'].dt.tz_localize(dt.dt.tz) + return (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz) def _mk_test(x, alpha=0.05): diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 4d437c72..20cf4103 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -233,18 +233,18 @@ def test_avg_timestamp_old_Pandas(self): from rdtools.degradation import _avg_timestamp_old_Pandas funcName = sys._getframe().f_code.co_name logging.debug('Running {}'.format(funcName)) - dt = pd.Series(self.test_corr_energy['D'].index[-3:], - index=self.test_corr_energy['D'].index[-3:]) - dt_right = pd.Series(self.test_corr_energy['D'].index[-3:] + - pd.Timedelta(days=365), index=self.test_corr_energy['D'].index[-3:]) + dt = pd.Series(self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC'), + index=self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC')) + dt_right = pd.Series(self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC') + + pd.Timedelta(days=365), index=self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC')) # Expected result is the midpoint between each pair expected = pd.Series([ - pd.Timestamp("2015-06-30 19:00:00"), - pd.Timestamp("2015-07-01 19:00:00"), - pd.Timestamp("2015-07-02 19:00:00")], - index=self.test_corr_energy['D'].index[-3:], - name='averages' - ) + pd.Timestamp("2015-06-30 12:00:00"), + pd.Timestamp("2015-07-01 12:00:00"), + pd.Timestamp("2015-07-02 12:00:00")], + index=self.get_corr_energy(0,'D').index[-3:], + name='averages', dtype='datetime64[ns, UTC]' + ).tz_localize('UTC') result = _avg_timestamp_old_Pandas(dt, dt_right) print(result) From e448560015d9b39faef973777c1c7baa3eb734d1 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 25 Jun 2025 15:57:19 -0600 Subject: [PATCH 16/36] Add .asfreq() to get pytests to agree --- rdtools/degradation.py | 3 ++- rdtools/test/degradation_test.py | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index aba83681..741a16de 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -409,7 +409,8 @@ def to_unix(s): # sum the seconds since epoch, calculate average, and convert back to readable date averages = [] for index, row in temp_df.iterrows(): - unix = [to_unix(i) for i in row] + # unix = [to_unix(i) for i in row] + unix = [pd.Timestamp(i).timestamp() for i in row] try: average = sum(unix) / len(unix) # averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index 20cf4103..b2f8df31 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -233,20 +233,21 @@ def test_avg_timestamp_old_Pandas(self): from rdtools.degradation import _avg_timestamp_old_Pandas 
funcName = sys._getframe().f_code.co_name logging.debug('Running {}'.format(funcName)) - dt = pd.Series(self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC'), - index=self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC')) - dt_right = pd.Series(self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC') + - pd.Timedelta(days=365), index=self.get_corr_energy(0,'D').index[-3:].tz_localize('UTC')) + dt = pd.Series(self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC'), + index=self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC')) + dt_right = pd.Series(self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC') + + pd.Timedelta(days=365), + index=self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC')) # Expected result is the midpoint between each pair expected = pd.Series([ pd.Timestamp("2015-06-30 12:00:00"), pd.Timestamp("2015-07-01 12:00:00"), pd.Timestamp("2015-07-02 12:00:00")], - index=self.get_corr_energy(0,'D').index[-3:], + index=self.get_corr_energy(0, 'D').index[-3:], name='averages', dtype='datetime64[ns, UTC]' ).tz_localize('UTC') - result = _avg_timestamp_old_Pandas(dt, dt_right) + result = _avg_timestamp_old_Pandas(dt, dt_right).asfreq(freq='D') print(result) print(expected) pd.testing.assert_series_equal(result, expected) From fd62ea57fdda115b7c472c6bc7837925f02ee28b Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 25 Jun 2025 16:45:56 -0600 Subject: [PATCH 17/36] switch to calendar.timegm to hopefully remove TZ issues.. --- rdtools/degradation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 741a16de..058d7ee4 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -393,7 +393,7 @@ def _avg_timestamp_old_Pandas(dt, dt_right): pandas.Series Series with the average timestamp of df1 and df2. ''' - import time + import calendar temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), 'dt_right' : dt_right.dt.tz_localize(None) @@ -402,15 +402,15 @@ def _avg_timestamp_old_Pandas(dt, dt_right): # conversion from dates to seconds since epoch (unix time) def to_unix(s): if type(s) is pd.Timestamp: - return time.mktime(s.date().timetuple()) + return calendar.timegm(s.timetuple()) else: return pd.NaT # sum the seconds since epoch, calculate average, and convert back to readable date averages = [] for index, row in temp_df.iterrows(): - # unix = [to_unix(i) for i in row] - unix = [pd.Timestamp(i).timestamp() for i in row] + unix = [to_unix(i) for i in row] + # unix = [pd.Timestamp(i).timestamp() for i in row] try: average = sum(unix) / len(unix) # averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d')) From 03e094e020bceb4f87aa1e26f5201f63ca59ef34 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 7 Jul 2025 13:01:19 -0600 Subject: [PATCH 18/36] try setup.py now that statsmodels has a new release. 
--- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ed56d3cc..441b16c0 100755 --- a/setup.py +++ b/setup.py @@ -47,8 +47,7 @@ "numpy >= 1.22.4", "pandas >= 1.4.4", "statsmodels >= 0.13.5", - # statsmodels 0.14.4 is not able to handle the latest scipy - "scipy >= 1.8.1, <1.16.0", + "scipy >= 1.8.1", "h5py >= 3.7.0", "plotly>=4.0.0", "xgboost >= 1.6.0", From c220fadba9622eb97be5bd781a142b67082f6b83 Mon Sep 17 00:00:00 2001 From: cdeline Date: Tue, 5 Aug 2025 10:45:02 -0600 Subject: [PATCH 19/36] update _right dt labels to correct _left labels in degradation_year_on_year --- rdtools/degradation.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 058d7ee4..e0299649 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -286,23 +286,23 @@ def degradation_year_on_year(energy_normalized, recenter=True, df = pd.merge_asof(energy_normalized[['dt', 'energy']], energy_normalized.sort_values('dt_shifted'), left_on='dt', right_on='dt_shifted', - suffixes=['', '_right'], + suffixes=['', '_left'], tolerance=pd.Timedelta('8D') ) - df['time_diff_years'] = (df.dt - df.dt_right) / pd.Timedelta('365d') - df['yoy'] = 100.0 * (df.energy - df.energy_right) / (df.time_diff_years) + df['time_diff_years'] = (df.dt - df.dt_left) / pd.Timedelta('365d') + df['yoy'] = 100.0 * (df.energy - df.energy_left) / (df.time_diff_years) df.index = df.dt - df_right = df.set_index(df.dt_right).drop_duplicates('dt_right') + df_left = df.set_index(df.dt_left).drop_duplicates('dt_left') df['usage_of_points'] = df.yoy.notnull().astype(int).add( - df_right.yoy.notnull().astype(int), fill_value=0) + df_left.yoy.notnull().astype(int), fill_value=0) if pd.__version__ < '2.0.0': # For old Pandas versions < 2.0.0, time columns cannot be averaged # with each other, so we use a custom function to calculate center label - df['dt_center'] = _avg_timestamp_old_Pandas(df.dt, df.dt_right) + df['dt_center'] = _avg_timestamp_old_Pandas(df.dt, df.dt_left) else: - df['dt_center'] = pd.to_datetime(df[['dt', 'dt_right']].mean(axis=1)) + df['dt_center'] = pd.to_datetime(df[['dt', 'dt_left']].mean(axis=1)) if label == 'center': df = df.set_index(df.dt_center) df.index.name = 'dt' @@ -375,7 +375,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, return Rd_pct -def _avg_timestamp_old_Pandas(dt, dt_right): +def _avg_timestamp_old_Pandas(dt, dt_left): ''' For old Pandas versions < 2.0.0, time columns cannot be averaged together. From https://stackoverflow.com/questions/57812300/ @@ -385,7 +385,7 @@ def _avg_timestamp_old_Pandas(dt, dt_right): ---------- dt : pandas.Series First series with datetime values - dt_right : pandas.Series + dt_left : pandas.Series Second series with datetime values. 
Returns @@ -396,7 +396,7 @@ def _avg_timestamp_old_Pandas(dt, dt_right): import calendar temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), - 'dt_right' : dt_right.dt.tz_localize(None) + 'dt_left' : dt_left.dt.tz_localize(None) }).tz_localize(None) # conversion from dates to seconds since epoch (unix time) From 0464c256a552adac096988744fea0881e0cc1650 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 6 Aug 2025 15:13:48 -0600 Subject: [PATCH 20/36] update _avg_timestamp_old_Pandas to allow for numeric index instead of timestamp --- rdtools/degradation.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index e0299649..0339f7b3 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -395,9 +395,15 @@ def _avg_timestamp_old_Pandas(dt, dt_left): ''' import calendar - temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), + # allow for numeric index + try: + temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), + 'dt_left' : dt_left.dt.tz_localize(None) + }).tz_localize(None) + except TypeError: # in case numeric index passed + temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), 'dt_left' : dt_left.dt.tz_localize(None) - }).tz_localize(None) + }) # conversion from dates to seconds since epoch (unix time) def to_unix(s): @@ -418,7 +424,13 @@ def to_unix(s): except TypeError: averages.append(pd.NaT) temp_df['averages'] = averages - return (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz) + + try: + dt_center = (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz) + except TypeError: # not a timeseries index + dt_center = (temp_df['averages']).dt.tz_localize(dt.dt.tz) + + return dt_center def _mk_test(x, alpha=0.05): From 644f4a8170bbd1908b964faa134d739bb767bfcc Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 6 Aug 2025 15:16:32 -0600 Subject: [PATCH 21/36] add left label option to degradation_year_on_year --- rdtools/degradation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 0339f7b3..9c47b25f 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -237,9 +237,9 @@ def degradation_year_on_year(energy_normalized, recenter=True, energy_normalized.name = 'energy' energy_normalized.index.name = 'dt' - if label not in {None, "right", "center"}: + if label not in {None, "right", "left", "center"}: raise ValueError(f"Unsupported value {label} for `label`." - " Must be 'right' or 'center'.") + " Must be 'right', 'left' or 'center'.") if label is None: label = "right" @@ -424,7 +424,7 @@ def to_unix(s): except TypeError: averages.append(pd.NaT) temp_df['averages'] = averages - + try: dt_center = (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz) except TypeError: # not a timeseries index From 0957ade6986aaf2ea54ec8318a2f2bdbc08b34b9 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 6 Aug 2025 16:15:25 -0600 Subject: [PATCH 22/36] update degradation_year_on_year, index set to either left, center or right. 
Consistent with #394 - multi_yoy --- rdtools/degradation.py | 59 ++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 9c47b25f..9382b32c 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -292,33 +292,52 @@ def degradation_year_on_year(energy_normalized, recenter=True, df['time_diff_years'] = (df.dt - df.dt_left) / pd.Timedelta('365d') df['yoy'] = 100.0 * (df.energy - df.energy_left) / (df.time_diff_years) - df.index = df.dt + # df.index = df.dt + + yoy_result = df.yoy.dropna() + + if not len(yoy_result): + raise ValueError('no year-over-year aggregated data pairs found') + + Rd_pct = yoy_result.median() + + YoY_times = df.dropna(subset=['yoy'], inplace=False) + + # calculate usage of points. + df_left = YoY_times.set_index(YoY_times.dt_left) # .drop_duplicates('dt_left') + df_right = YoY_times.set_index(YoY_times.dt) # .drop_duplicates('dt') + usage_of_points = df_right.yoy.notnull().astype(int).add( + df_left.yoy.notnull().astype(int), + fill_value=0).groupby(level=0).sum() + usage_of_points.name = 'usage_of_points' - df_left = df.set_index(df.dt_left).drop_duplicates('dt_left') - df['usage_of_points'] = df.yoy.notnull().astype(int).add( - df_left.yoy.notnull().astype(int), fill_value=0) if pd.__version__ < '2.0.0': # For old Pandas versions < 2.0.0, time columns cannot be averaged # with each other, so we use a custom function to calculate center label - df['dt_center'] = _avg_timestamp_old_Pandas(df.dt, df.dt_left) + YoY_times['dt_center'] = _avg_timestamp_old_Pandas(YoY_times['dt'], YoY_times['dt_left']) else: - df['dt_center'] = pd.to_datetime(df[['dt', 'dt_left']].mean(axis=1)) - if label == 'center': - df = df.set_index(df.dt_center) - df.index.name = 'dt' + YoY_times['dt_center'] = pd.to_datetime(YoY_times[['dt', 'dt_left']].mean(axis=1)) + # if label == 'center': + # df = df.set_index(df.dt_center) + # df.index.name = 'dt' - yoy_result = df.yoy.dropna() + YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']] + YoY_times = YoY_times.rename(columns={'dt': 'dt_right'}) - if not len(yoy_result): - raise ValueError('no year-over-year aggregated data pairs found') + YoY_times.set_index(YoY_times[f'dt_{label}'], inplace=True) + # YoY_times = YoY_times.rename_axis(None, axis=1) + YoY_times.index.name = None + yoy_result.index = YoY_times[f'dt_{label}'] + yoy_result.index.name = None - Rd_pct = yoy_result.median() + energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', + right_index=True, left_index=False).fillna(0.0) if uncertainty_method == 'simple': # If we need the full results calc_info = { 'YoY_values': yoy_result, 'renormalizing_factor': renorm, - 'usage_of_points': df['usage_of_points'] + 'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'] } # bootstrap to determine 68% CI and exceedance probability @@ -366,13 +385,13 @@ def degradation_year_on_year(energy_normalized, recenter=True, calc_info = { 'renormalizing_factor': renorm, 'exceedance_level': exceedance_level, - 'usage_of_points': df['usage_of_points'], + 'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'], 'bootstrap_rates': bootstrap_rates} return (Rd_pct, Rd_CI, calc_info) else: # If we do not need confidence intervals and exceedance level - return Rd_pct + return (Rd_pct, None, calc_info) def _avg_timestamp_old_Pandas(dt, dt_left): @@ -400,10 +419,10 @@ def _avg_timestamp_old_Pandas(dt, dt_left): temp_df = pd.DataFrame({'dt' : 
dt.dt.tz_localize(None), 'dt_left' : dt_left.dt.tz_localize(None) }).tz_localize(None) - except TypeError: # in case numeric index passed + except TypeError: # in case numeric index passed temp_df = pd.DataFrame({'dt' : dt.dt.tz_localize(None), - 'dt_left' : dt_left.dt.tz_localize(None) - }) + 'dt_left' : dt_left.dt.tz_localize(None) + }) # conversion from dates to seconds since epoch (unix time) def to_unix(s): @@ -429,7 +448,7 @@ def to_unix(s): dt_center = (temp_df['averages'].tz_localize(dt.dt.tz)).dt.tz_localize(dt.dt.tz) except TypeError: # not a timeseries index dt_center = (temp_df['averages']).dt.tz_localize(dt.dt.tz) - + return dt_center From c624a8c9c04f228a52404eb7901ee579cfe12851 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 6 Aug 2025 16:28:00 -0600 Subject: [PATCH 23/36] update return for default = none uncertainty option --- rdtools/degradation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 9382b32c..3260a007 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -391,7 +391,10 @@ def degradation_year_on_year(energy_normalized, recenter=True, return (Rd_pct, Rd_CI, calc_info) else: # If we do not need confidence intervals and exceedance level - return (Rd_pct, None, calc_info) + return (Rd_pct, None, { + 'YoY_values': yoy_result, + 'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'] + }) def _avg_timestamp_old_Pandas(dt, dt_left): From 3623edfd0ba4da8fcfd6ead28d3577ee554fc782 Mon Sep 17 00:00:00 2001 From: cdeline Date: Fri, 8 Aug 2025 16:11:34 -0600 Subject: [PATCH 24/36] degradation_year_on_year - go back to single return when uncertainty_value = None to avoid breaking pytests. --- rdtools/degradation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 3260a007..85d1ceb9 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -301,7 +301,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, Rd_pct = yoy_result.median() - YoY_times = df.dropna(subset=['yoy'], inplace=False) + YoY_times = df.dropna(subset=['yoy'], inplace=False).copy() # calculate usage of points. df_left = YoY_times.set_index(YoY_times.dt_left) # .drop_duplicates('dt_left') @@ -391,10 +391,13 @@ def degradation_year_on_year(energy_normalized, recenter=True, return (Rd_pct, Rd_CI, calc_info) else: # If we do not need confidence intervals and exceedance level + """ # TODO: return tuple just like all other cases. 
Issue: test_bootstrap_module return (Rd_pct, None, { 'YoY_values': yoy_result, 'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'] }) + """ + return Rd_pct def _avg_timestamp_old_Pandas(dt, dt_left): From 8a5c9351d75ce5e02274f29f3de44e8814f5c2c3 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 18 Aug 2025 10:26:24 -0600 Subject: [PATCH 25/36] update plotting for detailed=True, allow usage_of_points > 2 --- rdtools/plotting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rdtools/plotting.py b/rdtools/plotting.py index 93a07bac..ecc1b69b 100644 --- a/rdtools/plotting.py +++ b/rdtools/plotting.py @@ -109,7 +109,8 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield, renormalized_yield = normalized_yield / yoy_info['renormalizing_factor'] if detailed: - colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 2: plot_color}) + colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 2: plot_color}, + na_action='ignore').fillna(plot_color) else: colors = plot_color ax1.scatter( From d6670b9ca7c84edc6e42a4e9851491de910945f0 Mon Sep 17 00:00:00 2001 From: cdeline Date: Mon, 18 Aug 2025 11:00:02 -0600 Subject: [PATCH 26/36] update plotting detailed=True for (even) and (odd) number of points coloring --- rdtools/plotting.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rdtools/plotting.py b/rdtools/plotting.py index ecc1b69b..30762cae 100644 --- a/rdtools/plotting.py +++ b/rdtools/plotting.py @@ -54,8 +54,8 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield, Include extra information in the returned figure: * Color code points by the number of times they get used in calculating - Rd slopes. Default color: 2 times (as a start and endpoint). Green: - 1 time. Red: 0 times. + Rd slopes. Default color: even times (as a start and endpoint). Green: + odd times. Red: 0 times. * The number of year-on-year slopes contributing to the histogram. Note @@ -109,8 +109,9 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield, renormalized_yield = normalized_yield / yoy_info['renormalizing_factor'] if detailed: - colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 2: plot_color}, - na_action='ignore').fillna(plot_color) + colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 3: 'green', 5: 'green', + 7: 'green', 9: 'green', 11: 'green' + }, na_action='ignore').fillna(plot_color) else: colors = plot_color ax1.scatter( From 13ac2c3dd96a658b7440b087404806686e630b34 Mon Sep 17 00:00:00 2001 From: cdeline Date: Wed, 20 Aug 2025 13:58:40 -0600 Subject: [PATCH 27/36] To allow multi_yoy=True in plotting.degradation_timeseries_plot, resample.mean() the YoY_values. --- rdtools/plotting.py | 6 +++++- rdtools/test/degradation_test.py | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rdtools/plotting.py b/rdtools/plotting.py index 30762cae..94af0134 100644 --- a/rdtools/plotting.py +++ b/rdtools/plotting.py @@ -485,8 +485,12 @@ def _bootstrap(x, percentile, reps): plot_color = 'tab:orange' if ci_color is None: ci_color = 'C0' + try: + roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//2) + except ValueError: # this occurs with degradation_yoy(multi_yoy=True). 
resample to daily mean + roller = results_values.resample('D').mean().rolling(f'{rolling_days}d', + min_periods=rolling_days//2) - roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//2) # unfortunately it seems that you can't return multiple values in the rolling.apply() kernel. # TODO: figure out some workaround to return both percentiles in a single pass if include_ci: diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index b2f8df31..cd5ebbfe 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -233,23 +233,25 @@ def test_avg_timestamp_old_Pandas(self): from rdtools.degradation import _avg_timestamp_old_Pandas funcName = sys._getframe().f_code.co_name logging.debug('Running {}'.format(funcName)) - dt = pd.Series(self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC'), - index=self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC')) + dt = pd.Series(self.get_corr_energy(0, 'D').index[-4:].tz_localize('UTC'), + index=self.get_corr_energy(0, 'D').index[-4:].tz_localize('UTC')) dt_right = pd.Series(self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC') + pd.Timedelta(days=365), index=self.get_corr_energy(0, 'D').index[-3:].tz_localize('UTC')) # Expected result is the midpoint between each pair expected = pd.Series([ + pd.NaT, pd.Timestamp("2015-06-30 12:00:00"), pd.Timestamp("2015-07-01 12:00:00"), pd.Timestamp("2015-07-02 12:00:00")], - index=self.get_corr_energy(0, 'D').index[-3:], + index=self.get_corr_energy(0, 'D').index[-4:], name='averages', dtype='datetime64[ns, UTC]' ).tz_localize('UTC') result = _avg_timestamp_old_Pandas(dt, dt_right).asfreq(freq='D') print(result) print(expected) + pd.testing.assert_series_equal(result, expected) From 3c43bdb7cbd6c3a5f7150bf4e91909dbf29496ef Mon Sep 17 00:00:00 2001 From: Michael Deceglie Date: Wed, 20 Aug 2025 19:28:16 -0600 Subject: [PATCH 28/36] Update changelog --- docs/sphinx/source/changelog.rst | 1 + docs/sphinx/source/changelog/v3.0.1.rst | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 docs/sphinx/source/changelog/v3.0.1.rst diff --git a/docs/sphinx/source/changelog.rst b/docs/sphinx/source/changelog.rst index fc3d805a..371f6e00 100644 --- a/docs/sphinx/source/changelog.rst +++ b/docs/sphinx/source/changelog.rst @@ -1,5 +1,6 @@ RdTools Change Log ================== +.. include:: changelog/v3.0.1.rst .. include:: changelog/v3.0.0.rst .. include:: changelog/v2.1.8.rst .. 
include:: changelog/v2.1.7.rst diff --git a/docs/sphinx/source/changelog/v3.0.1.rst b/docs/sphinx/source/changelog/v3.0.1.rst new file mode 100644 index 00000000..cc941121 --- /dev/null +++ b/docs/sphinx/source/changelog/v3.0.1.rst @@ -0,0 +1,11 @@ +************************* +v3.0.1 (August 20, 2025) +************************* + +Requirements +------------ +* Updated Jinja2==3.1.6 in ``notebook_requirements.txt`` (:pull:`465`) +* Updated tornado==6.5.1 in ``notebook_requirements.txt`` (:pull:`465`) +* Updated requests==2.32.4 in ``requirements.txt`` (:pull:`465`) +* Updated urllib3==2.5.0 in ``requirements.txt`` (:pull:`465`) +* Removed constraint that scipy<1.16.0 (:pull:`465`) \ No newline at end of file From 8060f317773c9a7cc86c59a3ab1a14889064f112 Mon Sep 17 00:00:00 2001 From: Michael Deceglie Date: Wed, 20 Aug 2025 20:12:08 -0600 Subject: [PATCH 29/36] Update release date --- docs/sphinx/source/changelog/v3.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/source/changelog/v3.0.1.rst b/docs/sphinx/source/changelog/v3.0.1.rst index cc941121..9bfaaaa3 100644 --- a/docs/sphinx/source/changelog/v3.0.1.rst +++ b/docs/sphinx/source/changelog/v3.0.1.rst @@ -1,5 +1,5 @@ ************************* -v3.0.1 (August 20, 2025) +v3.0.1 (August 21, 2025) ************************* Requirements From 1646f165112042b1e6071909fe1f12544b56fb6e Mon Sep 17 00:00:00 2001 From: cdeline Date: Tue, 16 Sep 2025 16:54:35 -0600 Subject: [PATCH 30/36] nbval fixes from qnguyen345-bare_except_error --- .github/workflows/nbval.yaml | 2 +- docs/TrendAnalysis_example.ipynb | 4 ++-- docs/TrendAnalysis_example_NSRDB.ipynb | 2 +- setup.py | 4 +--- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/nbval.yaml b/.github/workflows/nbval.yaml index e014b494..84d99b25 100644 --- a/.github/workflows/nbval.yaml +++ b/.github/workflows/nbval.yaml @@ -29,7 +29,7 @@ jobs: - name: Run notebook and check output run: | # --sanitize-with: pre-process text to remove irrelevant differences (e.g. 
warning filepaths) - pytest --nbval docs/${{ matrix.notebook-file }} --sanitize-with docs/nbval_sanitization_rules.cfg + pytest --nbval --nbval-sanitize-with docs/nbval_sanitization_rules.cfg docs/${{ matrix.notebook-file }} - name: Run notebooks again, save files run: | pip install nbconvert[webpdf] diff --git a/docs/TrendAnalysis_example.ipynb b/docs/TrendAnalysis_example.ipynb index 45744e99..14eb7955 100644 --- a/docs/TrendAnalysis_example.ipynb +++ b/docs/TrendAnalysis_example.ipynb @@ -62160,7 +62160,7 @@ "# Visualize the results\n", "ta_new_filter.plot_degradation_summary('sensor', summary_title='Sensor-based degradation results',\n", " scatter_ymin=0.5, scatter_ymax=1.1,\n", - " hist_xmin=-30, hist_xmax=45);\n", + " hist_xmin=-30, hist_xmax=45)\n", "plt.show()" ] }, @@ -62247,7 +62247,7 @@ "# Visualize the results\n", "ta_stuck_filter.plot_degradation_summary('sensor', summary_title='Sensor-based degradation results',\n", " scatter_ymin=0.5, scatter_ymax=1.1,\n", - " hist_xmin=-30, hist_xmax=45);\n", + " hist_xmin=-30, hist_xmax=45)\n", "plt.show()" ] }, diff --git a/docs/TrendAnalysis_example_NSRDB.ipynb b/docs/TrendAnalysis_example_NSRDB.ipynb index 6c9f6b7d..fce1fa92 100644 --- a/docs/TrendAnalysis_example_NSRDB.ipynb +++ b/docs/TrendAnalysis_example_NSRDB.ipynb @@ -158,7 +158,7 @@ "ax.plot(df.index, df.soiling, 'o', alpha=0.01)\n", "#ax.set_ylim(0,1500)\n", "fig.autofmt_xdate()\n", - "ax.set_ylabel('soiling signal');\n", + "ax.set_ylabel('soiling signal')\n", "df['power'] = df['power_ac'] * df['soiling']\n", "\n", "plt.show()" diff --git a/setup.py b/setup.py index 441b16c0..f38a2bec 100755 --- a/setup.py +++ b/setup.py @@ -36,9 +36,7 @@ "pytest-cov", "coverage", "flake8", - # nbval greater than 0.9.6 has a bug with semicolon - # https://github.com/computationalmodelling/nbval/issues/194 - "nbval<=0.9.6", + "nbval", "pytest-mock", ] From 963527bc0c09f6f55b7e63bc0f5be67fb93ec25f Mon Sep 17 00:00:00 2001 From: cdeline Date: Thu, 18 Sep 2025 10:47:17 -0600 Subject: [PATCH 31/36] Add pandas 3.0 futurewarning handling --- rdtools/degradation.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 85d1ceb9..64100cd6 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -330,8 +330,11 @@ def degradation_year_on_year(energy_normalized, recenter=True, yoy_result.index = YoY_times[f'dt_{label}'] yoy_result.index.name = None - energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', - right_index=True, left_index=False).fillna(0.0) + with pd.option_context('future.no_silent_downcasting', True): + # the following is throwing a warning without the above context manager. 
+ # see https://github.com/pandas-dev/pandas/issues/57734 + energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', + right_index=True, left_index=False).fillna(0.0) if uncertainty_method == 'simple': # If we need the full results calc_info = { From 74357346359d98e3dfaa46015ac41660a56cce99 Mon Sep 17 00:00:00 2001 From: cdeline Date: Thu, 18 Sep 2025 10:55:26 -0600 Subject: [PATCH 32/36] Try again to solve pandas3.0 futurewarning --- rdtools/degradation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 64100cd6..0be8db97 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -330,11 +330,12 @@ def degradation_year_on_year(energy_normalized, recenter=True, yoy_result.index = YoY_times[f'dt_{label}'] yoy_result.index.name = None - with pd.option_context('future.no_silent_downcasting', True): - # the following is throwing a warning without the above context manager. - # see https://github.com/pandas-dev/pandas/issues/57734 - energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', - right_index=True, left_index=False).fillna(0.0) + # with pd.option_context('future.no_silent_downcasting', True): + # the following is throwing a warning without the above context manager. + # see https://github.com/pandas-dev/pandas/issues/57734 + energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', + right_index=True, left_index=False + ).fillna(0.0).infer_objects(copy=False) if uncertainty_method == 'simple': # If we need the full results calc_info = { From 3810fd57a95f2a6e54eeffe11cd02cd4d0e3c853 Mon Sep 17 00:00:00 2001 From: cdeline Date: Thu, 18 Sep 2025 16:21:00 -0600 Subject: [PATCH 33/36] attempt 3 to fix nbval --- docs/system_availability_example.ipynb | 2 +- rdtools/degradation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/system_availability_example.ipynb b/docs/system_availability_example.ipynb index 7a44ee09..0b9925d9 100644 --- a/docs/system_availability_example.ipynb +++ b/docs/system_availability_example.ipynb @@ -618,7 +618,7 @@ } ], "source": [ - "aa2.plot();" + "plt.show(aa2.plot())" ] }, { diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 0be8db97..ea1dcfb4 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -335,7 +335,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, # see https://github.com/pandas-dev/pandas/issues/57734 energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', right_index=True, left_index=False - ).fillna(0.0).infer_objects(copy=False) + ).fillna(0.0).infer_objects() if uncertainty_method == 'simple': # If we need the full results calc_info = { From fecbd2ecf400104cc443c01e4b680b3ba400f96b Mon Sep 17 00:00:00 2001 From: cdeline Date: Thu, 18 Sep 2025 18:50:02 -0600 Subject: [PATCH 34/36] Add infer_objects to remove futurewarning --- rdtools/degradation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index ea1dcfb4..b98d07db 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -331,11 +331,11 @@ def degradation_year_on_year(energy_normalized, recenter=True, yoy_result.index.name = None # with pd.option_context('future.no_silent_downcasting', True): - # the following is throwing a warning without the above context manager. 
+ # the following is throwing a futurewarning if infer_objects() isn't included here. # see https://github.com/pandas-dev/pandas/issues/57734 energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', right_index=True, left_index=False - ).fillna(0.0).infer_objects() + ).infer_objects().fillna(0.0) if uncertainty_method == 'simple': # If we need the full results calc_info = { From 67795f43d2ebbee4ec63c834e5bc812aba551b99 Mon Sep 17 00:00:00 2001 From: cdeline Date: Fri, 19 Sep 2025 14:11:58 -0600 Subject: [PATCH 35/36] update inline comments and whatsnew docs --- docs/sphinx/source/changelog/pending.rst | 4 ++-- rdtools/degradation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 815f82e5..ddca3ae2 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -5,8 +5,8 @@ v3.0.x (X, X, 2025) Enhancements ------------ * :py:func:`~rdtools.degradation.degradation_year_on_year` has new parameter ``label=`` - to return the calc_info['YoY_values'] as either right labeled (default), or center labeled. - (:issue:`459`) + to return the calc_info['YoY_values'] as either right labeled (default), left or center + labeled. (:issue:`459`) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index b98d07db..92923efd 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -209,7 +209,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, If `uncertainty_method` is 'circular_block', `block_length` determines the length of the blocks used in the circular block bootstrapping in number of days. Must be shorter than a third of the time series. - label : {'right', 'center'}, default 'right' + label : {'right', 'center', 'left'}, default 'right' Which Year-on-Year slope edge to label. Returns @@ -222,7 +222,7 @@ def degradation_year_on_year(energy_normalized, recenter=True, calc_info : dict * `YoY_values` - pandas series of year on year slopes, either right - or center labeled, depending on the `label` parameter. + left or center labeled, depending on the `label` parameter. 
* `renormalizing_factor` - float of value used to recenter data * `exceedance_level` - the degradation rate that was outperformed with probability of `exceedance_prob` From a0f4a1e1491eb031e2794b0251a3ffa9d5e9c49a Mon Sep 17 00:00:00 2001 From: cdeline Date: Fri, 19 Sep 2025 14:35:36 -0600 Subject: [PATCH 36/36] Clean up inline comments per Copilot review --- rdtools/degradation.py | 7 ++----- rdtools/test/degradation_test.py | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 92923efd..b7d28435 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -317,20 +317,17 @@ def degradation_year_on_year(energy_normalized, recenter=True, YoY_times['dt_center'] = _avg_timestamp_old_Pandas(YoY_times['dt'], YoY_times['dt_left']) else: YoY_times['dt_center'] = pd.to_datetime(YoY_times[['dt', 'dt_left']].mean(axis=1)) - # if label == 'center': - # df = df.set_index(df.dt_center) - # df.index.name = 'dt' YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']] YoY_times = YoY_times.rename(columns={'dt': 'dt_right'}) YoY_times.set_index(YoY_times[f'dt_{label}'], inplace=True) - # YoY_times = YoY_times.rename_axis(None, axis=1) YoY_times.index.name = None + + # now apply either right, left, or center label index to the yoy_result yoy_result.index = YoY_times[f'dt_{label}'] yoy_result.index.name = None - # with pd.option_context('future.no_silent_downcasting', True): # the following is throwing a futurewarning if infer_objects() isn't included here. # see https://github.com/pandas-dev/pandas/issues/57734 energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt', diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index cd5ebbfe..ada944c4 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -249,8 +249,6 @@ def test_avg_timestamp_old_Pandas(self): ).tz_localize('UTC') result = _avg_timestamp_old_Pandas(dt, dt_right).asfreq(freq='D') - print(result) - print(expected) pd.testing.assert_series_equal(result, expected)