Skip to content

Commit ce90d42

Browse files
authored
Merge pull request #7 from BuildingEnergySimulationTools/resample_ar_predicors
Resample ar predicors
2 parents 9260f2b + e9ac7f1 commit ce90d42

File tree

3 files changed

+66
-4
lines changed

3 files changed

+66
-4
lines changed

tests/test_processing.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -644,12 +644,37 @@ def test_pd_fill_gap(self):
644644
toy_df_gaps.loc[gap[0], gap[1]] = np.nan
645645

646646
filler = FillGapsAR()
647-
res = filler.fit_transform(toy_df_gaps)
647+
res = filler.fit_transform(toy_df_gaps.copy())
648648

649649
for gap in holes_pairs[1:]:
650650
# Skip the first one. r2_score doesn't work for only one value
651651
assert r2_score(toy_df.loc[gap[0], gap[1]], res.loc[gap[0], gap[1]]) > 0.99
652652

653+
toy_df_15min = toy_df.resample("15min").mean().interpolate()
654+
hole_backast = pd.date_range("2009-06-05", "2009-06-06 01:15:00", freq="15min")
655+
hole_forecast = pd.date_range("2009-08-05", "2009-08-06 01:45:00", freq="15min")
656+
toy_df_15min_hole = toy_df_15min.copy()
657+
toy_df_15min_hole.loc[hole_backast, "Temp_1"] = np.nan
658+
toy_df_15min_hole.loc[hole_forecast, "Temp_1"] = np.nan
659+
660+
filler = FillGapsAR(resample_at_td="1h")
661+
res = filler.fit_transform(toy_df_15min_hole.copy())
662+
663+
assert (
664+
r2_score(
665+
res.loc[hole_backast, "Temp_1"],
666+
toy_df_15min.loc[hole_backast, "Temp_1"],
667+
)
668+
> 0.95
669+
)
670+
assert (
671+
r2_score(
672+
res.loc[hole_forecast, "Temp_1"],
673+
toy_df_15min.loc[hole_forecast, "Temp_1"],
674+
)
675+
> 0.95
676+
)
677+
653678
def test_combiner(self):
654679
test_df = pd.DataFrame(
655680
{

tide/base.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,6 @@ def __init__(
157157

158158
def _pre_fit(self, X: pd.Series | pd.DataFrame):
159159
self.stl_kwargs = {} if self.stl_kwargs is None else self.stl_kwargs
160-
161-
X = check_and_return_dt_index_df(X)
162160
check_array(X)
163161

164162
self.stl_kwargs["period"] = timedelta_to_int(self.period, X)

tide/processing.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,12 @@ class FillGapsAR(BaseFiller, BaseProcessing):
10871087
thresholds.
10881088
2- The biggest group of valid data is identified and is used to fit the model.
10891089
3- The neighboring gaps are filled using backcasting or forecasting.
1090+
4- OPTIONAL When the data's timestep is too short compared to the periodic behavior
1091+
(e.g., 5-min data for a 24h pattern):
1092+
- Resample data to a larger timestep
1093+
- Perform predictions at the resampled timestep
1094+
- Use linear interpolation to restore original data resolution
1095+
10901096
10911097
The process is repeated at step 2 until there are no more gaps to fill
10921098
@@ -1101,6 +1107,8 @@ class FillGapsAR(BaseFiller, BaseProcessing):
11011107
The lower threshold for the size of gaps to be considered, by default None.
11021108
upper_gap_threshold : str or datetime.datetime, optional
11031109
The upper threshold for the size of gaps to be considered, by default None.
1110+
resample_at_td : str or timedelta, optional
1111+
The timestep to which the fitting data is resampled before prediction, by default None.
11041112
11051113
Attributes
11061114
----------
@@ -1118,19 +1126,50 @@ def __init__(
11181126
model_kwargs: dict = {},
11191127
gaps_lte: str | dt.datetime | pd.Timestamp = None,
11201128
gaps_gte: str | dt.datetime | pd.Timestamp = None,
1129+
resample_at_td: str | dt.timedelta | pd.Timedelta = None,
11211130
):
11221131
BaseFiller.__init__(self, gaps_lte, gaps_gte)
11231132
BaseProcessing.__init__(self)
11241133
self.model_name = model_name
11251134
self.model_kwargs = model_kwargs
1135+
self.resample_at_td = resample_at_td
1136+
gaps_lte = pd.Timedelta(gaps_lte) if isinstance(gaps_lte, str) else gaps_lte
1137+
resample_at_td = (
1138+
pd.Timedelta(resample_at_td)
1139+
if isinstance(resample_at_td, str)
1140+
else resample_at_td
1141+
)
1142+
if (
1143+
resample_at_td is not None
1144+
and gaps_lte is not None
1145+
and gaps_lte < resample_at_td
1146+
):
1147+
raise ValueError(
1148+
f"Cannot predict data for gaps LTE to {gaps_lte} with data"
1149+
f"at a {resample_at_td} timestep"
1150+
)
11261151

11271152
def _fit_and_fill_x(self, X, biggest_group, col, idx, backcast):
11281153
check_is_fitted(self, attributes=["model_"])
11291154
bc_model = self.model_(backcast=backcast, **self.model_kwargs)
1130-
bc_model.fit(X.loc[biggest_group, col])
1155+
if self.resample_at_td is not None:
1156+
x_fit = X.loc[biggest_group, col].resample(self.resample_at_td).mean()
1157+
idx_origin = idx
1158+
idx = pd.date_range(idx[0], idx[-1], freq=self.resample_at_td)
1159+
if not backcast and x_fit.index[-1] == idx[0]:
1160+
x_fit = x_fit[:-1]
1161+
elif x_fit.index[0] == idx[-1]:
1162+
x_fit = x_fit[1:]
1163+
else:
1164+
x_fit = X.loc[biggest_group, col]
1165+
idx_origin = None
1166+
1167+
bc_model.fit(x_fit)
11311168
to_predict = idx.to_series()
11321169
to_predict.name = col
11331170
X.loc[idx, col] = bc_model.predict(to_predict).to_numpy().flatten()
1171+
if self.resample_at_td is not None:
1172+
X.loc[idx_origin, col] = X.loc[idx_origin, col].interpolate()
11341173

11351174
def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None):
11361175
self.model_ = MODEL_MAP[self.model_name]

0 commit comments

Comments
 (0)