diff --git a/tests/test_processing.py b/tests/test_processing.py index 99583a9..29c4453 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -656,6 +656,8 @@ def test_pd_fill_gap(self): toy_df_15min_hole = toy_df_15min.copy() toy_df_15min_hole.loc[hole_backast, "Temp_1"] = np.nan toy_df_15min_hole.loc[hole_forecast, "Temp_1"] = np.nan + toy_df_15min_hole.iloc[:12, 0] = np.nan + toy_df_15min_hole.iloc[-12:, 0] = np.nan filler = FillGapsAR(resample_at_td="1h") res = filler.fit_transform(toy_df_15min_hole.copy()) diff --git a/tide/processing.py b/tide/processing.py index 50456ab..69c5b04 100644 --- a/tide/processing.py +++ b/tide/processing.py @@ -1155,11 +1155,18 @@ def _fit_and_fill_x(self, X, biggest_group, col, idx, backcast): if self.resample_at_td is not None: x_fit = X.loc[biggest_group, col].resample(self.resample_at_td).mean() idx_origin = idx - idx = pd.date_range(idx[0], idx[-1], freq=self.resample_at_td) - if not backcast and x_fit.index[-1] == idx[0]: - x_fit = x_fit[:-1] - elif x_fit.index[0] == idx[-1]: - x_fit = x_fit[1:] + if backcast: + idx = pd.date_range( + idx[0], + x_fit.index[0] - pd.Timedelta(self.resample_at_td), + freq=self.resample_at_td, + ) + else: + idx = pd.date_range( + x_fit.index[-1] + pd.Timedelta(self.resample_at_td), + idx[-1], + freq=self.resample_at_td, + ) else: x_fit = X.loc[biggest_group, col] idx_origin = None @@ -1169,7 +1176,15 @@ def _fit_and_fill_x(self, X, biggest_group, col, idx, backcast): to_predict.name = col X.loc[idx, col] = bc_model.predict(to_predict).to_numpy().flatten() if self.resample_at_td is not None: - X.loc[idx_origin, col] = X.loc[idx_origin, col].interpolate() + beg = idx_origin[0] - idx_origin.freq + end = idx_origin[-1] + idx_origin.freq + # Interpolate linearly between inferred values and using neighbor data + X.loc[idx_origin, col] = X.loc[beg:end, col].interpolate() + # If gap is at boundaries + if beg < X.index[0]: + X.loc[idx_origin, col] = X.loc[idx_origin, col].bfill() + if end > X.index[-1]: + X.loc[idx_origin, col] = X.loc[idx_origin, col].ffill() def _fit_implementation(self, X: pd.Series | pd.DataFrame, y=None): self.model_ = MODEL_MAP[self.model_name]