@@ -1087,6 +1087,12 @@ class FillGapsAR(BaseFiller, BaseProcessing):
10871087 thresholds.
10881088 2- The biggest group of valid data is identified and is used to fit the model.
10891089 3- The neighboring gaps are filled using backcasting or forecasting.
1090+ 4- OPTIONAL: when the data's timestep is too short compared to the periodic
1091+ behavior (e.g., 5-min data for a 24h pattern), set resample_at_td to:
1092+ - Resample the fitting data to a larger timestep.
1093+ - Perform predictions at the resampled timestep.
1094+ - Use linear interpolation to restore the original data resolution.
1095+
10901096
10911097 The process is repeated at step 2 until there are no more gaps to fill
10921098
@@ -1101,6 +1107,8 @@ class FillGapsAR(BaseFiller, BaseProcessing):
11011107 The lower threshold for the size of gaps to be considered, by default None.
11021108 upper_gap_threshold : str or datetime.datetime, optional
11031109 The upper threshold for the size of gaps to be considered, by default None.
1110+ resample_at_td : str or datetime.timedelta or pandas.Timedelta, optional
1111+ The time delta used to resample the fitting data before prediction, by default None (no resampling).
11041112
11051113 Attributes
11061114 ----------
@@ -1118,19 +1126,50 @@ def __init__(
11181126 model_kwargs : dict = {},
11191127 gaps_lte : str | dt .datetime | pd .Timestamp = None ,
11201128 gaps_gte : str | dt .datetime | pd .Timestamp = None ,
1129+ resample_at_td : str | dt .timedelta | pd .Timedelta = None ,
11211130 ):
11221131 BaseFiller .__init__ (self , gaps_lte , gaps_gte )
11231132 BaseProcessing .__init__ (self )
11241133 self .model_name = model_name
11251134 self .model_kwargs = model_kwargs
1135+ self .resample_at_td = resample_at_td
1136+ gaps_lte = pd .Timedelta (gaps_lte ) if isinstance (gaps_lte , str ) else gaps_lte
1137+ resample_at_td = (
1138+ pd .Timedelta (resample_at_td )
1139+ if isinstance (resample_at_td , str )
1140+ else resample_at_td
1141+ )
1142+ if (
1143+ resample_at_td is not None
1144+ and gaps_lte is not None
1145+ and gaps_lte < resample_at_td
1146+ ):
1147+ raise ValueError (
1148+ f"Cannot predict data for gaps LTE to { gaps_lte } with data"
1149+ f"at a { resample_at_td } timestep"
1150+ )
11261151
def _fit_and_fill_x(self, X, biggest_group, col, idx, backcast):
    """
    Fit the model on one group of valid data and fill one neighboring gap.

    The gap is filled in place in ``X``. When ``self.resample_at_td`` is set,
    the fitting data is averaged at that timestep, predictions are made at
    that timestep only, and the remaining timestamps of the gap are restored
    by linear interpolation.

    Parameters
    ----------
    X : pd.DataFrame
        Data holding the column to fill. It is modified in place.
    biggest_group : index-like
        Row labels of ``X`` used to fit the model.
    col : hashable
        Label of the column of ``X`` to fit and fill.
    idx : pd.DatetimeIndex
        Timestamps of the gap to fill.
    backcast : bool
        Forwarded to the model constructor. True when the gap precedes the
        fitting data, False when it follows it.
    """
    check_is_fitted(self, attributes=["model_"])
    bc_model = self.model_(backcast=backcast, **self.model_kwargs)

    x_fit = X.loc[biggest_group, col]
    idx_origin = idx
    resampling = self.resample_at_td is not None

    if resampling:
        x_fit = x_fit.resample(self.resample_at_td).mean()
        idx = pd.date_range(idx[0], idx[-1], freq=self.resample_at_td)
        # A resampled fitting bin may carry the same timestamp as the gap
        # boundary that touches the fitting data. Drop that bin so that the
        # model is not fitted on a timestamp it is asked to predict.
        if not backcast and x_fit.index[-1] == idx[0]:
            x_fit = x_fit.iloc[:-1]
        elif backcast and x_fit.index[0] == idx[-1]:
            x_fit = x_fit.iloc[1:]

    bc_model.fit(x_fit)
    to_predict = idx.to_series()
    to_predict.name = col
    # NOTE(review): when resampling, every timestamp of the resampled idx is
    # presumably expected to exist in X's index (resample_at_td being a
    # multiple of the data timestep) -- confirm against callers.
    X.loc[idx, col] = bc_model.predict(to_predict).to_numpy().flatten()

    if resampling:
        # Only the resampled timestamps were predicted; interpolate over the
        # original gap timestamps to restore the data resolution.
        X.loc[idx_origin, col] = X.loc[idx_origin, col].interpolate()
11351174 def _fit_implementation (self , X : pd .Series | pd .DataFrame , y = None ):
11361175 self .model_ = MODEL_MAP [self .model_name ]
0 commit comments