From 7a38ab75eaa272ecdd8943b8b6cdcfe5f2ccc885 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Mon, 17 Mar 2025 11:33:26 +0100 Subject: [PATCH 01/24] support standardizing a list of predictions --- .../manager/stepshifter_manager.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 260f59a..27f7548 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -36,9 +36,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: The standardized DataFrame """ - # post-process: replace inf and -inf with 0 - df = df.replace([np.inf, -np.inf], 0) - df = df.mask(df < 0, 0) + def standardize_value(value): + # 1) Replace inf and -inf with 0; + # 2) Replace negative values with 0 + if isinstance(value, list): + return [0 if (v == np.inf or v == -np.inf or v < 0) else v for v in value] + else: + return 0 if (value == np.inf or value == -np.inf or value < 0) else value + + df = df.applymap(standardize_value) return df def _split_hurdle_parameters(self): @@ -157,10 +163,9 @@ def _evaluate_model_artifact( raise df_predictions = stepshift_model.predict(run_type, eval_type) - if not self._is_shurf: - df_predictions = [ - StepshifterManager._get_standardized_df(df) for df in df_predictions - ] + df_predictions = [ + StepshifterManager._get_standardized_df(df) for df in df_predictions + ] return df_predictions def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame: From 5d0b72279a9426e61ab230a5c2d5ffbeec8e4da5 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Mon, 17 Mar 2025 11:33:34 +0100 Subject: [PATCH 02/24] add test --- tests/test_stepshifter_manager.py | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_stepshifter_manager.py b/tests/test_stepshifter_manager.py index 4cd430c..fb56a2d 100644 --- a/tests/test_stepshifter_manager.py +++ b/tests/test_stepshifter_manager.py @@ -118,18 +118,30 @@ def test_get_standardized_df(): """ Test the _get_standardized_df method to ensure it correctly standardizes the DataFrame. 
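    Both scalar cells and list-valued cells are exercised below, since models
    that return sampled predictions emit a list of draws per cell.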
""" - df = pd.DataFrame({ + df1 = pd.DataFrame({ "a": [1.0, -1.0, np.inf, -np.inf, 3.0], "b": [4.0, 5.0, -6.0, 7.0, -8.0] }) - expected_df = pd.DataFrame({ + expected_df1 = pd.DataFrame({ "a": [1.0, 0.0, 0.0, 0.0, 3.0], "b": [4.0, 5.0, 0.0, 7.0, 0.0] }) - result_df = StepshifterManager._get_standardized_df(df) - pd.testing.assert_frame_equal(result_df, expected_df) - - + df2 = pd.DataFrame({ + "a": [[1.0, -1.0, np.inf], + [-np.inf, 3.0, 4.0]], + "b": [[4.0, 5.0, -6.0], + [7.0, -8.0, 9.0]], + }) + expected_df2 = pd.DataFrame({ + "a": [[1.0, 0.0, 0.0], + [0.0, 3.0, 4.0]], + "b": [[4.0, 5.0, 0.0], + [7.0, 0.0, 9.0]], + }) + result_df1 = StepshifterManager._get_standardized_df(df1) + result_df2 = StepshifterManager._get_standardized_df(df2) + pd.testing.assert_frame_equal(result_df1, expected_df1) + pd.testing.assert_frame_equal(result_df2, expected_df2) def test_split_hurdle_parameters(stepshifter_manager_hurdle): """ @@ -164,9 +176,6 @@ def test_get_model(stepshifter_manager, stepshifter_manager_hurdle, mock_partiti stepshifter_manager._get_model(mock_partitioner_dict) mock_stepshifter_model.assert_called_once_with(stepshifter_manager.config, mock_partitioner_dict) mock_hurdle_model.assert_not_called() - - - def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle): """ @@ -194,8 +203,6 @@ def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle): mock_split_hurdle.assert_called_once() - - def test_evaluate_model_artifact(stepshifter_manager): """ Test the _evaluate_model_artifact method to ensure it correctly evaluates the model artifact. @@ -233,8 +240,6 @@ def test_evaluate_model_artifact(stepshifter_manager): path_artifact = stepshifter_manager._model_path.artifacts / artifact_name assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl") - - def test_forecast_model_artifact(stepshifter_manager): """ Test the _forecast_model_artifact method to ensure it correctly forecasts the model artifact. @@ -278,7 +283,6 @@ def test_forecast_model_artifact(stepshifter_manager): assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl") mock_logger.exception.assert_called_once_with(f"Model artifact not found at {path_artifact}") - def test_evaluate_sweep(stepshifter_manager): """ Test the _evaluate_sweep method. 
@@ -297,7 +301,3 @@ def test_evaluate_sweep(stepshifter_manager): # mock_read_dataframe.assert_called_once() mock_model.predict.assert_called_once_with("test_run_type", eval_type) mock_get_standardized_df.assert_called_once() - - - - From 00c4562f4e116d8c4fd5f4316c92ccae5d0d1cce Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:11:11 +0100 Subject: [PATCH 03/24] add shurf_model.py --- .../manager/stepshifter_manager.py | 6 +- views_stepshifter/models/shurf_model.py | 283 ++++++++++++++++++ 2 files changed, 286 insertions(+), 3 deletions(-) create mode 100644 views_stepshifter/models/shurf_model.py diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 27f7548..310bd56 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np from typing import Union, Optional, List, Dict -# from views_stepshifter.models.shurf import StepShiftedHurdleUncertainRF +from views_stepshifter.models.shurf_model import StepShiftedHurdleUncertainRF logger = logging.getLogger(__name__) @@ -84,8 +84,8 @@ def _get_model(self, partitioner_dict: dict): """ if self._is_hurdle: model = HurdleModel(self.config, partitioner_dict) - # elif self._is_shurf: - # model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) + elif self._is_shurf: + model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) else: self.config["model_reg"] = self.config["algorithm"] model = StepshifterModel(self.config, partitioner_dict) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py new file mode 100644 index 0000000..94e1dd4 --- /dev/null +++ b/views_stepshifter/models/shurf_model.py @@ -0,0 +1,283 @@ +from views_pipeline_core.managers.model import ModelManager +from views_stepshifter.models.stepshifter import StepshifterModel +from views_stepshifter.models.hurdle_model import HurdleModel +from views_stepshifter.models.validation import views_validate +from sklearn.utils.validation import check_is_fitted +import pandas as pd +from typing import List, Dict +import numpy as np +import logging +# from darts.models import RandomForest +from tqdm import tqdm + +logger = logging.getLogger(__name__) + +class StepShiftedHurdleUncertainRF(HurdleModel): + """ + Hurdle model for time series forecasting. The model consists of two stages: + 1. Binary stage: Predicts whether the target variable is 0 or > 0. + 2. Positive stage: Predicts the value of the target variable when it is > 0. + + Note: + This algorithm uses a two-step approach. + + **Step 1: Classification Stage** + In the first step, a regression model is used with a binary target (0 or 1), + indicating the absence or presence of violence. This stage functions similarly + to a linear probability model, estimating the likelihood of a positive outcome. + Since the model is a regression rather than a classification model, + these estimates are not strictly bounded between 0 and 1, + but this is acceptable for the purpose of this step. + + To determine whether an observation is classified as "positive," we apply a threshold. + The default threshold is 1, meaning that predictions above this value + are considered positive outcomes. This threshold can be adjusted as + a tunable hyperparameter to better suit specific requirements. 
+ + **Step 2: Regression Stage** + In the second step, we use a regression model to predict a continuous or count value + (e.g., the expected number of conflict fatalities) for the selected time series. + We include the entire time series for countries or PRIO grids where the + classification stage yielded at least one "positive" prediction, + rather than limiting the regression to just the predicted positive values. + """ + + def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]], threshold: float = 0.1): + super().__init__(config, partitioner_dict, threshold) + print(config) +# self._clf = RandomForest +# self._reg = RandomForest + self._clf_params = self._get_parameters(config)['clf'] + self._reg_params = self._get_parameters(config)['reg'] + self._threshold = threshold + + self._submodel_list = [] + + self._partitioner_dict = partitioner_dict + self._submodels_to_train = config['submodels_to_train'] + # self._n_estimators = config['parameters']['n_estimators'] + self.log_target = config['log_target'] + self._max_features = config['max_features'] + self._max_depth = config['max_depth'] + self._max_samples = config['max_samples'] + self._pred_samples = config['pred_samples'] + self._draw_dist = config['draw_dist'] + self._draw_sigma = config['draw_sigma'] + self._geo_unit_samples = config['geo_unit_samples'] + self._n_jobs = config['n_jobs'] + + @views_validate + def fit(self, df: pd.DataFrame): + """ + Generate predictions using the trained submodels. + This method performs the following steps: + 1. Prepares the data for classification and regression stages. + 2. Iterates over each submodel to generate predictions: + - Predicts probabilities using the classification model. + - Predicts target values using the regression model. + - Handles infinite values in predictions. + 3. Draws samples from the distributions: + - For each prediction sample, combines classification and regression predictions. + - Applies binomial, Poisson, or lognormal distributions to generate final predictions. + 4. Aggregates the predictions from all submodels into a final DataFrame. + Returns: + pd.DataFrame: A DataFrame containing the final set of predictions with indices set to 'draw'. 
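+
+        Note:
+            Each of the ``submodels_to_train`` submodels fits one
+            (binary_model, positive_model) pair per step; the per-submodel
+            model dicts are collected in ``self._submodel_list`` and sampled
+            from later in ``predict_sequence``.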
+ """ + df = self._process_data(df) + self._prepare_time_series(df) + + target_binary = [s.map(lambda x: (x > self._threshold).astype(float)) for s in self._target_train] + + # Positive outcome (for cases where target > threshold) + target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) + if (t.values() > self._threshold).any()]) + + for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): + # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") + + for step in tqdm(self._steps, desc=f"Steps for submodel {i+1}"): + # logger.info(f"Training step {step}") + # Fit binary-like stage using a regression model, but the target is binary (0 or 1) + binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) + binary_model.fit(target_binary, past_covariates=self._past_cov) + + # Fit positive stage using the regression model + positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) + positive_model.fit(target_pos, past_covariates=past_cov_pos) + self._models[step] = (binary_model, positive_model) + self._submodel_list.append(self._models) + logger.info(f"Submodel {i+1}/{self._submodels_to_train} trained successfully") + self.is_fitted_ = True + + + def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: + """ + Predicts n draws of outcomes based on the provided DataFrame . + + Parameters: + ----------- + self: StepShiftedHurdleUncertainRF + The model object. + + run_type : str + The type of run to perform. Currently it is unlikely to affect the behaviour of the function. + + eval_type : str + The type of evaluation to perform. Currently it is unlikely to affect the behaviour of the function. + + sequence_number : int + The sequence number to predict outcomes for. + + + Returns: + -------- + pd.DataFrame + The final predictions as a DataFrame. 
+ """ + + sample_number = 0 + final_preds = [] # This will hold predictions for all sub-models and all samples within sub-models + # Loop over submodels + submodel_number = 0 + for submodel in tqdm(self._submodel_list, desc=f"Predicting submodel: {run_type}", leave=True): +# print(submodel) + pred_by_step_binary = [self._predict_by_step(submodel[step][0], step, sequence_number) + for step in self._steps] + pred_by_step_positive = [self._predict_by_step(submodel[step][1], step, sequence_number) + for step in self._steps] + + pred_concat_binary = pd.concat(pred_by_step_binary, axis=0) + + pred_concat_binary.rename(columns={'step_combined':'Classification'}, inplace=True) + pred_concat_positive = pd.concat(pred_by_step_positive, axis=0) + pred_concat_positive.rename(columns={'step_combined':'Regression'}, inplace=True) + pred_concat = pd.concat([pred_concat_binary, pred_concat_positive], axis=1) + pred_concat['submodel'] = submodel_number +# print(pred_concat.tail(12)) + + # Append the combined predictions to the final predictions list + final_preds.append(pred_concat) + submodel_number += 1 +# submodel_preds[i] = final_preds + # Generate a DataFrame from the final predictions list for this sequence number + final_preds_aslists = pd.concat(final_preds, axis=0) + # Drawing samples from the classification model + # Ensuring that the classification probabilities are between 0 and 1: + final_preds_aslists['Classification'] = final_preds_aslists['Classification'].apply(lambda x: np.clip(x, 0, 1)) + final_preds_aslists['ClassificationSample'] = final_preds_aslists['Classification'].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) + + # Drawing samples from the regression model + + if self.log_target == True: + if self._draw_dist == 'Poisson': # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here + print('Poisson not implemented') + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + if self._draw_dist == 'Lognormal': + # Draw from normal distribution for log-transformed outcomes, then exponentiate, then round to integer + #pred_concat['RegressionSample'] = pred_concat['Regression'].apply(lambda x: np.random.normal(x, self._draw_sigma, self._pred_samples)) + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(x, self._draw_sigma, self._pred_samples))))) + if self.log_target == False: + if self._draw_dist == 'Poisson': # Note: this assumes a non-log-transformed target + print('Poisson not implemented') + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + if self._draw_dist == 'Lognormal': + print('Draws for non-log-transformed target: first implementation' ) + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(np.log1p(x), self._draw_sigma, self._pred_samples))))) + + if self._draw_dist == '': + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + print('final_preds_aslists contains the samples in list form. Shape:', final_preds_aslists.shape, '. 
Looks like this:') + print(final_preds_aslists.tail(20)) + # 'Explode' the samples to get one row per sample + final_preds_full = final_preds_aslists.explode(['ClassificationSample','RegressionSample']) + final_preds_full['Prediction'] = final_preds_full['ClassificationSample'] * final_preds_full['RegressionSample'] + # Ensuring that the final predictions are positive: + final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) + # Column for the main prediction: + pred_col_name = 'pred_' + self.depvar + final_preds_full[pred_col_name] = final_preds_full['Prediction'] + # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions + if self.log_target == True: + final_preds_full['LogPrediction'] = final_preds_full['Prediction'] + final_preds_full['Prediction'] = np.expm1(final_preds_full['Prediction']) + if self.log_target == False: + final_preds_full['LogPrediction'] = np.log1p(final_preds_full['Prediction']) + final_preds_full.drop(columns=['Classification','Regression','ClassificationSample','RegressionSample','submodel','Prediction','LogPrediction'],inplace=True) + final_preds = pd.DataFrame(final_preds_full.groupby(['month_id', 'country_id'])[pred_col_name].apply(list)) + print('final_preds is the end product of the predict sequence function. Shape:', final_preds_full.shape) + print(final_preds.tail(20)) + return final_preds + + @views_validate + def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") -> pd.DataFrame: + """ + Predicts outcomes based on the provided DataFrame and run type. + + Parameters: + ----------- + df : pd.DataFrame + The input data for making predictions. + run_type : str + The type of run to perform. If 'forecasting', a single prediction is made. + Otherwise, multiple predictions are made based on the evaluation sequence number. + eval_type : str, optional + The type of evaluation to perform. Default is "standard". + + Returns: + -------- + pd.DataFrame + The final predictions as a DataFrame. 
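+
+        Note:
+            For evaluation run types a list with one DataFrame per evaluation
+            sequence is returned; for ``run_type='forecasting'`` a single
+            DataFrame of sampled predictions is returned.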
+ """ + + # Process the input data to ensure it is in the correct format + df = self._process_data(df) + # Check if the model has been fitted before making predictions + check_is_fitted(self, 'is_fitted_') + print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) + + # If the run type is not 'forecasting', perform multiple predictions + if run_type != 'forecasting': + preds = [] # D: List to collect predictions for each sequence + # If the evaluation type is "standard", iterate over the evaluation sequence number + submodel_preds = {} # Not sure this belongs here + if eval_type == "standard": + # Loop over the evaluation sequence number + for sequence_number in tqdm(range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc=f"Sequence", leave=True): +# print('sequence_number', sequence_number) + temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) + + # Output the temporary final predictions with samples as parquet + temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') + # Convert to views_pipeline standard format + + # Aggregate the predictions into point predictions +# final_preds.pop('LogPrediction') +# agg_preds = np.log1p(temp_preds_full.groupby(['month_id', 'country_id']).mean()) +# final_preds.rename(columns={'Prediction':'pred_ged_sb'}, inplace=True) +# agg_preds.pop('submodel') + preds.append(temp_preds_full) # D: Append the final predictions for this sequence number + # Output the final predictions as parquet +# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') + return preds + else: + # If the run type is 'forecasting', perform a single prediction + sequence_number = 0 + temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) + print('temp_preds_full') + +# print('final_preds_aslists') +# print(final_preds_aslists.describe()) + + # Output the final predictions with samples as parquet + temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') + # Aggregate the predictions into point predictions +# agg_preds = temp_preds_full.groupby(['month_id', 'country_id']).mean() +# agg_preds.pop('submodel') + # Output the final predictions as parquet +# agg_preds['ged_sb_dep'] = agg_preds['Prediction'] +# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') + + # Return the final predictions as a DataFrame + print('Returning final predictions:') + print(temp_preds_full.tail(20)) + return temp_preds_full \ No newline at end of file From f395b4b91f7b3b61a257ea02110ead5229c25db8 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:19:38 +0100 Subject: [PATCH 04/24] remove commented codes --- views_stepshifter/models/hurdle_model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py index 639adf1..c982a58 100644 --- a/views_stepshifter/models/hurdle_model.py +++ b/views_stepshifter/models/hurdle_model.py @@ -7,8 +7,6 @@ import logging import tqdm from concurrent.futures import ProcessPoolExecutor, as_completed -# import multiprocessing -# multiprocessing.set_start_method('spawn') 
from functools import partial logger = logging.getLogger(__name__) From 96a163ccab8cb9c0ec5d811c408b54a6d6ff83c8 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:20:01 +0100 Subject: [PATCH 05/24] adapt to latest class --- views_stepshifter/models/shurf_model.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 94e1dd4..4a2c7b5 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -42,14 +42,10 @@ class StepShiftedHurdleUncertainRF(HurdleModel): rather than limiting the regression to just the predicted positive values. """ - def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]], threshold: float = 0.1): - super().__init__(config, partitioner_dict, threshold) - print(config) -# self._clf = RandomForest -# self._reg = RandomForest + def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): + super().__init__(config, partitioner_dict) self._clf_params = self._get_parameters(config)['clf'] self._reg_params = self._get_parameters(config)['reg'] - self._threshold = threshold self._submodel_list = [] @@ -85,12 +81,16 @@ def fit(self, df: pd.DataFrame): """ df = self._process_data(df) self._prepare_time_series(df) + self._clf = self._resolve_clf_model(self._config["model_clf"]) + self._reg = self._resolve_reg_model(self._config["model_reg"]) - target_binary = [s.map(lambda x: (x > self._threshold).astype(float)) for s in self._target_train] + target_binary = [ + s.map(lambda x: (x > 0).astype(float)) for s in self._target_train + ] # Positive outcome (for cases where target > threshold) target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) - if (t.values() > self._threshold).any()]) + if (t.values() > 0).any()]) for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") From 22fca3f95ebbf62ea3804ee78053900e0d19f528 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:24:26 +0100 Subject: [PATCH 06/24] update --- views_stepshifter/models/shurf_model.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 4a2c7b5..01d790c 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -110,7 +110,7 @@ def fit(self, df: pd.DataFrame): self.is_fitted_ = True - def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: + def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame: """ Predicts n draws of outcomes based on the provided DataFrame . @@ -208,8 +208,7 @@ def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: print(final_preds.tail(20)) return final_preds - @views_validate - def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") -> pd.DataFrame: + def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: """ Predicts outcomes based on the provided DataFrame and run type. @@ -229,8 +228,6 @@ def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") The final predictions as a DataFrame. 
""" - # Process the input data to ensure it is in the correct format - df = self._process_data(df) # Check if the model has been fitted before making predictions check_is_fitted(self, 'is_fitted_') print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) From 6dd594f07aec9f73b906d0652bcadf39285ae5d0 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:30:12 +0100 Subject: [PATCH 07/24] fix 1 --- views_stepshifter/models/shurf_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 01d790c..4a2b449 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -194,7 +194,7 @@ def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame # Ensuring that the final predictions are positive: final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) # Column for the main prediction: - pred_col_name = 'pred_' + self.depvar + pred_col_name = 'pred_' + self._targets final_preds_full[pred_col_name] = final_preds_full['Prediction'] # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions if self.log_target == True: @@ -230,7 +230,7 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: # Check if the model has been fitted before making predictions check_is_fitted(self, 'is_fitted_') - print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) + print('Dependent variable:', self._targets, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) # If the run type is not 'forecasting', perform multiple predictions if run_type != 'forecasting': From 942c273de24b41a4783015d73df770c649671b8d Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:39:38 +0100 Subject: [PATCH 08/24] add ShurfModel to stepshifter_manager --- views_stepshifter/manager/stepshifter_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 310bd56..96c4f27 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -3,12 +3,12 @@ from views_pipeline_core.files.utils import read_dataframe from views_stepshifter.models.stepshifter import StepshifterModel from views_stepshifter.models.hurdle_model import HurdleModel +from views_stepshifter.models.shurf_model import ShurfModel import logging import pickle import pandas as pd import numpy as np from typing import Union, Optional, List, Dict -from views_stepshifter.models.shurf_model import StepShiftedHurdleUncertainRF logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ def __init__( ) -> None: 
super().__init__(model_path, wandb_notifications, use_prediction_store) self._is_hurdle = self._config_meta["algorithm"] == "HurdleModel" - self._is_shurf = self._config_meta["algorithm"] == "SHURF" + self._is_shurf = self._config_meta["algorithm"] == "ShurfModel" @staticmethod def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: @@ -85,7 +85,7 @@ def _get_model(self, partitioner_dict: dict): if self._is_hurdle: model = HurdleModel(self.config, partitioner_dict) elif self._is_shurf: - model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) + model = ShurfModel(self.config, partitioner_dict) else: self.config["model_reg"] = self.config["algorithm"] model = StepshifterModel(self.config, partitioner_dict) @@ -102,7 +102,7 @@ def _train_model_artifact(self): path_raw = self._model_path.data_raw path_artifacts = self._model_path.artifacts # W&B does not directly support nested dictionaries for hyperparameters - if self.config["sweep"] and self._is_hurdle: + if self.config["sweep"] and (self._is_hurdle or self._is_shurf): self.config = self._split_hurdle_parameters() run_type = self.config["run_type"] From b2c5d96c01f4c6e209b3ebc003a51607e09bfcf4 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:39:59 +0100 Subject: [PATCH 09/24] add eval_type check --- views_stepshifter/models/hurdle_model.py | 31 +++++++++--------------- views_stepshifter/models/stepshifter.py | 21 ++++------------ 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py index c982a58..b539821 100644 --- a/views_stepshifter/models/hurdle_model.py +++ b/views_stepshifter/models/hurdle_model.py @@ -147,27 +147,17 @@ def fit(self, df: pd.DataFrame): self._models = models self.is_fitted_ = True - # for step in tqdm.tqdm(self._steps, desc="Fitting model for step", leave=True): - # # Fit binary-like stage using a classification model, but the target is binary (0 or 1) - # binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) - # binary_model.fit(target_binary, past_covariates=self._past_cov) - - # # Fit positive stage using the regression model - # positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) - # positive_model.fit(target_pos, past_covariates=past_cov_pos) - # self._models[step] = (binary_model, positive_model) - # self.is_fitted_ = True - def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: check_is_fitted(self, "is_fitted_") if run_type != "forecasting": - final_preds = [] + if eval_type == "standard": total_sequence_number = ( ModelManager._resolve_evaluation_sequence_number(eval_type) ) if self.get_device_params().get("device") == "cuda": + pred = [] for sequence_number in tqdm.tqdm( range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc="Predicting for sequence number", @@ -184,9 +174,9 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ) for step in self._steps ] - final_pred = pd.concat(pred_by_step_binary, axis=0) * pd.concat(pred_by_step_positive, axis=0) - final_preds.append(final_pred) - return final_preds + pred = pd.concat(pred_by_step_binary, axis=0) * pd.concat(pred_by_step_positive, axis=0) + preds.append(pred) + else: preds = [None] * total_sequence_number with ProcessPoolExecutor() as executor: @@ -201,7 +191,10 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ): sequence_number = 
futures[future] preds[sequence_number] = future.result() - return preds + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) else: if self.get_device_params().get("device") == "cuda": @@ -215,10 +208,10 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: self._predict_by_step(self._models[step][1], step, 0) ) - final_preds = pd.concat(pred_by_step_binary, axis=0) * pd.concat( + preds = pd.concat(pred_by_step_binary, axis=0) * pd.concat( pred_by_step_positive, axis=0 ) - return final_preds + else: with ProcessPoolExecutor() as executor: futures_binary = { @@ -255,4 +248,4 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: pd.concat(pred_by_step_binary, axis=0).sort_index() * pd.concat(pred_by_step_positive, axis=0).sort_index() ) - return preds + return preds diff --git a/views_stepshifter/models/stepshifter.py b/views_stepshifter/models/stepshifter.py index 2d42e5a..dca342b 100644 --- a/views_stepshifter/models/stepshifter.py +++ b/views_stepshifter/models/stepshifter.py @@ -237,18 +237,6 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: if run_type != "forecasting": if eval_type == "standard": - # preds = [] - # for sequence_number in tqdm.tqdm( - # range(ModelManager._resolve_evaluation_sequence_number(eval_type)), - # desc="Predicting for sequence number", - # ): - # pred_by_step = [ - # self._predict_by_step(self._models[step], step, sequence_number) - # for step in self._steps - # ] - # pred = pd.concat(pred_by_step, axis=0) - # preds.append(pred) - total_sequence_number = ( ModelManager._resolve_evaluation_sequence_number(eval_type) ) @@ -282,18 +270,19 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ): sequence_number = futures[future] preds[sequence_number] = future.result() + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) else: - # preds = [] - # for step in tqdm.tqdm(self._steps, desc="Predicting for steps"): - # preds.append(self._predict_by_step(self._models[step], step, 0)) - # preds = pd.concat(preds, axis=0).sort_index() if self.get_device_params().get("device") == "cuda": preds = [] for step in tqdm.tqdm(self._steps, desc="Predicting for steps"): preds.append(self._predict_by_step(self._models[step], step, 0)) preds = pd.concat(preds, axis=0).sort_index() + else: with ProcessPoolExecutor() as executor: futures = { From 45b146eb52ebcf065f2f7a355833f5d353b67ce8 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:46:30 +0100 Subject: [PATCH 10/24] rename --- views_stepshifter/models/shurf_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 4a2b449..9c5e103 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -class StepShiftedHurdleUncertainRF(HurdleModel): +class ShurfModel(HurdleModel): """ Hurdle model for time series forecasting. The model consists of two stages: 1. Binary stage: Predicts whether the target variable is 0 or > 0. 
From 92c2e227500ef9750c7489f6c9f632cb32a52311 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:53:29 +0100 Subject: [PATCH 11/24] cleanup --- views_stepshifter/models/shurf_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 9c5e103..155a1e4 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -53,14 +53,14 @@ def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): self._submodels_to_train = config['submodels_to_train'] # self._n_estimators = config['parameters']['n_estimators'] self.log_target = config['log_target'] - self._max_features = config['max_features'] - self._max_depth = config['max_depth'] - self._max_samples = config['max_samples'] + # self._max_features = config['max_features'] + # self._max_depth = config['max_depth'] + # self._max_samples = config['max_samples'] self._pred_samples = config['pred_samples'] self._draw_dist = config['draw_dist'] self._draw_sigma = config['draw_sigma'] - self._geo_unit_samples = config['geo_unit_samples'] - self._n_jobs = config['n_jobs'] + # self._geo_unit_samples = config['geo_unit_samples'] + # self._n_jobs = config['n_jobs'] @views_validate def fit(self, df: pd.DataFrame): From 9a9cd0d90cb1a21bfee964c41e8b7131e020d6e7 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 16:00:17 +0100 Subject: [PATCH 12/24] update shurf --- views_stepshifter/models/shurf_model.py | 337 ++++++++++++------------ 1 file changed, 173 insertions(+), 164 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 155a1e4..51365da 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -1,5 +1,4 @@ from views_pipeline_core.managers.model import ModelManager -from views_stepshifter.models.stepshifter import StepshifterModel from views_stepshifter.models.hurdle_model import HurdleModel from views_stepshifter.models.validation import views_validate from sklearn.utils.validation import check_is_fitted @@ -7,58 +6,28 @@ from typing import List, Dict import numpy as np import logging -# from darts.models import RandomForest from tqdm import tqdm logger = logging.getLogger(__name__) + class ShurfModel(HurdleModel): - """ - Hurdle model for time series forecasting. The model consists of two stages: - 1. Binary stage: Predicts whether the target variable is 0 or > 0. - 2. Positive stage: Predicts the value of the target variable when it is > 0. - - Note: - This algorithm uses a two-step approach. - - **Step 1: Classification Stage** - In the first step, a regression model is used with a binary target (0 or 1), - indicating the absence or presence of violence. This stage functions similarly - to a linear probability model, estimating the likelihood of a positive outcome. - Since the model is a regression rather than a classification model, - these estimates are not strictly bounded between 0 and 1, - but this is acceptable for the purpose of this step. - - To determine whether an observation is classified as "positive," we apply a threshold. - The default threshold is 1, meaning that predictions above this value - are considered positive outcomes. This threshold can be adjusted as - a tunable hyperparameter to better suit specific requirements. 
- - **Step 2: Regression Stage** - In the second step, we use a regression model to predict a continuous or count value - (e.g., the expected number of conflict fatalities) for the selected time series. - We include the entire time series for countries or PRIO grids where the - classification stage yielded at least one "positive" prediction, - rather than limiting the regression to just the predicted positive values. - """ - def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): super().__init__(config, partitioner_dict) - self._clf_params = self._get_parameters(config)['clf'] - self._reg_params = self._get_parameters(config)['reg'] + self._clf_params = self._get_parameters(config)["clf"] + self._reg_params = self._get_parameters(config)["reg"] self._submodel_list = [] + self._submodels_to_train = config["submodels_to_train"] + self._log_target = config["log_target"] + self._pred_samples = config["pred_samples"] + self._draw_dist = config["draw_dist"] + self._draw_sigma = config["draw_sigma"] - self._partitioner_dict = partitioner_dict - self._submodels_to_train = config['submodels_to_train'] # self._n_estimators = config['parameters']['n_estimators'] - self.log_target = config['log_target'] # self._max_features = config['max_features'] # self._max_depth = config['max_depth'] # self._max_samples = config['max_samples'] - self._pred_samples = config['pred_samples'] - self._draw_dist = config['draw_dist'] - self._draw_sigma = config['draw_sigma'] # self._geo_unit_samples = config['geo_unit_samples'] # self._n_jobs = config['n_jobs'] @@ -88,29 +57,35 @@ def fit(self, df: pd.DataFrame): s.map(lambda x: (x > 0).astype(float)) for s in self._target_train ] - # Positive outcome (for cases where target > threshold) - target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) - if (t.values() > 0).any()]) + target_pos, past_cov_pos = zip( + *[ + (t, p) + for t, p in zip(self._target_train, self._past_cov) + if (t.values() > 0).any() + ] + ) for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): - # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") - + for step in tqdm(self._steps, desc=f"Steps for submodel {i+1}"): - # logger.info(f"Training step {step}") # Fit binary-like stage using a regression model, but the target is binary (0 or 1) - binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) + binary_model = self._clf( + lags_past_covariates=[-step], **self._clf_params + ) binary_model.fit(target_binary, past_covariates=self._past_cov) # Fit positive stage using the regression model - positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) + positive_model = self._reg( + lags_past_covariates=[-step], **self._reg_params + ) positive_model.fit(target_pos, past_covariates=past_cov_pos) self._models[step] = (binary_model, positive_model) + self._submodel_list.append(self._models) - logger.info(f"Submodel {i+1}/{self._submodels_to_train} trained successfully") + self.is_fitted_ = True - - - def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame: + + def predict_sequence(self, sequence_number) -> pd.DataFrame: """ Predicts n draws of outcomes based on the provided DataFrame . @@ -118,94 +93,157 @@ def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame ----------- self: StepShiftedHurdleUncertainRF The model object. - + run_type : str The type of run to perform. 
Currently it is unlikely to affect the behaviour of the function. - - eval_type : str + + eval_type : str The type of evaluation to perform. Currently it is unlikely to affect the behaviour of the function. - + sequence_number : int The sequence number to predict outcomes for. - - + + Returns: -------- pd.DataFrame The final predictions as a DataFrame. """ - - sample_number = 0 - final_preds = [] # This will hold predictions for all sub-models and all samples within sub-models - # Loop over submodels + + final_preds = [] submodel_number = 0 - for submodel in tqdm(self._submodel_list, desc=f"Predicting submodel: {run_type}", leave=True): -# print(submodel) - pred_by_step_binary = [self._predict_by_step(submodel[step][0], step, sequence_number) - for step in self._steps] - pred_by_step_positive = [self._predict_by_step(submodel[step][1], step, sequence_number) - for step in self._steps] - + + for submodel in tqdm( + self._submodel_list, desc=f"Predicting submodel number", leave=True + ): + pred_by_step_binary = [ + self._predict_by_step(submodel[step][0], step, sequence_number) + for step in self._steps + ] + pred_by_step_positive = [ + self._predict_by_step(submodel[step][1], step, sequence_number) + for step in self._steps + ] + pred_concat_binary = pd.concat(pred_by_step_binary, axis=0) - - pred_concat_binary.rename(columns={'step_combined':'Classification'}, inplace=True) + + pred_concat_binary.rename( + columns={f"pred_{self._targets}": "Classification"}, inplace=True + ) pred_concat_positive = pd.concat(pred_by_step_positive, axis=0) - pred_concat_positive.rename(columns={'step_combined':'Regression'}, inplace=True) + pred_concat_positive.rename( + columns={f"pred_{self._targets}": "Regression"}, inplace=True + ) pred_concat = pd.concat([pred_concat_binary, pred_concat_positive], axis=1) - pred_concat['submodel'] = submodel_number -# print(pred_concat.tail(12)) - - # Append the combined predictions to the final predictions list + pred_concat["submodel"] = submodel_number + final_preds.append(pred_concat) submodel_number += 1 -# submodel_preds[i] = final_preds - # Generate a DataFrame from the final predictions list for this sequence number - final_preds_aslists = pd.concat(final_preds, axis=0) + + final_preds_aslists = pd.concat(final_preds, axis=0) + # Drawing samples from the classification model # Ensuring that the classification probabilities are between 0 and 1: - final_preds_aslists['Classification'] = final_preds_aslists['Classification'].apply(lambda x: np.clip(x, 0, 1)) - final_preds_aslists['ClassificationSample'] = final_preds_aslists['Classification'].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) - - # Drawing samples from the regression model + final_preds_aslists["Classification"] = final_preds_aslists[ + "Classification" + ].apply(lambda x: np.clip(x, 0, 1)) + final_preds_aslists["ClassificationSample"] = final_preds_aslists[ + "Classification" + ].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) - if self.log_target == True: - if self._draw_dist == 'Poisson': # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here - print('Poisson not implemented') - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - if self._draw_dist == 'Lognormal': + # Drawing samples from the regression model + if self._log_target == True: + if ( + self._draw_dist == "Poisson" + ): # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here + 
final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ] + if self._draw_dist == "Lognormal": # Draw from normal distribution for log-transformed outcomes, then exponentiate, then round to integer - #pred_concat['RegressionSample'] = pred_concat['Regression'].apply(lambda x: np.random.normal(x, self._draw_sigma, self._pred_samples)) - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(x, self._draw_sigma, self._pred_samples))))) - if self.log_target == False: - if self._draw_dist == 'Poisson': # Note: this assumes a non-log-transformed target - print('Poisson not implemented') - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - if self._draw_dist == 'Lognormal': - print('Draws for non-log-transformed target: first implementation' ) - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(np.log1p(x), self._draw_sigma, self._pred_samples))))) - - if self._draw_dist == '': - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - print('final_preds_aslists contains the samples in list form. Shape:', final_preds_aslists.shape, '. Looks like this:') - print(final_preds_aslists.tail(20)) + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ].apply( + lambda x: np.abs( + np.rint( + np.expm1( + np.random.normal( + x, self._draw_sigma, self._pred_samples + ) + ) + ) + ) + ) + + if self._log_target == False: + if ( + self._draw_dist == "Poisson" + ): # Note: this assumes a non-log-transformed target + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ] + if self._draw_dist == "Lognormal": + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ].apply( + lambda x: np.abs( + np.rint( + np.expm1( + np.random.normal( + np.log1p(x), self._draw_sigma, self._pred_samples + ) + ) + ) + ) + ) + + if self._draw_dist == "": + final_preds_aslists["RegressionSample"] = final_preds_aslists["Regression"] + # 'Explode' the samples to get one row per sample - final_preds_full = final_preds_aslists.explode(['ClassificationSample','RegressionSample']) - final_preds_full['Prediction'] = final_preds_full['ClassificationSample'] * final_preds_full['RegressionSample'] + final_preds_full = final_preds_aslists.explode( + ["ClassificationSample", "RegressionSample"] + ) + final_preds_full["Prediction"] = ( + final_preds_full["ClassificationSample"] + * final_preds_full["RegressionSample"] + ) + # Ensuring that the final predictions are positive: - final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) + final_preds_full["Prediction"] = final_preds_full["Prediction"].apply( + lambda x: np.clip(x, 0, None) + ) + # Column for the main prediction: - pred_col_name = 'pred_' + self._targets - final_preds_full[pred_col_name] = final_preds_full['Prediction'] - # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions - if self.log_target == True: - final_preds_full['LogPrediction'] = final_preds_full['Prediction'] - final_preds_full['Prediction'] = np.expm1(final_preds_full['Prediction']) - if self.log_target == False: - final_preds_full['LogPrediction'] = np.log1p(final_preds_full['Prediction']) - 
final_preds_full.drop(columns=['Classification','Regression','ClassificationSample','RegressionSample','submodel','Prediction','LogPrediction'],inplace=True) - final_preds = pd.DataFrame(final_preds_full.groupby(['month_id', 'country_id'])[pred_col_name].apply(list)) - print('final_preds is the end product of the predict sequence function. Shape:', final_preds_full.shape) - print(final_preds.tail(20)) + pred_col_name = "pred_" + self._targets + final_preds_full[pred_col_name] = final_preds_full["Prediction"] + + # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, + # and adding a column with the log-transformed predictions + if self._log_target == True: + final_preds_full["LogPrediction"] = final_preds_full["Prediction"] + final_preds_full["Prediction"] = np.expm1(final_preds_full["Prediction"]) + if self._log_target == False: + final_preds_full["LogPrediction"] = np.log1p(final_preds_full["Prediction"]) + + final_preds_full.drop( + columns=[ + "Classification", + "Regression", + "ClassificationSample", + "RegressionSample", + "submodel", + "Prediction", + "LogPrediction", + ], + inplace=True, + ) + final_preds = pd.DataFrame( + final_preds_full.groupby(["month_id", "country_id"])[pred_col_name].apply( + list + ) + ) + return final_preds def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: @@ -227,54 +265,25 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: pd.DataFrame The final predictions as a DataFrame. """ - - # Check if the model has been fitted before making predictions - check_is_fitted(self, 'is_fitted_') - print('Dependent variable:', self._targets, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) - - # If the run type is not 'forecasting', perform multiple predictions - if run_type != 'forecasting': - preds = [] # D: List to collect predictions for each sequence - # If the evaluation type is "standard", iterate over the evaluation sequence number - submodel_preds = {} # Not sure this belongs here + check_is_fitted(self, "is_fitted_") + + if run_type != "forecasting": + preds = [] if eval_type == "standard": - # Loop over the evaluation sequence number - for sequence_number in tqdm(range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc=f"Sequence", leave=True): -# print('sequence_number', sequence_number) - temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) - - # Output the temporary final predictions with samples as parquet - temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') - # Convert to views_pipeline standard format - - # Aggregate the predictions into point predictions -# final_preds.pop('LogPrediction') -# agg_preds = np.log1p(temp_preds_full.groupby(['month_id', 'country_id']).mean()) -# final_preds.rename(columns={'Prediction':'pred_ged_sb'}, inplace=True) -# agg_preds.pop('submodel') - preds.append(temp_preds_full) # D: Append the final predictions for this sequence number - # Output the final predictions as parquet -# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') - return preds + for sequence_number in tqdm( + range(ModelManager._resolve_evaluation_sequence_number(eval_type)), + desc=f"Predicting for sequence number", + leave=True, + ): + 
temp_preds_full = self.predict_sequence(sequence_number) + preds.append(temp_preds_full) + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) + else: - # If the run type is 'forecasting', perform a single prediction sequence_number = 0 - temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) - print('temp_preds_full') - -# print('final_preds_aslists') -# print(final_preds_aslists.describe()) - - # Output the final predictions with samples as parquet - temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') - # Aggregate the predictions into point predictions -# agg_preds = temp_preds_full.groupby(['month_id', 'country_id']).mean() -# agg_preds.pop('submodel') - # Output the final predictions as parquet -# agg_preds['ged_sb_dep'] = agg_preds['Prediction'] -# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') - - # Return the final predictions as a DataFrame - print('Returning final predictions:') - print(temp_preds_full.tail(20)) - return temp_preds_full \ No newline at end of file + preds = self.predict_sequence(sequence_number) + + return preds From af9477873aca6b99b51d3389432f91d6183175e8 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 20 Mar 2025 07:47:11 +0100 Subject: [PATCH 13/24] add extra condition --- views_stepshifter/manager/stepshifter_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 96c4f27..332a4a4 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -40,9 +40,9 @@ def standardize_value(value): # 1) Replace inf and -inf with 0; # 2) Replace negative values with 0 if isinstance(value, list): - return [0 if (v == np.inf or v == -np.inf or v < 0) else v for v in value] + return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or v == np.nan) else value df = df.applymap(standardize_value) return df From 2a5a8fc952f53778f8a83e8250a308b033d5a3e3 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 20 Mar 2025 07:58:24 +0100 Subject: [PATCH 14/24] fix typo --- views_stepshifter/manager/stepshifter_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 332a4a4..d724b41 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -42,7 +42,7 @@ def standardize_value(value): if isinstance(value, list): return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0 or v == np.nan) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value df = df.applymap(standardize_value) return df From cd6ee7a37e9ed7a021673bfd31a1f11a5c82e4e1 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:37:25 +0100 Subject: [PATCH 15/24] some fix idk --- 
views_stepshifter/manager/stepshifter_manager.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index d724b41..3ebe4ff 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -9,6 +9,7 @@ import pandas as pd import numpy as np from typing import Union, Optional, List, Dict +import math logger = logging.getLogger(__name__) @@ -39,10 +40,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: def standardize_value(value): # 1) Replace inf and -inf with 0; # 2) Replace negative values with 0 - if isinstance(value, list): - return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] + # if isinstance(value, list): + # return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] + # else: + # return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value + to_exclude = [np.inf, -np.inf, np.nan, None] + if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series): + return [0 if (v in to_exclude) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value + return 0 if (value in to_exclude) else value df = df.applymap(standardize_value) return df From 2c6e87d6d917fb3f95a263526d390d53e6c4e68d Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:29:50 +0200 Subject: [PATCH 16/24] update get_standardized_df --- views_stepshifter/manager/stepshifter_manager.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 3ebe4ff..fdefdd4 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -38,19 +38,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: """ def standardize_value(value): - # 1) Replace inf and -inf with 0; + # 1) Replace inf, -inf, nan with 0; # 2) Replace negative values with 0 - # if isinstance(value, list): - # return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] - # else: - # return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value - to_exclude = [np.inf, -np.inf, np.nan, None] - if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series): - return [0 if (v in to_exclude) else v for v in value] + if isinstance(value, list): + return [0 if (v == np.inf or v == -np.inf or v < 0 or np.isnan(v)) else v for v in value] else: - return 0 if (value in to_exclude) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or np.isnan(value)) else value df = df.applymap(standardize_value) + return df def _split_hurdle_parameters(self): From 8fb4b17d2469c64ff605d6b8f65bc92997c21d69 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 17 Apr 2025 14:33:36 +0200 Subject: [PATCH 17/24] update ModelManager to ForecastingModelManager --- .../manager/stepshifter_manager.py | 30 +++++++------------ views_stepshifter/models/hurdle_model.py | 8 ++--- views_stepshifter/models/shurf_model.py | 4 +-- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git 
From 8fb4b17d2469c64ff605d6b8f65bc92997c21d69 Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Thu, 17 Apr 2025 14:33:36 +0200
Subject: [PATCH 17/24] update ModelManager to ForecastingModelManager

---
 .../manager/stepshifter_manager.py           | 30 +++++++-------------
 views_stepshifter/models/hurdle_model.py     |  8 ++---
 views_stepshifter/models/shurf_model.py      |  4 +--
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py
index 3ebe4ff..1852284 100644
--- a/views_stepshifter/manager/stepshifter_manager.py
+++ b/views_stepshifter/manager/stepshifter_manager.py
@@ -1,6 +1,6 @@
-from views_pipeline_core.managers.model import ModelPathManager, ModelManager
+from views_pipeline_core.managers.model import ModelPathManager, ForecastingModelManager
 from views_pipeline_core.configs.pipeline import PipelineConfig
-from views_pipeline_core.files.utils import read_dataframe
+from views_pipeline_core.files.utils import read_dataframe, generate_model_file_name
 from views_stepshifter.models.stepshifter import StepshifterModel
 from views_stepshifter.models.hurdle_model import HurdleModel
 from views_stepshifter.models.shurf_model import ShurfModel
@@ -9,16 +9,15 @@
 import pandas as pd
 import numpy as np
 from typing import Union, Optional, List, Dict
-import math
 
 logger = logging.getLogger(__name__)
 
 
-class StepshifterManager(ModelManager):
+class StepshifterManager(ForecastingModelManager):
     def __init__(
         self,
         model_path: ModelPathManager,
-        wandb_notifications: bool = False,
+        wandb_notifications: bool = True,
         use_prediction_store: bool = True,
     ) -> None:
         super().__init__(model_path, wandb_notifications, use_prediction_store)
@@ -38,19 +37,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame:
         """
 
         def standardize_value(value):
-            # 1) Replace inf and -inf with 0;
+            # 1) Replace inf, -inf, nan with 0;
             # 2) Replace negative values with 0
-            # if isinstance(value, list):
-            #     return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value]
-            # else:
-            #     return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value
-            to_exclude = [np.inf, -np.inf, np.nan, None]
-            if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series):
-                return [0 if (v in to_exclude) else v for v in value]
+            if isinstance(value, list):
+                return [0 if (v == np.inf or v == -np.inf or v < 0 or np.isnan(v)) else v for v in value]
             else:
-                return 0 if (value in to_exclude) else value
+                return 0 if (value == np.inf or value == -np.inf or value < 0 or np.isnan(value)) else value
 
         df = df.applymap(standardize_value)
+
         return df
 
@@ -121,7 +116,7 @@ def _train_model_artifact(self):
         stepshift_model.fit(df_viewser)
 
         if not self.config["sweep"]:
-            model_filename = ModelManager.generate_model_file_name(
+            model_filename = generate_model_file_name(
                 run_type, file_extension=".pkl"
             )
             stepshift_model.save(path_artifacts / model_filename)
@@ -140,7 +135,6 @@ def _evaluate_model_artifact(
         Returns:
             A list of DataFrames containing the evaluation results
         """
-        path_raw = self._model_path.data_raw
         path_artifacts = self._model_path.artifacts
         run_type = self.config["run_type"]
 
@@ -184,7 +178,6 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
         Returns:
             The forecasted DataFrame
         """
-        path_raw = self._model_path.data_raw
         path_artifacts = self._model_path.artifacts
         run_type = self.config["run_type"]
 
@@ -218,7 +211,6 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
         return df_prediction
 
     def _evaluate_sweep(self, eval_type: str, model: any) -> List[pd.DataFrame]:
-        path_raw = self._model_path.data_raw
         run_type = self.config["run_type"]
 
         df_predictions = model.predict(run_type, eval_type)
@@ -226,4 +218,4 @@
             StepshifterManager._get_standardized_df(df) for df in df_predictions
         ]
 
-        return df_predictions
+        return df_predictions
\ No newline at end of file
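
The per-cell closure that survives into this patch runs at Python speed through
applymap (deprecated in pandas 2.1 and renamed DataFrame.map; the project pins
pandas ^1.5.3, where the old name is still current). When every cell is a plain
scalar, the same standardization can be expressed with vectorized pandas
operations instead. A sketch of that equivalent, assuming scalar cells only; it
does not cover the list-valued prediction columns this series supports:

    import numpy as np
    import pandas as pd

    def standardize_scalar_df(df: pd.DataFrame) -> pd.DataFrame:
        # Replace +/-inf with 0, clamp negatives to 0, then zero out NaN.
        out = df.replace([np.inf, -np.inf], 0)
        out = out.mask(out < 0, 0)  # NaN < 0 is False, so NaN survives the mask
        return out.fillna(0)

    df = pd.DataFrame({"a": [1.0, -1.0, np.inf], "b": [np.nan, -np.inf, 2.0]})
    print(standardize_scalar_df(df))  # negatives, infinities and NaN all become 0.0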
diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py
index b539821..e8845d5 100644
--- a/views_stepshifter/models/hurdle_model.py
+++ b/views_stepshifter/models/hurdle_model.py
@@ -1,4 +1,4 @@
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ForecastingModelManager
 from views_stepshifter.models.stepshifter import StepshifterModel
 from views_stepshifter.models.validation import views_validate
 from sklearn.utils.validation import check_is_fitted
@@ -154,12 +154,12 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
 
         if eval_type == "standard":
             total_sequence_number = (
-                ModelManager._resolve_evaluation_sequence_number(eval_type)
+                ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)
             )
             if self.get_device_params().get("device") == "cuda":
                 pred = []
                 for sequence_number in tqdm.tqdm(
-                    range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                    range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                     desc="Predicting for sequence number",
                 ):
                     pred_by_step_binary = [
@@ -182,7 +182,7 @@
                 with ProcessPoolExecutor() as executor:
                     futures = {
                         executor.submit(self._predict_by_sequence, sequence_number): sequence_number
-                        for sequence_number in range(ModelManager._resolve_evaluation_sequence_number(eval_type))
+                        for sequence_number in range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type))
                     }
                     for future in tqdm.tqdm(
                         as_completed(futures.keys()),
diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py
index 51365da..1dc7af4 100644
--- a/views_stepshifter/models/shurf_model.py
+++ b/views_stepshifter/models/shurf_model.py
@@ -1,4 +1,4 @@
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ForecastingModelManager
 from views_stepshifter.models.hurdle_model import HurdleModel
 from views_stepshifter.models.validation import views_validate
 from sklearn.utils.validation import check_is_fitted
@@ -271,7 +271,7 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
         preds = []
         if eval_type == "standard":
             for sequence_number in tqdm(
-                range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                 desc=f"Predicting for sequence number",
                 leave=True,
             ):
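
The hurdle model's CPU path above keys a dictionary of futures by their sequence
number; that mapping is what lets results arrive out of order from as_completed
(passing futures.keys() is equivalent to passing the dict) and still be reassembled
in submission order. A minimal sketch of the pattern in isolation, with a toy
square function standing in for the model's _predict_by_sequence:

    from concurrent.futures import ProcessPoolExecutor, as_completed

    def square(n: int) -> int:  # toy stand-in for a per-sequence prediction
        return n * n

    def predict_all(n_sequences: int) -> list:
        results = [None] * n_sequences
        with ProcessPoolExecutor() as executor:
            # Key each future by its sequence number so completion order
            # does not matter when collecting the results.
            futures = {executor.submit(square, seq): seq for seq in range(n_sequences)}
            for future in as_completed(futures):
                results[futures[future]] = future.result()
        return results

    if __name__ == "__main__":  # required for ProcessPoolExecutor on some platforms
        print(predict_all(4))   # [0, 1, 4, 9], regardless of completion order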
From 19f713bc8a200b7f875a7575f5957c134f812b80 Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Wed, 23 Apr 2025 09:36:53 +0200
Subject: [PATCH 18/24] use ForecastingModelManager

---
 views_stepshifter/models/stepshifter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/views_stepshifter/models/stepshifter.py b/views_stepshifter/models/stepshifter.py
index dca342b..264d97b 100644
--- a/views_stepshifter/models/stepshifter.py
+++ b/views_stepshifter/models/stepshifter.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import check_is_fitted
 from typing import List, Dict
 from views_stepshifter.models.validation import views_validate
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ModelManager, ForecastingModelManager
 import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
 import torch
@@ -238,13 +238,13 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
 
         if eval_type == "standard":
             total_sequence_number = (
-                ModelManager._resolve_evaluation_sequence_number(eval_type)
+                ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)
            )
             if self.get_device_params().get("device") == "cuda":
                 preds = []
                 for sequence_number in tqdm.tqdm(
-                    range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                    range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                     desc="Predicting for sequence number",
                 ):
                     pred_by_step = [

From e10a723279567c69bf8ca01f9cf04b5a9e7fad0b Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Wed, 23 Apr 2025 23:02:50 +0200
Subject: [PATCH 19/24] fix tests

---
 tests/test_hurdle_model.py | 2 +-
 tests/test_stepshifter.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_hurdle_model.py b/tests/test_hurdle_model.py
index 385fb2a..734276b 100644
--- a/tests/test_hurdle_model.py
+++ b/tests/test_hurdle_model.py
@@ -125,7 +125,7 @@ def test_predict(sample_config, sample_partitioner_dict, sample_dataframe):
          patch("views_stepshifter.models.hurdle_model.as_completed") as mock_as_completed, \
          patch("views_stepshifter.models.hurdle_model.tqdm.tqdm") as mock_tqdm, \
          patch("views_stepshifter.models.hurdle_model.ProcessPoolExecutor") as mock_ProcessPoolExecutor, \
-         patch("views_stepshifter.models.hurdle_model.ModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
+         patch("views_stepshifter.models.hurdle_model.ForecastingModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
 
         # the else branch
 
diff --git a/tests/test_stepshifter.py b/tests/test_stepshifter.py
index 3a43b54..370d76a 100644
--- a/tests/test_stepshifter.py
+++ b/tests/test_stepshifter.py
@@ -3,7 +3,7 @@
 import numpy as np
 from unittest.mock import patch, MagicMock, call
 from views_stepshifter.models.stepshifter import StepshifterModel
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ModelManager, ForecastingModelManager
 
 @pytest.fixture
 def config():
@@ -182,7 +182,7 @@ def test_predict(config, partitioner_dict, sample_dataframe):
          patch("views_stepshifter.models.stepshifter.as_completed") as mock_as_completed, \
          patch("views_stepshifter.models.stepshifter.tqdm.tqdm") as mock_tqdm, \
          patch("views_stepshifter.models.stepshifter.ProcessPoolExecutor") as mock_ProcessPoolExecutor, \
-         patch("views_stepshifter.models.stepshifter.ModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
+         patch("views_stepshifter.models.stepshifter.ForecastingModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
 
         # the else branch
 
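One detail behind these one-line test fixes: the string given to unittest.mock.patch
must name the attribute the code under test actually calls. After the rename,
predict() resolves ForecastingModelManager in its own module's namespace, so that is
the name the tests have to target; mocking the old ModelManager method would stub
something predict() never invokes, and the test's sequence-count stub would be
ignored. A minimal sketch; the return value here is a made-up stand-in:

    from unittest.mock import patch

    # Target the name as the module under test looks it up, and stub the
    # method that predict() actually calls after the rename.
    with patch(
        "views_stepshifter.models.stepshifter.ForecastingModelManager._resolve_evaluation_sequence_number"
    ) as mock_seq:
        mock_seq.return_value = 12  # hypothetical sequence count for the test
        ...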
From c721cd630eb6075cccc2d7e8a4b956238ea36d80 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Mon, 23 Jun 2025 14:13:24 +0200
Subject: [PATCH 20/24] update version

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 26989ae..11e312a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "views_stepshifter"
-version = "0.4.0"
+version = "1.0.0"
 description = ""
 authors = [
     "Xiaolong Sun ",
@@ -11,7 +11,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = ">=3.11,<3.15"
-views_pipeline_core = ">=1.0.0,<2.0.0"
+views_pipeline_core = ">=2.0.0,<3.0.0"
 scikit-learn = "^1.6.0"
 pandas = "^1.5.3"
 numpy = "^1.25.2"

From 927fa5411f6792669deeb48fc757155ea2cd2cd4 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:39:12 +0200
Subject: [PATCH 21/24] try to fix test error

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 11e312a..1d4feb6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
+scipy = "1.12.0"
 

From 14a09da32b6cd6c869cec4d684084b8d00347dd6 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:42:50 +0200
Subject: [PATCH 22/24] try

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1d4feb6..8737d48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.12.0"
+scipy = "1.15.0"
 

From 93d2d43e539b877e5d239d0defde14de84f02465 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:43:55 +0200
Subject: [PATCH 23/24] another try

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8737d48..35a1a15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.15.0"
+scipy = "1.15.1"
 

From 6e26731edfb4179ba49b2c7ad4d30dae2829f817 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:49:44 +0200
Subject: [PATCH 24/24] Add comments

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35a1a15..53d8442 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.15.1"
+scipy = "1.15.1" # error with latest scipy 1.16.0. see https://github.com/statsmodels/statsmodels/issues?q=_lazywhere
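
A closing note on the scipy pin: the exact `scipy = "1.15.1"` constraint also blocks
future 1.15.x bug-fix releases. Since the linked statsmodels issues point to scipy's
private _lazywhere helper changing in the 1.16 line, a range constraint is a softer
alternative. A sketch in Poetry's constraint syntax, not a tested recommendation:

    [tool.poetry.dependencies]
    # Accept 1.15.x patch releases while excluding the 1.16 line that
    # broke statsmodels' use of the private _lazywhere helper.
    scipy = ">=1.15.1,<1.16"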