views-platform · xiaolong0728 · Nov 25, 2025 · Nov 7, 2025 · Nov 13, 2025 · Nov 13, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,9 +15,10 @@ views_pipeline_core = ">=2.0.0,<3.0.0"
 scikit-learn = "^1.6.0"
 pandas = "^1.5.3"
 numpy = "^1.25.2"
-darts = "^0.30.0"
+darts = "^0.38.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
+xgboost = "^3.0.0"
 scipy = "1.15.1" # error with latest scipy 1.16.0. see https://github.com/statsmodels/statsmodels/issues?q=_lazywhere
 
 

diff --git a/tests/test_hurdle_model.py b/tests/test_hurdle_model.py
@@ -10,8 +10,8 @@ def sample_config():
     return {
         "steps": [1, 2, 3],
         "targets": ["target"],
-        "model_clf": "RandomForestClassifier",
-        "model_reg": "RandomForestRegressor",
+        "model_clf": "LGBMClassifier",
+        "model_reg": "LGBMRegressor",
         "parameters": {"clf": {"n_estimators": 100, "max_depth": 10}, "reg": {}},
         "sweep": False,
         "metrics": ["test_metric"]

diff --git a/tests/test_stepshifter_manager.py b/tests/test_stepshifter_manager.py
@@ -6,12 +6,21 @@
 from views_stepshifter.manager.stepshifter_manager import StepshifterManager
 from views_stepshifter.models.stepshifter import StepshifterModel
 from views_pipeline_core.managers.model import ModelPathManager
+from views_pipeline_core.managers.configuration.configuration import ConfigurationManager
+from views_pipeline_core.cli.args import ForecastingModelArgs
+
 
 @pytest.fixture
 def mock_model_path():
-    mock_path = MagicMock()
+    mock_path = MagicMock(spec=ModelPathManager)
     mock_path.model_dir = "/path/to/models/test_model"
     mock_path.target = "model"
+    mock_path.artifacts = Path("/path/to/artifacts")
+    mock_path.get_latest_model_artifact_path.return_value = Path("test_model_20240101_200000")
+    mock_path.logging = MagicMock()
+    mock_path.models = Path("/path/to/models_root")
+    mock_path.model_name = "test_model"
+    mock_path.data_raw = Path("/path/to/data_raw")
     return mock_path
 
 @pytest.fixture
@@ -81,26 +90,51 @@ def mock_partitioner_dict():
 
 @pytest.fixture
 def stepshifter_manager(mock_model_path, mock_config_meta, mock_config_deployment, mock_config_hyperparameters, mock_config_sweep, mock_partitioner_dict):
+    """
+    Provides a StepshifterManager instance for a non-hurdle model.
+
+    It patches:
+    - _ModelManager__load_config: To inject mock config dictionaries.
+    - validate_config: To prevent validation errors during test setup.
+    """
     with patch.object(StepshifterManager, '_ModelManager__load_config', side_effect=lambda file, func: {
         "config_meta.py": mock_config_meta,
         "config_deployment.py": mock_config_deployment,
         "config_hyperparameters.py": mock_config_hyperparameters,
         "config_sweep.py": mock_config_sweep
-    }.get(file, None)):  
+    }.get(file, None)), \
+         patch("views_pipeline_core.managers.configuration.configuration.validate_config"):
+
         manager = StepshifterManager(mock_model_path, use_prediction_store=False)
-        print(manager._data_loader)
-        return manager
+
+        manager._data_loader = MagicMock()
+        manager._data_loader.partition_dict = mock_partitioner_dict
+
+        yield manager
 
 @pytest.fixture
-def stepshifter_manager_hurdle(mock_model_path, mock_config_meta_hurdle, mock_config_deployment, mock_config_hyperparameters_hurdle, mock_config_sweep):
+def stepshifter_manager_hurdle(mock_model_path, mock_config_meta_hurdle, mock_config_deployment, mock_config_hyperparameters_hurdle, mock_config_sweep, mock_partitioner_dict):
+    """
+    Provides a StepshifterManager instance for a hurdle model.
+
+    It patches:
+    - _ModelManager__load_config: To inject mock config dictionaries.
+    - validate_config: To prevent validation errors during test setup.
+    """
     with patch.object(StepshifterManager, '_ModelManager__load_config', side_effect=lambda file, func: {
         "config_meta.py": mock_config_meta_hurdle,
         "config_deployment.py": mock_config_deployment,
         "config_hyperparameters.py": mock_config_hyperparameters_hurdle,
         "config_sweep.py": mock_config_sweep
-    }.get(file, None)):  
+    }.get(file, None)), \
+         patch("views_pipeline_core.managers.configuration.configuration.validate_config"):
+
         manager = StepshifterManager(mock_model_path, use_prediction_store=False)
-        return manager
+
+        manager._data_loader = MagicMock()
+        manager._data_loader.partition_dict = mock_partitioner_dict
+
+        yield manager
 
 def test_stepshifter_manager_init_hurdle(stepshifter_manager_hurdle):
     """
@@ -147,7 +181,8 @@ def test_split_hurdle_parameters(stepshifter_manager_hurdle):
     """
     Test the _split_hurdle_parameters method to ensure it correctly splits the parameters for HurdleModel.
     """
-    stepshifter_manager_hurdle.config = {
+    stepshifter_manager_hurdle.configs = {
+        "algorithm": "HurdleModel",
         "clf_param1": "value1",
         "clf_param2": "value2",
         "reg_param1": "value3",
@@ -164,17 +199,29 @@ def test_get_model(stepshifter_manager, stepshifter_manager_hurdle, mock_partiti
     with patch("views_stepshifter.manager.stepshifter_manager.HurdleModel") as mock_hurdle_model, \
         patch("views_stepshifter.manager.stepshifter_manager.StepshifterModel") as mock_stepshifter_model:
 
-        stepshifter_manager_hurdle.config = stepshifter_manager_hurdle._update_single_config(MagicMock(run_type="test_run_type"))
+        # --- Test Hurdle ---
+        args = ForecastingModelArgs(run_type="test_run_type", saved=True)
+
+        # We must include the "algorithm" key, otherwise _is_hurdle gets reset to False
+        hurdle_args = vars(args)
+        hurdle_args["algorithm"] = "HurdleModel"
+        stepshifter_manager_hurdle.configs = hurdle_args
+
         stepshifter_manager_hurdle._get_model(mock_partitioner_dict)
-        mock_hurdle_model.assert_called_once_with(stepshifter_manager_hurdle.config, mock_partitioner_dict)
+        mock_hurdle_model.assert_called_once_with(stepshifter_manager_hurdle.configs, mock_partitioner_dict)
         mock_stepshifter_model.assert_not_called()
 
         mock_hurdle_model.reset_mock()
         mock_stepshifter_model.reset_mock()
 
-        stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
+        # --- Test Non-Hurdle ---
+        args = ForecastingModelArgs(run_type="test_run_type", saved=True)
+        non_hurdle_args = vars(args)
+        non_hurdle_args["algorithm"] = "LightGBMModel"
+        stepshifter_manager.configs = non_hurdle_args
+
         stepshifter_manager._get_model(mock_partitioner_dict)
-        mock_stepshifter_model.assert_called_once_with(stepshifter_manager.config, mock_partitioner_dict)
+        mock_stepshifter_model.assert_called_once_with(stepshifter_manager.configs, mock_partitioner_dict)
         mock_hurdle_model.assert_not_called()
 
 def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle):
@@ -185,23 +232,38 @@ def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle):
         patch("views_stepshifter.manager.stepshifter_manager.read_dataframe") as mock_read_dataframe, \
         patch("views_stepshifter.manager.stepshifter_manager.StepshifterManager._get_model") as mock_get_model:
 
-        # StepshifterManager with StepshifterModel
-        stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
-        stepshifter_manager.config["sweep"] = False
+        # --- Test Non-Hurdle ---
+        args = ForecastingModelArgs(run_type="test_run_type", train=True)
+        non_hurdle_args = vars(args)
+        non_hurdle_args["algorithm"] = "LightGBMModel"
+        non_hurdle_args["sweep"] = False
+        stepshifter_manager.configs = non_hurdle_args
+
         stepshifter_manager._train_model_artifact()
 
         mock_split_hurdle.assert_not_called()
-        assert stepshifter_manager.config["run_type"] == "test_run_type"
+        assert stepshifter_manager.configs["run_type"] == "test_run_type"
         mock_read_dataframe.assert_called_once()
         mock_get_model.assert_called_once_with(stepshifter_manager._data_loader.partition_dict)
         mock_get_model.return_value.fit.assert_called_once()
         mock_get_model.return_value.save.assert_called_once()
 
-        # StepshifterManager with HurdleModel
-        stepshifter_manager_hurdle.config = stepshifter_manager_hurdle._update_single_config(MagicMock(run_type="test_run_type"))
+        mock_read_dataframe.reset_mock()
+        mock_get_model.reset_mock()
+
+        mock_split_hurdle.reset_mock()
+
+        # --- Test Hurdle ---
+        args = ForecastingModelArgs(run_type="test_run_type", train=True)
+        hurdle_args = vars(args)
+        hurdle_args["algorithm"] = "HurdleModel"
+        stepshifter_manager_hurdle.configs = hurdle_args
+        stepshifter_manager_hurdle._is_hurdle = True
+
         stepshifter_manager_hurdle._train_model_artifact()
 
-        mock_split_hurdle.assert_called_once()
+        mock_read_dataframe.assert_called_once()
+        mock_get_model.assert_called_once_with(stepshifter_manager_hurdle._data_loader.partition_dict)
 
 def test_evaluate_model_artifact(stepshifter_manager):
     """
@@ -215,30 +277,29 @@ def test_evaluate_model_artifact(stepshifter_manager):
         patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:
 
 
-        # the else branch
-        stepshifter_manager._model_path.get_latest_model_artifact_path.return_value = Path("predictions_test_run_202401011200000")
-        stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
+        # --- Test default artifact branch (else) ---
+        args = ForecastingModelArgs(run_type="test_run_type", evaluate=True, saved=True)
+        stepshifter_manager.configs = vars(args)
+
         eval_type = "test_eval_type"
         artifact_name = None
         stepshifter_manager._evaluate_model_artifact(eval_type, artifact_name)
 
-        assert stepshifter_manager.config["run_type"] == "test_run_type"
+        assert stepshifter_manager.configs["run_type"] == "test_run_type"
         mock_logger.info.assert_called_once_with(f"Using latest (default) run type (test_run_type) specific artifact")
-        assert stepshifter_manager.config["timestamp"] == "202401011200000"
-        # mock_read_dataframe.assert_called_once()
         mock_get_standardized_df.assert_called_once()
 
         mock_logger.reset_mock()
 
-
-        # the if branch
+        # --- Test specific artifact branch (if) ---
         artifact_name = "non_default_artifact.pkl"
-        stepshifter_manager._model_path.artifacts = Path("predictions_test_run_202401011200000")
+        expected_path = stepshifter_manager._model_path.artifacts / artifact_name
+
         stepshifter_manager._evaluate_model_artifact(eval_type, artifact_name)
 
         mock_logger.info.assert_called_once_with(f"Using (non-default) artifact: {artifact_name}")
-        path_artifact = stepshifter_manager._model_path.artifacts / artifact_name
-        assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl")
+        assert expected_path == Path("/path/to/artifacts/non_default_artifact.pkl")
+
 
 def test_forecast_model_artifact(stepshifter_manager):
     """
@@ -252,35 +313,34 @@ def test_forecast_model_artifact(stepshifter_manager):
         patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:
 
 
-        # the else branch
-        # mock_read_dataframe.return_value = pd.DataFrame({"a": [1, 2, 3]})
-        stepshifter_manager._model_path.get_latest_model_artifact_path.return_value = Path("predictions_test_run_202401011200000")
-        stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
+        # --- Test default artifact branch (else) ---
+        args = ForecastingModelArgs(run_type="forecasting", forecast=True, saved=True)
+        stepshifter_manager.configs = vars(args)
+
         artifact_name = None
         stepshifter_manager._forecast_model_artifact(artifact_name)
 
-        assert stepshifter_manager.config["run_type"] == "test_run_type"
-        mock_logger.info.assert_called_once_with(f"Using latest (default) run type (test_run_type) specific artifact")
-        assert stepshifter_manager.config["timestamp"] == "202401011200000"
-        # mock_read_dataframe.assert_called_once()
-        mock_model.predict.assert_called_once_with("test_run_type")
+        assert stepshifter_manager.configs["run_type"] == "forecasting"
+        mock_logger.info.assert_called_once_with(f"Using latest (default) run type (forecasting) specific artifact")
+        mock_model.predict.assert_called_once_with("forecasting")
         mock_get_standardized_df.assert_called_once()
 
         mock_logger.reset_mock()
+        mock_model.predict.reset_mock()
+        mock_get_standardized_df.reset_mock()
 
-
-        # the if branch
+        # --- Test specific artifact branch (if) with FileNotFoundError ---
         mock_builtins_open.side_effect = FileNotFoundError("Test error")
         artifact_name = "non_default_artifact.pkl"
-        stepshifter_manager._model_path.artifacts = Path("predictions_test_run_202401011200000")
+
         with pytest.raises(FileNotFoundError) as exc_info:
             stepshifter_manager._forecast_model_artifact(artifact_name)
-        assert str(exc_info.value) == "Test error"
 
+        assert str(exc_info.value) == "Test error"
 
         mock_logger.info.assert_called_once_with(f"Using (non-default) artifact: {artifact_name}")
-        path_artifact = stepshifter_manager._model_path.artifacts / artifact_name 
-        assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl")
+        path_artifact = stepshifter_manager._model_path.artifacts / artifact_name
+        assert path_artifact == Path("/path/to/artifacts/non_default_artifact.pkl")
         mock_logger.exception.assert_called_once_with(f"Model artifact not found at {path_artifact}")
 
 def test_evaluate_sweep(stepshifter_manager):
@@ -292,12 +352,12 @@ def test_evaluate_sweep(stepshifter_manager):
     with patch("views_stepshifter.manager.stepshifter_manager.read_dataframe") as mock_read_dataframe, \
         patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:
 
-        # mock_read_dataframe.return_value = pd.DataFrame({"a": [1, 2, 3]})
-        stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
+        args = ForecastingModelArgs(run_type="test_run_type", evaluate=True, saved=True)
+        stepshifter_manager.configs = vars(args)
+
         eval_type = "test_eval_type"
         stepshifter_manager._evaluate_sweep(eval_type, mock_model)
 
-        assert stepshifter_manager.config["run_type"] == "test_run_type"
-        # mock_read_dataframe.assert_called_once()
+        assert stepshifter_manager.configs["run_type"] == "test_run_type"
         mock_model.predict.assert_called_once_with("test_run_type", eval_type)
-        mock_get_standardized_df.assert_called_once()
+        mock_get_standardized_df.assert_called_once()
diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py
@@ -58,7 +58,7 @@ def _split_hurdle_parameters(self):
         """
         clf_dict = {}
         reg_dict = {}
-        config = self.config
+        config = self.configs
 
         for key, value in config.items():
             if key.startswith("clf_"):
@@ -84,12 +84,12 @@ def _get_model(self, partitioner_dict: dict):
             The model object based on the algorithm specified in the config
         """
         if self._is_hurdle:
-            model = HurdleModel(self.config, partitioner_dict)
+            model = HurdleModel(self.configs, partitioner_dict)
         elif self._is_shurf:
-            model = ShurfModel(self.config, partitioner_dict)
+            model = ShurfModel(self.configs, partitioner_dict)
         else:
-            self.config["model_reg"] = self.config["algorithm"]
-            model = StepshifterModel(self.config, partitioner_dict)
+            self.configs["model_reg"] = self.configs["algorithm"]
+            model = StepshifterModel(self.configs, partitioner_dict)
 
         return model
 
@@ -103,10 +103,10 @@ def _train_model_artifact(self):
         path_raw = self._model_path.data_raw
         path_artifacts = self._model_path.artifacts
         # W&B does not directly support nested dictionaries for hyperparameters
-        if self.config["sweep"] and (self._is_hurdle or self._is_shurf):
-            self.config = self._split_hurdle_parameters()
+        if self.configs["sweep"] and (self._is_hurdle or self._is_shurf):
+            self.configs = self._split_hurdle_parameters()
 
-        run_type = self.config["run_type"]
+        run_type = self.configs["run_type"]
         df_viewser = read_dataframe(
             path_raw / f"{run_type}_viewser_df{PipelineConfig.dataframe_format}"
         )
@@ -115,7 +115,7 @@ def _train_model_artifact(self):
         stepshift_model = self._get_model(partitioner_dict)
         stepshift_model.fit(df_viewser)
 
-        if not self.config["sweep"]:
+        if not self.configs["sweep"]:
             model_filename = generate_model_file_name(
                 run_type, file_extension=".pkl"
             )
@@ -136,7 +136,7 @@ def _evaluate_model_artifact(
             A list of DataFrames containing the evaluation results
         """
         path_artifacts = self._model_path.artifacts
-        run_type = self.config["run_type"]
+        run_type = self.configs["run_type"]
 
         # if an artifact name is provided through the CLI, use it.
         # Otherwise, get the latest model artifact based on the run type
@@ -153,7 +153,7 @@ def _evaluate_model_artifact(
             )
             path_artifact = self._model_path.get_latest_model_artifact_path(run_type)
 
-        self.config["timestamp"] = path_artifact.stem[-15:]
+        self.configs['timestamp'] = path_artifact.stem[-15:]
 
         try:
             with open(path_artifact, "rb") as f:
@@ -179,7 +179,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
             The forecasted DataFrame
         """
         path_artifacts = self._model_path.artifacts
-        run_type = self.config["run_type"]
+        run_type = self.configs["run_type"]
 
         # if an artifact name is provided through the CLI, use it.
         # Otherwise, get the latest model artifact based on the run type
@@ -196,7 +196,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
             )
             path_artifact = self._model_path.get_latest_model_artifact_path(run_type)
 
-        self.config["timestamp"] = path_artifact.stem[-15:]
+        self.configs['timestamp'] = path_artifact.stem[-15:]
 
         try:
             with open(path_artifact, "rb") as f:
@@ -211,7 +211,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
         return df_prediction
 
     def _evaluate_sweep(self, eval_type: str, model: any) -> List[pd.DataFrame]:
-        run_type = self.config["run_type"]
+        run_type = self.configs["run_type"]
 
         df_predictions = model.predict(run_type, eval_type)
         df_predictions = [