Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ views_pipeline_core = ">=2.0.0,<3.0.0"
scikit-learn = "^1.6.0"
pandas = "^1.5.3"
numpy = "^1.25.2"
darts = "^0.30.0"
darts = "^0.38.0"
lightgbm = "4.6.0"
views_forecasts = "^0.5.5"
xgboost = "^3.0.0"
scipy = "1.15.1" # Pinned: the latest scipy (1.16.0) raises an error; see https://github.com/statsmodels/statsmodels/issues?q=_lazywhere


Expand Down
4 changes: 2 additions & 2 deletions tests/test_hurdle_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ def sample_config():
return {
"steps": [1, 2, 3],
"targets": ["target"],
"model_clf": "RandomForestClassifier",
"model_reg": "RandomForestRegressor",
"model_clf": "LGBMClassifier",
"model_reg": "LGBMRegressor",
"parameters": {"clf": {"n_estimators": 100, "max_depth": 10}, "reg": {}},
"sweep": False,
"metrics": ["test_metric"]
Expand Down
160 changes: 110 additions & 50 deletions tests/test_stepshifter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,21 @@
from views_stepshifter.manager.stepshifter_manager import StepshifterManager
from views_stepshifter.models.stepshifter import StepshifterModel
from views_pipeline_core.managers.model import ModelPathManager
from views_pipeline_core.managers.configuration.configuration import ConfigurationManager
from views_pipeline_core.cli.args import ForecastingModelArgs


@pytest.fixture
def mock_model_path():
mock_path = MagicMock()
mock_path = MagicMock(spec=ModelPathManager)
mock_path.model_dir = "/path/to/models/test_model"
mock_path.target = "model"
mock_path.artifacts = Path("/path/to/artifacts")
mock_path.get_latest_model_artifact_path.return_value = Path("test_model_202401011_200000")
mock_path.logging = MagicMock()
mock_path.models = Path("/path/to/models_root")
mock_path.model_name = "test_model"
mock_path.data_raw = Path("/path/to/data_raw")
return mock_path

@pytest.fixture
Expand Down Expand Up @@ -81,26 +90,51 @@ def mock_partitioner_dict():

@pytest.fixture
def stepshifter_manager(mock_model_path, mock_config_meta, mock_config_deployment, mock_config_hyperparameters, mock_config_sweep, mock_partitioner_dict):
"""
Provides a StepshifterManager instance for a non-hurdle model.

It patches:
- _ModelManager__load_config: To inject mock config dictionaries.
- validate_config: To prevent validation errors during test setup.
"""
with patch.object(StepshifterManager, '_ModelManager__load_config', side_effect=lambda file, func: {
"config_meta.py": mock_config_meta,
"config_deployment.py": mock_config_deployment,
"config_hyperparameters.py": mock_config_hyperparameters,
"config_sweep.py": mock_config_sweep
}.get(file, None)):
}.get(file, None)), \
patch("views_pipeline_core.managers.configuration.configuration.validate_config"):

manager = StepshifterManager(mock_model_path, use_prediction_store=False)
print(manager._data_loader)
return manager

manager._data_loader = MagicMock()
manager._data_loader.partition_dict = mock_partitioner_dict

yield manager

@pytest.fixture
def stepshifter_manager_hurdle(mock_model_path, mock_config_meta_hurdle, mock_config_deployment, mock_config_hyperparameters_hurdle, mock_config_sweep):
def stepshifter_manager_hurdle(mock_model_path, mock_config_meta_hurdle, mock_config_deployment, mock_config_hyperparameters_hurdle, mock_config_sweep, mock_partitioner_dict):
"""
Provides a StepshifterManager instance for a hurdle model.

It patches:
- _ModelManager__load_config: To inject mock config dictionaries.
- validate_config: To prevent validation errors during test setup.
"""
with patch.object(StepshifterManager, '_ModelManager__load_config', side_effect=lambda file, func: {
"config_meta.py": mock_config_meta_hurdle,
"config_deployment.py": mock_config_deployment,
"config_hyperparameters.py": mock_config_hyperparameters_hurdle,
"config_sweep.py": mock_config_sweep
}.get(file, None)):
}.get(file, None)), \
patch("views_pipeline_core.managers.configuration.configuration.validate_config"):

manager = StepshifterManager(mock_model_path, use_prediction_store=False)
return manager

manager._data_loader = MagicMock()
manager._data_loader.partition_dict = mock_partitioner_dict

yield manager

def test_stepshifter_manager_init_hurdle(stepshifter_manager_hurdle):
"""
Expand Down Expand Up @@ -147,7 +181,8 @@ def test_split_hurdle_parameters(stepshifter_manager_hurdle):
"""
Test the _split_hurdle_parameters method to ensure it correctly splits the parameters for HurdleModel.
"""
stepshifter_manager_hurdle.config = {
stepshifter_manager_hurdle.configs = {
"algorithm": "HurdleModel",
"clf_param1": "value1",
"clf_param2": "value2",
"reg_param1": "value3",
Expand All @@ -164,17 +199,29 @@ def test_get_model(stepshifter_manager, stepshifter_manager_hurdle, mock_partiti
with patch("views_stepshifter.manager.stepshifter_manager.HurdleModel") as mock_hurdle_model, \
patch("views_stepshifter.manager.stepshifter_manager.StepshifterModel") as mock_stepshifter_model:

stepshifter_manager_hurdle.config = stepshifter_manager_hurdle._update_single_config(MagicMock(run_type="test_run_type"))
# --- Test Hurdle ---
args = ForecastingModelArgs(run_type="test_run_type", saved=True)

# We must include the "algorithm" key, otherwise _is_hurdle gets reset to False
hurdle_args = vars(args)
hurdle_args["algorithm"] = "HurdleModel"
stepshifter_manager_hurdle.configs = hurdle_args

stepshifter_manager_hurdle._get_model(mock_partitioner_dict)
mock_hurdle_model.assert_called_once_with(stepshifter_manager_hurdle.config, mock_partitioner_dict)
mock_hurdle_model.assert_called_once_with(stepshifter_manager_hurdle.configs, mock_partitioner_dict)
mock_stepshifter_model.assert_not_called()

mock_hurdle_model.reset_mock()
mock_stepshifter_model.reset_mock()

stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
# --- Test Non-Hurdle ---
args = ForecastingModelArgs(run_type="test_run_type", saved=True)
non_hurdle_args = vars(args)
non_hurdle_args["algorithm"] = "LightGBMModel"
stepshifter_manager.configs = non_hurdle_args

stepshifter_manager._get_model(mock_partitioner_dict)
mock_stepshifter_model.assert_called_once_with(stepshifter_manager.config, mock_partitioner_dict)
mock_stepshifter_model.assert_called_once_with(stepshifter_manager.configs, mock_partitioner_dict)
mock_hurdle_model.assert_not_called()

def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle):
Expand All @@ -185,23 +232,38 @@ def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle):
patch("views_stepshifter.manager.stepshifter_manager.read_dataframe") as mock_read_dataframe, \
patch("views_stepshifter.manager.stepshifter_manager.StepshifterManager._get_model") as mock_get_model:

# StepshifterManager with StepshifterModel
stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
stepshifter_manager.config["sweep"] = False
# --- Test Non-Hurdle ---
args = ForecastingModelArgs(run_type="test_run_type", train=True)
non_hurdle_args = vars(args)
non_hurdle_args["algorithm"] = "LightGBMModel"
non_hurdle_args["sweep"] = False
stepshifter_manager.configs = non_hurdle_args

stepshifter_manager._train_model_artifact()

mock_split_hurdle.assert_not_called()
assert stepshifter_manager.config["run_type"] == "test_run_type"
assert stepshifter_manager.configs["run_type"] == "test_run_type"
mock_read_dataframe.assert_called_once()
mock_get_model.assert_called_once_with(stepshifter_manager._data_loader.partition_dict)
mock_get_model.return_value.fit.assert_called_once()
mock_get_model.return_value.save.assert_called_once()

# StepshifterManager with HurdleModel
stepshifter_manager_hurdle.config = stepshifter_manager_hurdle._update_single_config(MagicMock(run_type="test_run_type"))
mock_read_dataframe.reset_mock()
mock_get_model.reset_mock()

mock_split_hurdle.reset_mock()

# --- Test Hurdle ---
args = ForecastingModelArgs(run_type="test_run_type", train=True)
hurdle_args = vars(args)
hurdle_args["algorithm"] = "HurdleModel"
stepshifter_manager_hurdle.configs = hurdle_args
stepshifter_manager_hurdle._is_hurdle = True

stepshifter_manager_hurdle._train_model_artifact()

mock_split_hurdle.assert_called_once()
mock_read_dataframe.assert_called_once()
mock_get_model.assert_called_once_with(stepshifter_manager_hurdle._data_loader.partition_dict)

def test_evaluate_model_artifact(stepshifter_manager):
"""
Expand All @@ -215,30 +277,29 @@ def test_evaluate_model_artifact(stepshifter_manager):
patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:


# the else branch
stepshifter_manager._model_path.get_latest_model_artifact_path.return_value = Path("predictions_test_run_202401011200000")
stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
# --- Test default artifact branch (else) ---
args = ForecastingModelArgs(run_type="test_run_type", evaluate=True, saved=True)
stepshifter_manager.configs = vars(args)

eval_type = "test_eval_type"
artifact_name = None
stepshifter_manager._evaluate_model_artifact(eval_type, artifact_name)

assert stepshifter_manager.config["run_type"] == "test_run_type"
assert stepshifter_manager.configs["run_type"] == "test_run_type"
mock_logger.info.assert_called_once_with(f"Using latest (default) run type (test_run_type) specific artifact")
assert stepshifter_manager.config["timestamp"] == "202401011200000"
# mock_read_dataframe.assert_called_once()
mock_get_standardized_df.assert_called_once()

mock_logger.reset_mock()


# the if branch
# --- Test specific artifact branch (if) ---
artifact_name = "non_default_artifact.pkl"
stepshifter_manager._model_path.artifacts = Path("predictions_test_run_202401011200000")
expected_path = stepshifter_manager._model_path.artifacts / artifact_name

stepshifter_manager._evaluate_model_artifact(eval_type, artifact_name)

mock_logger.info.assert_called_once_with(f"Using (non-default) artifact: {artifact_name}")
path_artifact = stepshifter_manager._model_path.artifacts / artifact_name
assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl")
assert expected_path == Path("/path/to/artifacts/non_default_artifact.pkl")


def test_forecast_model_artifact(stepshifter_manager):
"""
Expand All @@ -252,35 +313,34 @@ def test_forecast_model_artifact(stepshifter_manager):
patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:


# the else branch
# mock_read_dataframe.return_value = pd.DataFrame({"a": [1, 2, 3]})
stepshifter_manager._model_path.get_latest_model_artifact_path.return_value = Path("predictions_test_run_202401011200000")
stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
# --- Test default artifact branch (else) ---
args = ForecastingModelArgs(run_type="forecasting", forecast=True, saved=True)
stepshifter_manager.configs = vars(args)

artifact_name = None
stepshifter_manager._forecast_model_artifact(artifact_name)

assert stepshifter_manager.config["run_type"] == "test_run_type"
mock_logger.info.assert_called_once_with(f"Using latest (default) run type (test_run_type) specific artifact")
assert stepshifter_manager.config["timestamp"] == "202401011200000"
# mock_read_dataframe.assert_called_once()
mock_model.predict.assert_called_once_with("test_run_type")
assert stepshifter_manager.configs["run_type"] == "forecasting"
mock_logger.info.assert_called_once_with(f"Using latest (default) run type (forecasting) specific artifact")
mock_model.predict.assert_called_once_with("forecasting")
mock_get_standardized_df.assert_called_once()

mock_logger.reset_mock()
mock_model.predict.reset_mock()
mock_get_standardized_df.reset_mock()


# the if branch
# --- Test specific artifact branch (if) with FileNotFoundError ---
mock_builtins_open.side_effect = FileNotFoundError("Test error")
artifact_name = "non_default_artifact.pkl"
stepshifter_manager._model_path.artifacts = Path("predictions_test_run_202401011200000")

with pytest.raises(FileNotFoundError) as exc_info:
stepshifter_manager._forecast_model_artifact(artifact_name)
assert str(exc_info.value) == "Test error"

assert str(exc_info.value) == "Test error"

mock_logger.info.assert_called_once_with(f"Using (non-default) artifact: {artifact_name}")
path_artifact = stepshifter_manager._model_path.artifacts / artifact_name
assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl")
path_artifact = stepshifter_manager._model_path.artifacts / artifact_name
assert path_artifact == Path("/path/to/artifacts/non_default_artifact.pkl")
mock_logger.exception.assert_called_once_with(f"Model artifact not found at {path_artifact}")

def test_evaluate_sweep(stepshifter_manager):
Expand All @@ -292,12 +352,12 @@ def test_evaluate_sweep(stepshifter_manager):
with patch("views_stepshifter.manager.stepshifter_manager.read_dataframe") as mock_read_dataframe, \
patch.object(StepshifterManager, "_get_standardized_df", return_value="standardized_df") as mock_get_standardized_df:

# mock_read_dataframe.return_value = pd.DataFrame({"a": [1, 2, 3]})
stepshifter_manager.config = stepshifter_manager._update_single_config(MagicMock(run_type="test_run_type"))
args = ForecastingModelArgs(run_type="test_run_type", evaluate=True, saved=True)
stepshifter_manager.configs = vars(args)

eval_type = "test_eval_type"
stepshifter_manager._evaluate_sweep(eval_type, mock_model)

assert stepshifter_manager.config["run_type"] == "test_run_type"
# mock_read_dataframe.assert_called_once()
assert stepshifter_manager.configs["run_type"] == "test_run_type"
mock_model.predict.assert_called_once_with("test_run_type", eval_type)
mock_get_standardized_df.assert_called_once()
mock_get_standardized_df.assert_called_once()
28 changes: 14 additions & 14 deletions views_stepshifter/manager/stepshifter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _split_hurdle_parameters(self):
"""
clf_dict = {}
reg_dict = {}
config = self.config
config = self.configs

for key, value in config.items():
if key.startswith("clf_"):
Expand All @@ -84,12 +84,12 @@ def _get_model(self, partitioner_dict: dict):
The model object based on the algorithm specified in the config
"""
if self._is_hurdle:
model = HurdleModel(self.config, partitioner_dict)
model = HurdleModel(self.configs, partitioner_dict)
elif self._is_shurf:
model = ShurfModel(self.config, partitioner_dict)
model = ShurfModel(self.configs, partitioner_dict)
else:
self.config["model_reg"] = self.config["algorithm"]
model = StepshifterModel(self.config, partitioner_dict)
self.configs = {"model_reg": self.configs["algorithm"]}
Copy link

Copilot AI Nov 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line overwrites the entire configs dictionary with a single key-value pair, discarding all other configuration. It should update the dictionary instead: self.configs['model_reg'] = self.configs['algorithm']

Suggested change
self.configs = {"model_reg": self.configs["algorithm"]}
self.configs["model_reg"] = self.configs["algorithm"]

Copilot uses AI. Check for mistakes.
model = StepshifterModel(self.configs, partitioner_dict)

return model

Expand All @@ -103,10 +103,10 @@ def _train_model_artifact(self):
path_raw = self._model_path.data_raw
path_artifacts = self._model_path.artifacts
# W&B does not directly support nested dictionaries for hyperparameters
if self.config["sweep"] and (self._is_hurdle or self._is_shurf):
self.config = self._split_hurdle_parameters()
if self.configs["sweep"] and (self._is_hurdle or self._is_shurf):
self.configs = self._split_hurdle_parameters()

run_type = self.config["run_type"]
run_type = self.configs["run_type"]
df_viewser = read_dataframe(
path_raw / f"{run_type}_viewser_df{PipelineConfig.dataframe_format}"
)
Expand All @@ -115,7 +115,7 @@ def _train_model_artifact(self):
stepshift_model = self._get_model(partitioner_dict)
stepshift_model.fit(df_viewser)

if not self.config["sweep"]:
if not self.configs["sweep"]:
model_filename = generate_model_file_name(
run_type, file_extension=".pkl"
)
Expand All @@ -136,7 +136,7 @@ def _evaluate_model_artifact(
A list of DataFrames containing the evaluation results
"""
path_artifacts = self._model_path.artifacts
run_type = self.config["run_type"]
run_type = self.configs["run_type"]

# if an artifact name is provided through the CLI, use it.
# Otherwise, get the latest model artifact based on the run type
Expand All @@ -153,7 +153,7 @@ def _evaluate_model_artifact(
)
path_artifact = self._model_path.get_latest_model_artifact_path(run_type)

self.config["timestamp"] = path_artifact.stem[-15:]
self.configs['timestamp'] = path_artifact.stem[-15:]

try:
with open(path_artifact, "rb") as f:
Expand All @@ -179,7 +179,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
The forecasted DataFrame
"""
path_artifacts = self._model_path.artifacts
run_type = self.config["run_type"]
run_type = self.configs["run_type"]

# if an artifact name is provided through the CLI, use it.
# Otherwise, get the latest model artifact based on the run type
Expand All @@ -196,7 +196,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
)
path_artifact = self._model_path.get_latest_model_artifact_path(run_type)

self.config["timestamp"] = path_artifact.stem[-15:]
self.configs['timestamp'] = path_artifact.stem[-15:]

try:
with open(path_artifact, "rb") as f:
Expand All @@ -211,7 +211,7 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
return df_prediction

def _evaluate_sweep(self, eval_type: str, model: any) -> List[pd.DataFrame]:
run_type = self.config["run_type"]
run_type = self.configs["run_type"]

df_predictions = model.predict(run_type, eval_type)
df_predictions = [
Expand Down
Loading
Loading