From 7a38ab75eaa272ecdd8943b8b6cdcfe5f2ccc885 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Mon, 17 Mar 2025 11:33:26 +0100 Subject: [PATCH 01/24] support standardizing a list of predictions --- .../manager/stepshifter_manager.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 260f59a..27f7548 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -36,9 +36,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: The standardized DataFrame """ - # post-process: replace inf and -inf with 0 - df = df.replace([np.inf, -np.inf], 0) - df = df.mask(df < 0, 0) + def standardize_value(value): + # 1) Replace inf and -inf with 0; + # 2) Replace negative values with 0 + if isinstance(value, list): + return [0 if (v == np.inf or v == -np.inf or v < 0) else v for v in value] + else: + return 0 if (value == np.inf or value == -np.inf or value < 0) else value + + df = df.applymap(standardize_value) return df def _split_hurdle_parameters(self): @@ -157,10 +163,9 @@ def _evaluate_model_artifact( raise df_predictions = stepshift_model.predict(run_type, eval_type) - if not self._is_shurf: - df_predictions = [ - StepshifterManager._get_standardized_df(df) for df in df_predictions - ] + df_predictions = [ + StepshifterManager._get_standardized_df(df) for df in df_predictions + ] return df_predictions def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame: From 5d0b72279a9426e61ab230a5c2d5ffbeec8e4da5 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Mon, 17 Mar 2025 11:33:34 +0100 Subject: [PATCH 02/24] add test --- tests/test_stepshifter_manager.py | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_stepshifter_manager.py b/tests/test_stepshifter_manager.py index 4cd430c..fb56a2d 100644 --- a/tests/test_stepshifter_manager.py +++ b/tests/test_stepshifter_manager.py @@ -118,18 +118,30 @@ def test_get_standardized_df(): """ Test the _get_standardized_df method to ensure it correctly standardizes the DataFrame. 
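    Both scalar cells and list-valued cells are exercised below, since models
    that return sampled predictions emit a list of draws per cell.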
""" - df = pd.DataFrame({ + df1 = pd.DataFrame({ "a": [1.0, -1.0, np.inf, -np.inf, 3.0], "b": [4.0, 5.0, -6.0, 7.0, -8.0] }) - expected_df = pd.DataFrame({ + expected_df1 = pd.DataFrame({ "a": [1.0, 0.0, 0.0, 0.0, 3.0], "b": [4.0, 5.0, 0.0, 7.0, 0.0] }) - result_df = StepshifterManager._get_standardized_df(df) - pd.testing.assert_frame_equal(result_df, expected_df) - - + df2 = pd.DataFrame({ + "a": [[1.0, -1.0, np.inf], + [-np.inf, 3.0, 4.0]], + "b": [[4.0, 5.0, -6.0], + [7.0, -8.0, 9.0]], + }) + expected_df2 = pd.DataFrame({ + "a": [[1.0, 0.0, 0.0], + [0.0, 3.0, 4.0]], + "b": [[4.0, 5.0, 0.0], + [7.0, 0.0, 9.0]], + }) + result_df1 = StepshifterManager._get_standardized_df(df1) + result_df2 = StepshifterManager._get_standardized_df(df2) + pd.testing.assert_frame_equal(result_df1, expected_df1) + pd.testing.assert_frame_equal(result_df2, expected_df2) def test_split_hurdle_parameters(stepshifter_manager_hurdle): """ @@ -164,9 +176,6 @@ def test_get_model(stepshifter_manager, stepshifter_manager_hurdle, mock_partiti stepshifter_manager._get_model(mock_partitioner_dict) mock_stepshifter_model.assert_called_once_with(stepshifter_manager.config, mock_partitioner_dict) mock_hurdle_model.assert_not_called() - - - def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle): """ @@ -194,8 +203,6 @@ def test_train_model_artifact(stepshifter_manager, stepshifter_manager_hurdle): mock_split_hurdle.assert_called_once() - - def test_evaluate_model_artifact(stepshifter_manager): """ Test the _evaluate_model_artifact method to ensure it correctly evaluates the model artifact. @@ -233,8 +240,6 @@ def test_evaluate_model_artifact(stepshifter_manager): path_artifact = stepshifter_manager._model_path.artifacts / artifact_name assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl") - - def test_forecast_model_artifact(stepshifter_manager): """ Test the _forecast_model_artifact method to ensure it correctly forecasts the model artifact. @@ -278,7 +283,6 @@ def test_forecast_model_artifact(stepshifter_manager): assert path_artifact == Path("predictions_test_run_202401011200000/non_default_artifact.pkl") mock_logger.exception.assert_called_once_with(f"Model artifact not found at {path_artifact}") - def test_evaluate_sweep(stepshifter_manager): """ Test the _evaluate_sweep method. 
@@ -297,7 +301,3 @@ def test_evaluate_sweep(stepshifter_manager): # mock_read_dataframe.assert_called_once() mock_model.predict.assert_called_once_with("test_run_type", eval_type) mock_get_standardized_df.assert_called_once() - - - - From 00c4562f4e116d8c4fd5f4316c92ccae5d0d1cce Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:11:11 +0100 Subject: [PATCH 03/24] add shurf_model.py --- .../manager/stepshifter_manager.py | 6 +- views_stepshifter/models/shurf_model.py | 283 ++++++++++++++++++ 2 files changed, 286 insertions(+), 3 deletions(-) create mode 100644 views_stepshifter/models/shurf_model.py diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 27f7548..310bd56 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -8,7 +8,7 @@ import pandas as pd import numpy as np from typing import Union, Optional, List, Dict -# from views_stepshifter.models.shurf import StepShiftedHurdleUncertainRF +from views_stepshifter.models.shurf_model import StepShiftedHurdleUncertainRF logger = logging.getLogger(__name__) @@ -84,8 +84,8 @@ def _get_model(self, partitioner_dict: dict): """ if self._is_hurdle: model = HurdleModel(self.config, partitioner_dict) - # elif self._is_shurf: - # model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) + elif self._is_shurf: + model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) else: self.config["model_reg"] = self.config["algorithm"] model = StepshifterModel(self.config, partitioner_dict) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py new file mode 100644 index 0000000..94e1dd4 --- /dev/null +++ b/views_stepshifter/models/shurf_model.py @@ -0,0 +1,283 @@ +from views_pipeline_core.managers.model import ModelManager +from views_stepshifter.models.stepshifter import StepshifterModel +from views_stepshifter.models.hurdle_model import HurdleModel +from views_stepshifter.models.validation import views_validate +from sklearn.utils.validation import check_is_fitted +import pandas as pd +from typing import List, Dict +import numpy as np +import logging +# from darts.models import RandomForest +from tqdm import tqdm + +logger = logging.getLogger(__name__) + +class StepShiftedHurdleUncertainRF(HurdleModel): + """ + Hurdle model for time series forecasting. The model consists of two stages: + 1. Binary stage: Predicts whether the target variable is 0 or > 0. + 2. Positive stage: Predicts the value of the target variable when it is > 0. + + Note: + This algorithm uses a two-step approach. + + **Step 1: Classification Stage** + In the first step, a regression model is used with a binary target (0 or 1), + indicating the absence or presence of violence. This stage functions similarly + to a linear probability model, estimating the likelihood of a positive outcome. + Since the model is a regression rather than a classification model, + these estimates are not strictly bounded between 0 and 1, + but this is acceptable for the purpose of this step. + + To determine whether an observation is classified as "positive," we apply a threshold. + The default threshold is 1, meaning that predictions above this value + are considered positive outcomes. This threshold can be adjusted as + a tunable hyperparameter to better suit specific requirements. 
+ + **Step 2: Regression Stage** + In the second step, we use a regression model to predict a continuous or count value + (e.g., the expected number of conflict fatalities) for the selected time series. + We include the entire time series for countries or PRIO grids where the + classification stage yielded at least one "positive" prediction, + rather than limiting the regression to just the predicted positive values. + """ + + def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]], threshold: float = 0.1): + super().__init__(config, partitioner_dict, threshold) + print(config) +# self._clf = RandomForest +# self._reg = RandomForest + self._clf_params = self._get_parameters(config)['clf'] + self._reg_params = self._get_parameters(config)['reg'] + self._threshold = threshold + + self._submodel_list = [] + + self._partitioner_dict = partitioner_dict + self._submodels_to_train = config['submodels_to_train'] + # self._n_estimators = config['parameters']['n_estimators'] + self.log_target = config['log_target'] + self._max_features = config['max_features'] + self._max_depth = config['max_depth'] + self._max_samples = config['max_samples'] + self._pred_samples = config['pred_samples'] + self._draw_dist = config['draw_dist'] + self._draw_sigma = config['draw_sigma'] + self._geo_unit_samples = config['geo_unit_samples'] + self._n_jobs = config['n_jobs'] + + @views_validate + def fit(self, df: pd.DataFrame): + """ + Generate predictions using the trained submodels. + This method performs the following steps: + 1. Prepares the data for classification and regression stages. + 2. Iterates over each submodel to generate predictions: + - Predicts probabilities using the classification model. + - Predicts target values using the regression model. + - Handles infinite values in predictions. + 3. Draws samples from the distributions: + - For each prediction sample, combines classification and regression predictions. + - Applies binomial, Poisson, or lognormal distributions to generate final predictions. + 4. Aggregates the predictions from all submodels into a final DataFrame. + Returns: + pd.DataFrame: A DataFrame containing the final set of predictions with indices set to 'draw'. 
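+
+        Note:
+            Each of the ``submodels_to_train`` submodels fits one
+            (binary_model, positive_model) pair per step; the per-submodel
+            model dicts are collected in ``self._submodel_list`` and sampled
+            from later in ``predict_sequence``.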
+ """ + df = self._process_data(df) + self._prepare_time_series(df) + + target_binary = [s.map(lambda x: (x > self._threshold).astype(float)) for s in self._target_train] + + # Positive outcome (for cases where target > threshold) + target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) + if (t.values() > self._threshold).any()]) + + for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): + # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") + + for step in tqdm(self._steps, desc=f"Steps for submodel {i+1}"): + # logger.info(f"Training step {step}") + # Fit binary-like stage using a regression model, but the target is binary (0 or 1) + binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) + binary_model.fit(target_binary, past_covariates=self._past_cov) + + # Fit positive stage using the regression model + positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) + positive_model.fit(target_pos, past_covariates=past_cov_pos) + self._models[step] = (binary_model, positive_model) + self._submodel_list.append(self._models) + logger.info(f"Submodel {i+1}/{self._submodels_to_train} trained successfully") + self.is_fitted_ = True + + + def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: + """ + Predicts n draws of outcomes based on the provided DataFrame . + + Parameters: + ----------- + self: StepShiftedHurdleUncertainRF + The model object. + + run_type : str + The type of run to perform. Currently it is unlikely to affect the behaviour of the function. + + eval_type : str + The type of evaluation to perform. Currently it is unlikely to affect the behaviour of the function. + + sequence_number : int + The sequence number to predict outcomes for. + + + Returns: + -------- + pd.DataFrame + The final predictions as a DataFrame. 
+ """ + + sample_number = 0 + final_preds = [] # This will hold predictions for all sub-models and all samples within sub-models + # Loop over submodels + submodel_number = 0 + for submodel in tqdm(self._submodel_list, desc=f"Predicting submodel: {run_type}", leave=True): +# print(submodel) + pred_by_step_binary = [self._predict_by_step(submodel[step][0], step, sequence_number) + for step in self._steps] + pred_by_step_positive = [self._predict_by_step(submodel[step][1], step, sequence_number) + for step in self._steps] + + pred_concat_binary = pd.concat(pred_by_step_binary, axis=0) + + pred_concat_binary.rename(columns={'step_combined':'Classification'}, inplace=True) + pred_concat_positive = pd.concat(pred_by_step_positive, axis=0) + pred_concat_positive.rename(columns={'step_combined':'Regression'}, inplace=True) + pred_concat = pd.concat([pred_concat_binary, pred_concat_positive], axis=1) + pred_concat['submodel'] = submodel_number +# print(pred_concat.tail(12)) + + # Append the combined predictions to the final predictions list + final_preds.append(pred_concat) + submodel_number += 1 +# submodel_preds[i] = final_preds + # Generate a DataFrame from the final predictions list for this sequence number + final_preds_aslists = pd.concat(final_preds, axis=0) + # Drawing samples from the classification model + # Ensuring that the classification probabilities are between 0 and 1: + final_preds_aslists['Classification'] = final_preds_aslists['Classification'].apply(lambda x: np.clip(x, 0, 1)) + final_preds_aslists['ClassificationSample'] = final_preds_aslists['Classification'].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) + + # Drawing samples from the regression model + + if self.log_target == True: + if self._draw_dist == 'Poisson': # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here + print('Poisson not implemented') + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + if self._draw_dist == 'Lognormal': + # Draw from normal distribution for log-transformed outcomes, then exponentiate, then round to integer + #pred_concat['RegressionSample'] = pred_concat['Regression'].apply(lambda x: np.random.normal(x, self._draw_sigma, self._pred_samples)) + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(x, self._draw_sigma, self._pred_samples))))) + if self.log_target == False: + if self._draw_dist == 'Poisson': # Note: this assumes a non-log-transformed target + print('Poisson not implemented') + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + if self._draw_dist == 'Lognormal': + print('Draws for non-log-transformed target: first implementation' ) + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(np.log1p(x), self._draw_sigma, self._pred_samples))))) + + if self._draw_dist == '': + final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] + print('final_preds_aslists contains the samples in list form. Shape:', final_preds_aslists.shape, '. 
Looks like this:') + print(final_preds_aslists.tail(20)) + # 'Explode' the samples to get one row per sample + final_preds_full = final_preds_aslists.explode(['ClassificationSample','RegressionSample']) + final_preds_full['Prediction'] = final_preds_full['ClassificationSample'] * final_preds_full['RegressionSample'] + # Ensuring that the final predictions are positive: + final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) + # Column for the main prediction: + pred_col_name = 'pred_' + self.depvar + final_preds_full[pred_col_name] = final_preds_full['Prediction'] + # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions + if self.log_target == True: + final_preds_full['LogPrediction'] = final_preds_full['Prediction'] + final_preds_full['Prediction'] = np.expm1(final_preds_full['Prediction']) + if self.log_target == False: + final_preds_full['LogPrediction'] = np.log1p(final_preds_full['Prediction']) + final_preds_full.drop(columns=['Classification','Regression','ClassificationSample','RegressionSample','submodel','Prediction','LogPrediction'],inplace=True) + final_preds = pd.DataFrame(final_preds_full.groupby(['month_id', 'country_id'])[pred_col_name].apply(list)) + print('final_preds is the end product of the predict sequence function. Shape:', final_preds_full.shape) + print(final_preds.tail(20)) + return final_preds + + @views_validate + def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") -> pd.DataFrame: + """ + Predicts outcomes based on the provided DataFrame and run type. + + Parameters: + ----------- + df : pd.DataFrame + The input data for making predictions. + run_type : str + The type of run to perform. If 'forecasting', a single prediction is made. + Otherwise, multiple predictions are made based on the evaluation sequence number. + eval_type : str, optional + The type of evaluation to perform. Default is "standard". + + Returns: + -------- + pd.DataFrame + The final predictions as a DataFrame. 
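+
+        Note:
+            For evaluation run types a list with one DataFrame per evaluation
+            sequence is returned; for ``run_type='forecasting'`` a single
+            DataFrame of sampled predictions is returned.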
+ """ + + # Process the input data to ensure it is in the correct format + df = self._process_data(df) + # Check if the model has been fitted before making predictions + check_is_fitted(self, 'is_fitted_') + print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) + + # If the run type is not 'forecasting', perform multiple predictions + if run_type != 'forecasting': + preds = [] # D: List to collect predictions for each sequence + # If the evaluation type is "standard", iterate over the evaluation sequence number + submodel_preds = {} # Not sure this belongs here + if eval_type == "standard": + # Loop over the evaluation sequence number + for sequence_number in tqdm(range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc=f"Sequence", leave=True): +# print('sequence_number', sequence_number) + temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) + + # Output the temporary final predictions with samples as parquet + temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') + # Convert to views_pipeline standard format + + # Aggregate the predictions into point predictions +# final_preds.pop('LogPrediction') +# agg_preds = np.log1p(temp_preds_full.groupby(['month_id', 'country_id']).mean()) +# final_preds.rename(columns={'Prediction':'pred_ged_sb'}, inplace=True) +# agg_preds.pop('submodel') + preds.append(temp_preds_full) # D: Append the final predictions for this sequence number + # Output the final predictions as parquet +# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') + return preds + else: + # If the run type is 'forecasting', perform a single prediction + sequence_number = 0 + temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) + print('temp_preds_full') + +# print('final_preds_aslists') +# print(final_preds_aslists.describe()) + + # Output the final predictions with samples as parquet + temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') + # Aggregate the predictions into point predictions +# agg_preds = temp_preds_full.groupby(['month_id', 'country_id']).mean() +# agg_preds.pop('submodel') + # Output the final predictions as parquet +# agg_preds['ged_sb_dep'] = agg_preds['Prediction'] +# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') + + # Return the final predictions as a DataFrame + print('Returning final predictions:') + print(temp_preds_full.tail(20)) + return temp_preds_full \ No newline at end of file From f395b4b91f7b3b61a257ea02110ead5229c25db8 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:19:38 +0100 Subject: [PATCH 04/24] remove commented codes --- views_stepshifter/models/hurdle_model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py index 639adf1..c982a58 100644 --- a/views_stepshifter/models/hurdle_model.py +++ b/views_stepshifter/models/hurdle_model.py @@ -7,8 +7,6 @@ import logging import tqdm from concurrent.futures import ProcessPoolExecutor, as_completed -# import multiprocessing -# multiprocessing.set_start_method('spawn') 
from functools import partial logger = logging.getLogger(__name__) From 96a163ccab8cb9c0ec5d811c408b54a6d6ff83c8 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:20:01 +0100 Subject: [PATCH 05/24] adapt to latest class --- views_stepshifter/models/shurf_model.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 94e1dd4..4a2c7b5 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -42,14 +42,10 @@ class StepShiftedHurdleUncertainRF(HurdleModel): rather than limiting the regression to just the predicted positive values. """ - def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]], threshold: float = 0.1): - super().__init__(config, partitioner_dict, threshold) - print(config) -# self._clf = RandomForest -# self._reg = RandomForest + def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): + super().__init__(config, partitioner_dict) self._clf_params = self._get_parameters(config)['clf'] self._reg_params = self._get_parameters(config)['reg'] - self._threshold = threshold self._submodel_list = [] @@ -85,12 +81,16 @@ def fit(self, df: pd.DataFrame): """ df = self._process_data(df) self._prepare_time_series(df) + self._clf = self._resolve_clf_model(self._config["model_clf"]) + self._reg = self._resolve_reg_model(self._config["model_reg"]) - target_binary = [s.map(lambda x: (x > self._threshold).astype(float)) for s in self._target_train] + target_binary = [ + s.map(lambda x: (x > 0).astype(float)) for s in self._target_train + ] # Positive outcome (for cases where target > threshold) target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) - if (t.values() > self._threshold).any()]) + if (t.values() > 0).any()]) for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") From 22fca3f95ebbf62ea3804ee78053900e0d19f528 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:24:26 +0100 Subject: [PATCH 06/24] update --- views_stepshifter/models/shurf_model.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 4a2c7b5..01d790c 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -110,7 +110,7 @@ def fit(self, df: pd.DataFrame): self.is_fitted_ = True - def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: + def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame: """ Predicts n draws of outcomes based on the provided DataFrame . @@ -208,8 +208,7 @@ def predict_sequence(self,run_type, eval_type, sequence_number) -> pd.DataFrame: print(final_preds.tail(20)) return final_preds - @views_validate - def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") -> pd.DataFrame: + def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: """ Predicts outcomes based on the provided DataFrame and run type. @@ -229,8 +228,6 @@ def predict(self, df: pd.DataFrame, run_type: str, eval_type: str = "standard") The final predictions as a DataFrame. 
""" - # Process the input data to ensure it is in the correct format - df = self._process_data(df) # Check if the model has been fitted before making predictions check_is_fitted(self, 'is_fitted_') print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) From 6dd594f07aec9f73b906d0652bcadf39285ae5d0 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 14:30:12 +0100 Subject: [PATCH 07/24] fix 1 --- views_stepshifter/models/shurf_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 01d790c..4a2b449 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -194,7 +194,7 @@ def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame # Ensuring that the final predictions are positive: final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) # Column for the main prediction: - pred_col_name = 'pred_' + self.depvar + pred_col_name = 'pred_' + self._targets final_preds_full[pred_col_name] = final_preds_full['Prediction'] # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions if self.log_target == True: @@ -230,7 +230,7 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: # Check if the model has been fitted before making predictions check_is_fitted(self, 'is_fitted_') - print('Dependent variable:', self.depvar, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) + print('Dependent variable:', self._targets, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) # If the run type is not 'forecasting', perform multiple predictions if run_type != 'forecasting': From 942c273de24b41a4783015d73df770c649671b8d Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:39:38 +0100 Subject: [PATCH 08/24] add ShurfModel to stepshifter_manager --- views_stepshifter/manager/stepshifter_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 310bd56..96c4f27 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -3,12 +3,12 @@ from views_pipeline_core.files.utils import read_dataframe from views_stepshifter.models.stepshifter import StepshifterModel from views_stepshifter.models.hurdle_model import HurdleModel +from views_stepshifter.models.shurf_model import ShurfModel import logging import pickle import pandas as pd import numpy as np from typing import Union, Optional, List, Dict -from views_stepshifter.models.shurf_model import StepShiftedHurdleUncertainRF logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ def __init__( ) -> None: 
super().__init__(model_path, wandb_notifications, use_prediction_store) self._is_hurdle = self._config_meta["algorithm"] == "HurdleModel" - self._is_shurf = self._config_meta["algorithm"] == "SHURF" + self._is_shurf = self._config_meta["algorithm"] == "ShurfModel" @staticmethod def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: @@ -85,7 +85,7 @@ def _get_model(self, partitioner_dict: dict): if self._is_hurdle: model = HurdleModel(self.config, partitioner_dict) elif self._is_shurf: - model = StepShiftedHurdleUncertainRF(self.config, partitioner_dict) + model = ShurfModel(self.config, partitioner_dict) else: self.config["model_reg"] = self.config["algorithm"] model = StepshifterModel(self.config, partitioner_dict) @@ -102,7 +102,7 @@ def _train_model_artifact(self): path_raw = self._model_path.data_raw path_artifacts = self._model_path.artifacts # W&B does not directly support nested dictionaries for hyperparameters - if self.config["sweep"] and self._is_hurdle: + if self.config["sweep"] and (self._is_hurdle or self._is_shurf): self.config = self._split_hurdle_parameters() run_type = self.config["run_type"] From b2c5d96c01f4c6e209b3ebc003a51607e09bfcf4 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:39:59 +0100 Subject: [PATCH 09/24] add eval_type check --- views_stepshifter/models/hurdle_model.py | 31 +++++++++--------------- views_stepshifter/models/stepshifter.py | 21 ++++------------ 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py index c982a58..b539821 100644 --- a/views_stepshifter/models/hurdle_model.py +++ b/views_stepshifter/models/hurdle_model.py @@ -147,27 +147,17 @@ def fit(self, df: pd.DataFrame): self._models = models self.is_fitted_ = True - # for step in tqdm.tqdm(self._steps, desc="Fitting model for step", leave=True): - # # Fit binary-like stage using a classification model, but the target is binary (0 or 1) - # binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) - # binary_model.fit(target_binary, past_covariates=self._past_cov) - - # # Fit positive stage using the regression model - # positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) - # positive_model.fit(target_pos, past_covariates=past_cov_pos) - # self._models[step] = (binary_model, positive_model) - # self.is_fitted_ = True - def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: check_is_fitted(self, "is_fitted_") if run_type != "forecasting": - final_preds = [] + if eval_type == "standard": total_sequence_number = ( ModelManager._resolve_evaluation_sequence_number(eval_type) ) if self.get_device_params().get("device") == "cuda": + pred = [] for sequence_number in tqdm.tqdm( range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc="Predicting for sequence number", @@ -184,9 +174,9 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ) for step in self._steps ] - final_pred = pd.concat(pred_by_step_binary, axis=0) * pd.concat(pred_by_step_positive, axis=0) - final_preds.append(final_pred) - return final_preds + pred = pd.concat(pred_by_step_binary, axis=0) * pd.concat(pred_by_step_positive, axis=0) + preds.append(pred) + else: preds = [None] * total_sequence_number with ProcessPoolExecutor() as executor: @@ -201,7 +191,10 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ): sequence_number = 
futures[future] preds[sequence_number] = future.result() - return preds + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) else: if self.get_device_params().get("device") == "cuda": @@ -215,10 +208,10 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: self._predict_by_step(self._models[step][1], step, 0) ) - final_preds = pd.concat(pred_by_step_binary, axis=0) * pd.concat( + preds = pd.concat(pred_by_step_binary, axis=0) * pd.concat( pred_by_step_positive, axis=0 ) - return final_preds + else: with ProcessPoolExecutor() as executor: futures_binary = { @@ -255,4 +248,4 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: pd.concat(pred_by_step_binary, axis=0).sort_index() * pd.concat(pred_by_step_positive, axis=0).sort_index() ) - return preds + return preds diff --git a/views_stepshifter/models/stepshifter.py b/views_stepshifter/models/stepshifter.py index 2d42e5a..dca342b 100644 --- a/views_stepshifter/models/stepshifter.py +++ b/views_stepshifter/models/stepshifter.py @@ -237,18 +237,6 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: if run_type != "forecasting": if eval_type == "standard": - # preds = [] - # for sequence_number in tqdm.tqdm( - # range(ModelManager._resolve_evaluation_sequence_number(eval_type)), - # desc="Predicting for sequence number", - # ): - # pred_by_step = [ - # self._predict_by_step(self._models[step], step, sequence_number) - # for step in self._steps - # ] - # pred = pd.concat(pred_by_step, axis=0) - # preds.append(pred) - total_sequence_number = ( ModelManager._resolve_evaluation_sequence_number(eval_type) ) @@ -282,18 +270,19 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: ): sequence_number = futures[future] preds[sequence_number] = future.result() + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) else: - # preds = [] - # for step in tqdm.tqdm(self._steps, desc="Predicting for steps"): - # preds.append(self._predict_by_step(self._models[step], step, 0)) - # preds = pd.concat(preds, axis=0).sort_index() if self.get_device_params().get("device") == "cuda": preds = [] for step in tqdm.tqdm(self._steps, desc="Predicting for steps"): preds.append(self._predict_by_step(self._models[step], step, 0)) preds = pd.concat(preds, axis=0).sort_index() + else: with ProcessPoolExecutor() as executor: futures = { From 45b146eb52ebcf065f2f7a355833f5d353b67ce8 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:46:30 +0100 Subject: [PATCH 10/24] rename --- views_stepshifter/models/shurf_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 4a2b449..9c5e103 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -class StepShiftedHurdleUncertainRF(HurdleModel): +class ShurfModel(HurdleModel): """ Hurdle model for time series forecasting. The model consists of two stages: 1. Binary stage: Predicts whether the target variable is 0 or > 0. 
From 92c2e227500ef9750c7489f6c9f632cb32a52311 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:53:29 +0100 Subject: [PATCH 11/24] cleanup --- views_stepshifter/models/shurf_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 9c5e103..155a1e4 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -53,14 +53,14 @@ def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): self._submodels_to_train = config['submodels_to_train'] # self._n_estimators = config['parameters']['n_estimators'] self.log_target = config['log_target'] - self._max_features = config['max_features'] - self._max_depth = config['max_depth'] - self._max_samples = config['max_samples'] + # self._max_features = config['max_features'] + # self._max_depth = config['max_depth'] + # self._max_samples = config['max_samples'] self._pred_samples = config['pred_samples'] self._draw_dist = config['draw_dist'] self._draw_sigma = config['draw_sigma'] - self._geo_unit_samples = config['geo_unit_samples'] - self._n_jobs = config['n_jobs'] + # self._geo_unit_samples = config['geo_unit_samples'] + # self._n_jobs = config['n_jobs'] @views_validate def fit(self, df: pd.DataFrame): From 9a9cd0d90cb1a21bfee964c41e8b7131e020d6e7 Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Wed, 19 Mar 2025 16:00:17 +0100 Subject: [PATCH 12/24] update shurf --- views_stepshifter/models/shurf_model.py | 337 ++++++++++++------------ 1 file changed, 173 insertions(+), 164 deletions(-) diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py index 155a1e4..51365da 100644 --- a/views_stepshifter/models/shurf_model.py +++ b/views_stepshifter/models/shurf_model.py @@ -1,5 +1,4 @@ from views_pipeline_core.managers.model import ModelManager -from views_stepshifter.models.stepshifter import StepshifterModel from views_stepshifter.models.hurdle_model import HurdleModel from views_stepshifter.models.validation import views_validate from sklearn.utils.validation import check_is_fitted @@ -7,58 +6,28 @@ from typing import List, Dict import numpy as np import logging -# from darts.models import RandomForest from tqdm import tqdm logger = logging.getLogger(__name__) + class ShurfModel(HurdleModel): - """ - Hurdle model for time series forecasting. The model consists of two stages: - 1. Binary stage: Predicts whether the target variable is 0 or > 0. - 2. Positive stage: Predicts the value of the target variable when it is > 0. - - Note: - This algorithm uses a two-step approach. - - **Step 1: Classification Stage** - In the first step, a regression model is used with a binary target (0 or 1), - indicating the absence or presence of violence. This stage functions similarly - to a linear probability model, estimating the likelihood of a positive outcome. - Since the model is a regression rather than a classification model, - these estimates are not strictly bounded between 0 and 1, - but this is acceptable for the purpose of this step. - - To determine whether an observation is classified as "positive," we apply a threshold. - The default threshold is 1, meaning that predictions above this value - are considered positive outcomes. This threshold can be adjusted as - a tunable hyperparameter to better suit specific requirements. 
- - **Step 2: Regression Stage** - In the second step, we use a regression model to predict a continuous or count value - (e.g., the expected number of conflict fatalities) for the selected time series. - We include the entire time series for countries or PRIO grids where the - classification stage yielded at least one "positive" prediction, - rather than limiting the regression to just the predicted positive values. - """ - def __init__(self, config: Dict, partitioner_dict: Dict[str, List[int]]): super().__init__(config, partitioner_dict) - self._clf_params = self._get_parameters(config)['clf'] - self._reg_params = self._get_parameters(config)['reg'] + self._clf_params = self._get_parameters(config)["clf"] + self._reg_params = self._get_parameters(config)["reg"] self._submodel_list = [] + self._submodels_to_train = config["submodels_to_train"] + self._log_target = config["log_target"] + self._pred_samples = config["pred_samples"] + self._draw_dist = config["draw_dist"] + self._draw_sigma = config["draw_sigma"] - self._partitioner_dict = partitioner_dict - self._submodels_to_train = config['submodels_to_train'] # self._n_estimators = config['parameters']['n_estimators'] - self.log_target = config['log_target'] # self._max_features = config['max_features'] # self._max_depth = config['max_depth'] # self._max_samples = config['max_samples'] - self._pred_samples = config['pred_samples'] - self._draw_dist = config['draw_dist'] - self._draw_sigma = config['draw_sigma'] # self._geo_unit_samples = config['geo_unit_samples'] # self._n_jobs = config['n_jobs'] @@ -88,29 +57,35 @@ def fit(self, df: pd.DataFrame): s.map(lambda x: (x > 0).astype(float)) for s in self._target_train ] - # Positive outcome (for cases where target > threshold) - target_pos, past_cov_pos = zip(*[(t, p) for t, p in zip(self._target_train, self._past_cov) - if (t.values() > 0).any()]) + target_pos, past_cov_pos = zip( + *[ + (t, p) + for t, p in zip(self._target_train, self._past_cov) + if (t.values() > 0).any() + ] + ) for i in tqdm(range(self._submodels_to_train), desc="Training submodel"): - # logger.info(f"Training submodel {i+1}/{self._submodels_to_train}") - + for step in tqdm(self._steps, desc=f"Steps for submodel {i+1}"): - # logger.info(f"Training step {step}") # Fit binary-like stage using a regression model, but the target is binary (0 or 1) - binary_model = self._clf(lags_past_covariates=[-step], **self._clf_params) + binary_model = self._clf( + lags_past_covariates=[-step], **self._clf_params + ) binary_model.fit(target_binary, past_covariates=self._past_cov) # Fit positive stage using the regression model - positive_model = self._reg(lags_past_covariates=[-step], **self._reg_params) + positive_model = self._reg( + lags_past_covariates=[-step], **self._reg_params + ) positive_model.fit(target_pos, past_covariates=past_cov_pos) self._models[step] = (binary_model, positive_model) + self._submodel_list.append(self._models) - logger.info(f"Submodel {i+1}/{self._submodels_to_train} trained successfully") + self.is_fitted_ = True - - - def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame: + + def predict_sequence(self, sequence_number) -> pd.DataFrame: """ Predicts n draws of outcomes based on the provided DataFrame . @@ -118,94 +93,157 @@ def predict_sequence(self, run_type, eval_type, sequence_number) -> pd.DataFrame ----------- self: StepShiftedHurdleUncertainRF The model object. - + run_type : str The type of run to perform. 
Currently it is unlikely to affect the behaviour of the function. - - eval_type : str + + eval_type : str The type of evaluation to perform. Currently it is unlikely to affect the behaviour of the function. - + sequence_number : int The sequence number to predict outcomes for. - - + + Returns: -------- pd.DataFrame The final predictions as a DataFrame. """ - - sample_number = 0 - final_preds = [] # This will hold predictions for all sub-models and all samples within sub-models - # Loop over submodels + + final_preds = [] submodel_number = 0 - for submodel in tqdm(self._submodel_list, desc=f"Predicting submodel: {run_type}", leave=True): -# print(submodel) - pred_by_step_binary = [self._predict_by_step(submodel[step][0], step, sequence_number) - for step in self._steps] - pred_by_step_positive = [self._predict_by_step(submodel[step][1], step, sequence_number) - for step in self._steps] - + + for submodel in tqdm( + self._submodel_list, desc=f"Predicting submodel number", leave=True + ): + pred_by_step_binary = [ + self._predict_by_step(submodel[step][0], step, sequence_number) + for step in self._steps + ] + pred_by_step_positive = [ + self._predict_by_step(submodel[step][1], step, sequence_number) + for step in self._steps + ] + pred_concat_binary = pd.concat(pred_by_step_binary, axis=0) - - pred_concat_binary.rename(columns={'step_combined':'Classification'}, inplace=True) + + pred_concat_binary.rename( + columns={f"pred_{self._targets}": "Classification"}, inplace=True + ) pred_concat_positive = pd.concat(pred_by_step_positive, axis=0) - pred_concat_positive.rename(columns={'step_combined':'Regression'}, inplace=True) + pred_concat_positive.rename( + columns={f"pred_{self._targets}": "Regression"}, inplace=True + ) pred_concat = pd.concat([pred_concat_binary, pred_concat_positive], axis=1) - pred_concat['submodel'] = submodel_number -# print(pred_concat.tail(12)) - - # Append the combined predictions to the final predictions list + pred_concat["submodel"] = submodel_number + final_preds.append(pred_concat) submodel_number += 1 -# submodel_preds[i] = final_preds - # Generate a DataFrame from the final predictions list for this sequence number - final_preds_aslists = pd.concat(final_preds, axis=0) + + final_preds_aslists = pd.concat(final_preds, axis=0) + # Drawing samples from the classification model # Ensuring that the classification probabilities are between 0 and 1: - final_preds_aslists['Classification'] = final_preds_aslists['Classification'].apply(lambda x: np.clip(x, 0, 1)) - final_preds_aslists['ClassificationSample'] = final_preds_aslists['Classification'].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) - - # Drawing samples from the regression model + final_preds_aslists["Classification"] = final_preds_aslists[ + "Classification" + ].apply(lambda x: np.clip(x, 0, 1)) + final_preds_aslists["ClassificationSample"] = final_preds_aslists[ + "Classification" + ].apply(lambda x: np.random.binomial(1, x, self._pred_samples)) - if self.log_target == True: - if self._draw_dist == 'Poisson': # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here - print('Poisson not implemented') - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - if self._draw_dist == 'Lognormal': + # Drawing samples from the regression model + if self._log_target == True: + if ( + self._draw_dist == "Poisson" + ): # Note: the Poisson distribution assumes a non-log-transformed target, so not defined here + 
final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ] + if self._draw_dist == "Lognormal": # Draw from normal distribution for log-transformed outcomes, then exponentiate, then round to integer - #pred_concat['RegressionSample'] = pred_concat['Regression'].apply(lambda x: np.random.normal(x, self._draw_sigma, self._pred_samples)) - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(x, self._draw_sigma, self._pred_samples))))) - if self.log_target == False: - if self._draw_dist == 'Poisson': # Note: this assumes a non-log-transformed target - print('Poisson not implemented') - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - if self._draw_dist == 'Lognormal': - print('Draws for non-log-transformed target: first implementation' ) - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'].apply(lambda x: np.abs(np.rint(np.expm1(np.random.normal(np.log1p(x), self._draw_sigma, self._pred_samples))))) - - if self._draw_dist == '': - final_preds_aslists['RegressionSample'] = final_preds_aslists['Regression'] - print('final_preds_aslists contains the samples in list form. Shape:', final_preds_aslists.shape, '. Looks like this:') - print(final_preds_aslists.tail(20)) + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ].apply( + lambda x: np.abs( + np.rint( + np.expm1( + np.random.normal( + x, self._draw_sigma, self._pred_samples + ) + ) + ) + ) + ) + + if self._log_target == False: + if ( + self._draw_dist == "Poisson" + ): # Note: this assumes a non-log-transformed target + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ] + if self._draw_dist == "Lognormal": + final_preds_aslists["RegressionSample"] = final_preds_aslists[ + "Regression" + ].apply( + lambda x: np.abs( + np.rint( + np.expm1( + np.random.normal( + np.log1p(x), self._draw_sigma, self._pred_samples + ) + ) + ) + ) + ) + + if self._draw_dist == "": + final_preds_aslists["RegressionSample"] = final_preds_aslists["Regression"] + # 'Explode' the samples to get one row per sample - final_preds_full = final_preds_aslists.explode(['ClassificationSample','RegressionSample']) - final_preds_full['Prediction'] = final_preds_full['ClassificationSample'] * final_preds_full['RegressionSample'] + final_preds_full = final_preds_aslists.explode( + ["ClassificationSample", "RegressionSample"] + ) + final_preds_full["Prediction"] = ( + final_preds_full["ClassificationSample"] + * final_preds_full["RegressionSample"] + ) + # Ensuring that the final predictions are positive: - final_preds_full['Prediction'] = final_preds_full['Prediction'].apply(lambda x: np.clip(x, 0, None)) + final_preds_full["Prediction"] = final_preds_full["Prediction"].apply( + lambda x: np.clip(x, 0, None) + ) + # Column for the main prediction: - pred_col_name = 'pred_' + self._targets - final_preds_full[pred_col_name] = final_preds_full['Prediction'] - # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, and adding a column with the log-transformed predictions - if self.log_target == True: - final_preds_full['LogPrediction'] = final_preds_full['Prediction'] - final_preds_full['Prediction'] = np.expm1(final_preds_full['Prediction']) - if self.log_target == False: - final_preds_full['LogPrediction'] = np.log1p(final_preds_full['Prediction']) - 
final_preds_full.drop(columns=['Classification','Regression','ClassificationSample','RegressionSample','submodel','Prediction','LogPrediction'],inplace=True) - final_preds = pd.DataFrame(final_preds_full.groupby(['month_id', 'country_id'])[pred_col_name].apply(list)) - print('final_preds is the end product of the predict sequence function. Shape:', final_preds_full.shape) - print(final_preds.tail(20)) + pred_col_name = "pred_" + self._targets + final_preds_full[pred_col_name] = final_preds_full["Prediction"] + + # Log-transforming the final predictions if the target is log-transformed, exponentiating if not, + # and adding a column with the log-transformed predictions + if self._log_target == True: + final_preds_full["LogPrediction"] = final_preds_full["Prediction"] + final_preds_full["Prediction"] = np.expm1(final_preds_full["Prediction"]) + if self._log_target == False: + final_preds_full["LogPrediction"] = np.log1p(final_preds_full["Prediction"]) + + final_preds_full.drop( + columns=[ + "Classification", + "Regression", + "ClassificationSample", + "RegressionSample", + "submodel", + "Prediction", + "LogPrediction", + ], + inplace=True, + ) + final_preds = pd.DataFrame( + final_preds_full.groupby(["month_id", "country_id"])[pred_col_name].apply( + list + ) + ) + return final_preds def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: @@ -227,54 +265,25 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame: pd.DataFrame The final predictions as a DataFrame. """ - - # Check if the model has been fitted before making predictions - check_is_fitted(self, 'is_fitted_') - print('Dependent variable:', self._targets, 'Parameters:', 'Log target:', self.log_target, ' submodels:', self._submodels_to_train, ', samples within submodels: ', self._pred_samples, ', draw distribution: ', self._draw_dist, ', sigma: ', self._draw_sigma) - - # If the run type is not 'forecasting', perform multiple predictions - if run_type != 'forecasting': - preds = [] # D: List to collect predictions for each sequence - # If the evaluation type is "standard", iterate over the evaluation sequence number - submodel_preds = {} # Not sure this belongs here + check_is_fitted(self, "is_fitted_") + + if run_type != "forecasting": + preds = [] if eval_type == "standard": - # Loop over the evaluation sequence number - for sequence_number in tqdm(range(ModelManager._resolve_evaluation_sequence_number(eval_type)), desc=f"Sequence", leave=True): -# print('sequence_number', sequence_number) - temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) - - # Output the temporary final predictions with samples as parquet - temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') - # Convert to views_pipeline standard format - - # Aggregate the predictions into point predictions -# final_preds.pop('LogPrediction') -# agg_preds = np.log1p(temp_preds_full.groupby(['month_id', 'country_id']).mean()) -# final_preds.rename(columns={'Prediction':'pred_ged_sb'}, inplace=True) -# agg_preds.pop('submodel') - preds.append(temp_preds_full) # D: Append the final predictions for this sequence number - # Output the final predictions as parquet -# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') - return preds + for sequence_number in tqdm( + range(ModelManager._resolve_evaluation_sequence_number(eval_type)), + desc=f"Predicting for sequence number", + leave=True, + ): + 
temp_preds_full = self.predict_sequence(sequence_number) + preds.append(temp_preds_full) + else: + raise ValueError( + f"{eval_type} is not supported now. Please use 'standard' evaluation type." + ) + else: - # If the run type is 'forecasting', perform a single prediction sequence_number = 0 - temp_preds_full = self.predict_sequence(run_type, eval_type, sequence_number) - print('temp_preds_full') - -# print('final_preds_aslists') -# print(final_preds_aslists.describe()) - - # Output the final predictions with samples as parquet - temp_preds_full.to_parquet(f'data/generated/final_pred_full_{run_type}_{eval_type}_{sequence_number}.parquet') - # Aggregate the predictions into point predictions -# agg_preds = temp_preds_full.groupby(['month_id', 'country_id']).mean() -# agg_preds.pop('submodel') - # Output the final predictions as parquet -# agg_preds['ged_sb_dep'] = agg_preds['Prediction'] -# agg_preds.to_parquet(f'data/generated/final_preds_{run_type}_{eval_type}_{sequence_number}_agg.parquet') - - # Return the final predictions as a DataFrame - print('Returning final predictions:') - print(temp_preds_full.tail(20)) - return temp_preds_full \ No newline at end of file + preds = self.predict_sequence(sequence_number) + + return preds From af9477873aca6b99b51d3389432f91d6183175e8 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 20 Mar 2025 07:47:11 +0100 Subject: [PATCH 13/24] add extra condition --- views_stepshifter/manager/stepshifter_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 96c4f27..332a4a4 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -40,9 +40,9 @@ def standardize_value(value): # 1) Replace inf and -inf with 0; # 2) Replace negative values with 0 if isinstance(value, list): - return [0 if (v == np.inf or v == -np.inf or v < 0) else v for v in value] + return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or v == np.nan) else value df = df.applymap(standardize_value) return df From 2a5a8fc952f53778f8a83e8250a308b033d5a3e3 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 20 Mar 2025 07:58:24 +0100 Subject: [PATCH 14/24] fix typo --- views_stepshifter/manager/stepshifter_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 332a4a4..d724b41 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -42,7 +42,7 @@ def standardize_value(value): if isinstance(value, list): return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0 or v == np.nan) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value df = df.applymap(standardize_value) return df From cd6ee7a37e9ed7a021673bfd31a1f11a5c82e4e1 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Wed, 26 Mar 2025 15:37:25 +0100 Subject: [PATCH 15/24] some fix idk --- 
views_stepshifter/manager/stepshifter_manager.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index d724b41..3ebe4ff 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -9,6 +9,7 @@ import pandas as pd import numpy as np from typing import Union, Optional, List, Dict +import math logger = logging.getLogger(__name__) @@ -39,10 +40,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: def standardize_value(value): # 1) Replace inf and -inf with 0; # 2) Replace negative values with 0 - if isinstance(value, list): - return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] + # if isinstance(value, list): + # return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] + # else: + # return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value + to_exclude = [np.inf, -np.inf, np.nan, None] + if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series): + return [0 if (v in to_exclude) else v for v in value] else: - return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value + return 0 if (value in to_exclude) else value df = df.applymap(standardize_value) return df From 2c6e87d6d917fb3f95a263526d390d53e6c4e68d Mon Sep 17 00:00:00 2001 From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com> Date: Tue, 8 Apr 2025 11:29:50 +0200 Subject: [PATCH 16/24] update get_standardized_df --- views_stepshifter/manager/stepshifter_manager.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py index 3ebe4ff..fdefdd4 100644 --- a/views_stepshifter/manager/stepshifter_manager.py +++ b/views_stepshifter/manager/stepshifter_manager.py @@ -38,19 +38,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame: """ def standardize_value(value): - # 1) Replace inf and -inf with 0; + # 1) Replace inf, -inf, nan with 0; # 2) Replace negative values with 0 - # if isinstance(value, list): - # return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value] - # else: - # return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value - to_exclude = [np.inf, -np.inf, np.nan, None] - if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series): - return [0 if (v in to_exclude) else v for v in value] + if isinstance(value, list): + return [0 if (v == np.inf or v == -np.inf or v < 0 or np.isnan(v)) else v for v in value] else: - return 0 if (value in to_exclude) else value + return 0 if (value == np.inf or value == -np.inf or value < 0 or np.isnan(value)) else value df = df.applymap(standardize_value) + return df def _split_hurdle_parameters(self): From 8fb4b17d2469c64ff605d6b8f65bc92997c21d69 Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Thu, 17 Apr 2025 14:33:36 +0200 Subject: [PATCH 17/24] update ModelManager to ForecastingModelManager --- .../manager/stepshifter_manager.py | 30 +++++++------------ views_stepshifter/models/hurdle_model.py | 8 ++--- views_stepshifter/models/shurf_model.py | 4 +-- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git 
From 8fb4b17d2469c64ff605d6b8f65bc92997c21d69 Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Thu, 17 Apr 2025 14:33:36 +0200
Subject: [PATCH 17/24] update ModelManager to ForecastingModelManager

---
 .../manager/stepshifter_manager.py           | 30 +++++++-------------
 views_stepshifter/models/hurdle_model.py     |  8 ++---
 views_stepshifter/models/shurf_model.py      |  4 +--
 3 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/views_stepshifter/manager/stepshifter_manager.py b/views_stepshifter/manager/stepshifter_manager.py
index 3ebe4ff..1852284 100644
--- a/views_stepshifter/manager/stepshifter_manager.py
+++ b/views_stepshifter/manager/stepshifter_manager.py
@@ -1,6 +1,6 @@
-from views_pipeline_core.managers.model import ModelPathManager, ModelManager
+from views_pipeline_core.managers.model import ModelPathManager, ForecastingModelManager
 from views_pipeline_core.configs.pipeline import PipelineConfig
-from views_pipeline_core.files.utils import read_dataframe
+from views_pipeline_core.files.utils import read_dataframe, generate_model_file_name
 from views_stepshifter.models.stepshifter import StepshifterModel
 from views_stepshifter.models.hurdle_model import HurdleModel
 from views_stepshifter.models.shurf_model import ShurfModel
@@ -9,16 +9,15 @@
 import pandas as pd
 import numpy as np
 from typing import Union, Optional, List, Dict
-import math
 
 logger = logging.getLogger(__name__)
 
 
-class StepshifterManager(ModelManager):
+class StepshifterManager(ForecastingModelManager):
     def __init__(
         self,
         model_path: ModelPathManager,
-        wandb_notifications: bool = False,
+        wandb_notifications: bool = True,
         use_prediction_store: bool = True,
     ) -> None:
         super().__init__(model_path, wandb_notifications, use_prediction_store)
@@ -38,19 +37,15 @@ def _get_standardized_df(df: pd.DataFrame) -> pd.DataFrame:
         """
 
         def standardize_value(value):
-            # 1) Replace inf and -inf with 0;
+            # 1) Replace inf, -inf, nan with 0;
             # 2) Replace negative values with 0
-            # if isinstance(value, list):
-            #     return [0 if (v == np.inf or v == -np.inf or v < 0 or v == np.nan) else v for v in value]
-            # else:
-            #     return 0 if (value == np.inf or value == -np.inf or value < 0 or value == np.nan) else value
-            to_exclude = [np.inf, -np.inf, np.nan, None]
-            if isinstance(value, list) or isinstance(value, np.ndarray) or isinstance(value, pd.Series):
-                return [0 if (v in to_exclude) else v for v in value]
+            if isinstance(value, list):
+                return [0 if (v == np.inf or v == -np.inf or v < 0 or np.isnan(v)) else v for v in value]
             else:
-                return 0 if (value in to_exclude) else value
+                return 0 if (value == np.inf or value == -np.inf or value < 0 or np.isnan(value)) else value
 
         df = df.applymap(standardize_value)
+
         return df
 
@@ -121,7 +116,7 @@ def _train_model_artifact(self):
         stepshift_model.fit(df_viewser)
 
         if not self.config["sweep"]:
-            model_filename = ModelManager.generate_model_file_name(
+            model_filename = generate_model_file_name(
                 run_type, file_extension=".pkl"
             )
             stepshift_model.save(path_artifacts / model_filename)
@@ -140,7 +135,6 @@ def _evaluate_model_artifact(
         Returns:
             A list of DataFrames containing the evaluation results
         """
-        path_raw = self._model_path.data_raw
         path_artifacts = self._model_path.artifacts
         run_type = self.config["run_type"]
 
@@ -184,7 +178,6 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
         Returns:
             The forecasted DataFrame
         """
-        path_raw = self._model_path.data_raw
         path_artifacts = self._model_path.artifacts
         run_type = self.config["run_type"]
 
@@ -218,7 +211,6 @@ def _forecast_model_artifact(self, artifact_name: str) -> pd.DataFrame:
         return df_prediction
 
     def _evaluate_sweep(self, eval_type: str, model: any) -> List[pd.DataFrame]:
-        path_raw = self._model_path.data_raw
         run_type = self.config["run_type"]
 
         df_predictions = model.predict(run_type, eval_type)
@@ -226,4 +218,4 @@
             StepshifterManager._get_standardized_df(df) for df in df_predictions
         ]
 
-        return df_predictions
+        return df_predictions
\ No newline at end of file
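
The per-cell closure that survives into this patch runs at Python speed through
applymap (deprecated in pandas 2.1 and renamed DataFrame.map; the project pins
pandas ^1.5.3, where the old name is still current). When every cell is a plain
scalar, the same standardization can be expressed with vectorized pandas
operations instead. A sketch of that equivalent, assuming scalar cells only; it
does not cover the list-valued prediction columns this series supports:

    import numpy as np
    import pandas as pd

    def standardize_scalar_df(df: pd.DataFrame) -> pd.DataFrame:
        # Replace +/-inf with 0, clamp negatives to 0, then zero out NaN.
        out = df.replace([np.inf, -np.inf], 0)
        out = out.mask(out < 0, 0)  # NaN < 0 is False, so NaN survives the mask
        return out.fillna(0)

    df = pd.DataFrame({"a": [1.0, -1.0, np.inf], "b": [np.nan, -np.inf, 2.0]})
    print(standardize_scalar_df(df))  # negatives, infinities and NaN all become 0.0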
diff --git a/views_stepshifter/models/hurdle_model.py b/views_stepshifter/models/hurdle_model.py
index b539821..e8845d5 100644
--- a/views_stepshifter/models/hurdle_model.py
+++ b/views_stepshifter/models/hurdle_model.py
@@ -1,4 +1,4 @@
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ForecastingModelManager
 from views_stepshifter.models.stepshifter import StepshifterModel
 from views_stepshifter.models.validation import views_validate
 from sklearn.utils.validation import check_is_fitted
@@ -154,12 +154,12 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
 
         if eval_type == "standard":
             total_sequence_number = (
-                ModelManager._resolve_evaluation_sequence_number(eval_type)
+                ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)
             )
             if self.get_device_params().get("device") == "cuda":
                 pred = []
                 for sequence_number in tqdm.tqdm(
-                    range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                    range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                     desc="Predicting for sequence number",
                 ):
                     pred_by_step_binary = [
@@ -182,7 +182,7 @@
                 with ProcessPoolExecutor() as executor:
                     futures = {
                         executor.submit(self._predict_by_sequence, sequence_number): sequence_number
-                        for sequence_number in range(ModelManager._resolve_evaluation_sequence_number(eval_type))
+                        for sequence_number in range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type))
                     }
                     for future in tqdm.tqdm(
                         as_completed(futures.keys()),
diff --git a/views_stepshifter/models/shurf_model.py b/views_stepshifter/models/shurf_model.py
index 51365da..1dc7af4 100644
--- a/views_stepshifter/models/shurf_model.py
+++ b/views_stepshifter/models/shurf_model.py
@@ -1,4 +1,4 @@
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ForecastingModelManager
 from views_stepshifter.models.hurdle_model import HurdleModel
 from views_stepshifter.models.validation import views_validate
 from sklearn.utils.validation import check_is_fitted
@@ -271,7 +271,7 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
         preds = []
         if eval_type == "standard":
             for sequence_number in tqdm(
-                range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                 desc=f"Predicting for sequence number",
                 leave=True,
             ):
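
The hurdle model's CPU path above keys a dictionary of futures by their sequence
number; that mapping is what lets results arrive out of order from as_completed
(passing futures.keys() is equivalent to passing the dict) and still be reassembled
in submission order. A minimal sketch of the pattern in isolation, with a toy
square function standing in for the model's _predict_by_sequence:

    from concurrent.futures import ProcessPoolExecutor, as_completed

    def square(n: int) -> int:  # toy stand-in for a per-sequence prediction
        return n * n

    def predict_all(n_sequences: int) -> list:
        results = [None] * n_sequences
        with ProcessPoolExecutor() as executor:
            # Key each future by its sequence number so completion order
            # does not matter when collecting the results.
            futures = {executor.submit(square, seq): seq for seq in range(n_sequences)}
            for future in as_completed(futures):
                results[futures[future]] = future.result()
        return results

    if __name__ == "__main__":  # required for ProcessPoolExecutor on some platforms
        print(predict_all(4))   # [0, 1, 4, 9], regardless of completion order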
From 19f713bc8a200b7f875a7575f5957c134f812b80 Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Wed, 23 Apr 2025 09:36:53 +0200
Subject: [PATCH 18/24] use ForecastingModelManager

---
 views_stepshifter/models/stepshifter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/views_stepshifter/models/stepshifter.py b/views_stepshifter/models/stepshifter.py
index dca342b..264d97b 100644
--- a/views_stepshifter/models/stepshifter.py
+++ b/views_stepshifter/models/stepshifter.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import check_is_fitted
 from typing import List, Dict
 from views_stepshifter.models.validation import views_validate
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ModelManager, ForecastingModelManager
 import tqdm
 from concurrent.futures import ProcessPoolExecutor, as_completed
 import torch
@@ -238,13 +238,13 @@ def predict(self, run_type: str, eval_type: str = "standard") -> pd.DataFrame:
 
         if eval_type == "standard":
             total_sequence_number = (
-                ModelManager._resolve_evaluation_sequence_number(eval_type)
+                ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)
            )
             if self.get_device_params().get("device") == "cuda":
                 preds = []
                 for sequence_number in tqdm.tqdm(
-                    range(ModelManager._resolve_evaluation_sequence_number(eval_type)),
+                    range(ForecastingModelManager._resolve_evaluation_sequence_number(eval_type)),
                     desc="Predicting for sequence number",
                 ):
                     pred_by_step = [

From e10a723279567c69bf8ca01f9cf04b5a9e7fad0b Mon Sep 17 00:00:00 2001
From: Dylan <52908667+smellycloud@users.noreply.github.com>
Date: Wed, 23 Apr 2025 23:02:50 +0200
Subject: [PATCH 19/24] fix tests

---
 tests/test_hurdle_model.py | 2 +-
 tests/test_stepshifter.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_hurdle_model.py b/tests/test_hurdle_model.py
index 385fb2a..734276b 100644
--- a/tests/test_hurdle_model.py
+++ b/tests/test_hurdle_model.py
@@ -125,7 +125,7 @@ def test_predict(sample_config, sample_partitioner_dict, sample_dataframe):
          patch("views_stepshifter.models.hurdle_model.as_completed") as mock_as_completed, \
          patch("views_stepshifter.models.hurdle_model.tqdm.tqdm") as mock_tqdm, \
          patch("views_stepshifter.models.hurdle_model.ProcessPoolExecutor") as mock_ProcessPoolExecutor, \
-         patch("views_stepshifter.models.hurdle_model.ModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
+         patch("views_stepshifter.models.hurdle_model.ForecastingModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
 
         # the else branch
 
diff --git a/tests/test_stepshifter.py b/tests/test_stepshifter.py
index 3a43b54..370d76a 100644
--- a/tests/test_stepshifter.py
+++ b/tests/test_stepshifter.py
@@ -3,7 +3,7 @@
 import numpy as np
 from unittest.mock import patch, MagicMock, call
 from views_stepshifter.models.stepshifter import StepshifterModel
-from views_pipeline_core.managers.model import ModelManager
+from views_pipeline_core.managers.model import ModelManager, ForecastingModelManager
 
 @pytest.fixture
 def config():
@@ -182,7 +182,7 @@ def test_predict(config, partitioner_dict, sample_dataframe):
          patch("views_stepshifter.models.stepshifter.as_completed") as mock_as_completed, \
          patch("views_stepshifter.models.stepshifter.tqdm.tqdm") as mock_tqdm, \
          patch("views_stepshifter.models.stepshifter.ProcessPoolExecutor") as mock_ProcessPoolExecutor, \
-         patch("views_stepshifter.models.stepshifter.ModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
+         patch("views_stepshifter.models.stepshifter.ForecastingModelManager._resolve_evaluation_sequence_number") as mock_sequence_number:
 
         # the else branch
 
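One detail behind these one-line test fixes: the string given to unittest.mock.patch
must name the attribute the code under test actually calls. After the rename,
predict() resolves ForecastingModelManager in its own module's namespace, so that is
the name the tests have to target; mocking the old ModelManager method would stub
something predict() never invokes, and the test's sequence-count stub would be
ignored. A minimal sketch; the return value here is a made-up stand-in:

    from unittest.mock import patch

    # Target the name as the module under test looks it up, and stub the
    # method that predict() actually calls after the rename.
    with patch(
        "views_stepshifter.models.stepshifter.ForecastingModelManager._resolve_evaluation_sequence_number"
    ) as mock_seq:
        mock_seq.return_value = 12  # hypothetical sequence count for the test
        ...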
From c721cd630eb6075cccc2d7e8a4b956238ea36d80 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Mon, 23 Jun 2025 14:13:24 +0200
Subject: [PATCH 20/24] update version

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 26989ae..11e312a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "views_stepshifter"
-version = "0.4.0"
+version = "1.0.0"
 description = ""
 authors = [
     "Xiaolong Sun ",
@@ -11,7 +11,7 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = ">=3.11,<3.15"
-views_pipeline_core = ">=1.0.0,<2.0.0"
+views_pipeline_core = ">=2.0.0,<3.0.0"
 scikit-learn = "^1.6.0"
 pandas = "^1.5.3"
 numpy = "^1.25.2"

From 927fa5411f6792669deeb48fc757155ea2cd2cd4 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:39:12 +0200
Subject: [PATCH 21/24] try to fix test error

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 11e312a..1d4feb6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
+scipy = "1.12.0"
 

From 14a09da32b6cd6c869cec4d684084b8d00347dd6 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:42:50 +0200
Subject: [PATCH 22/24] try

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1d4feb6..8737d48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.12.0"
+scipy = "1.15.0"
 

From 93d2d43e539b877e5d239d0defde14de84f02465 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:43:55 +0200
Subject: [PATCH 23/24] another try

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8737d48..35a1a15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.15.0"
+scipy = "1.15.1"
 

From 6e26731edfb4179ba49b2c7ad4d30dae2829f817 Mon Sep 17 00:00:00 2001
From: xiaolongsun <95378566+xiaolong0728@users.noreply.github.com>
Date: Fri, 4 Jul 2025 15:49:44 +0200
Subject: [PATCH 24/24] Add comments

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 35a1a15..53d8442 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ numpy = "^1.25.2"
 darts = "^0.30.0"
 lightgbm = "4.6.0"
 views_forecasts = "^0.5.5"
-scipy = "1.15.1"
+scipy = "1.15.1" # error with latest scipy 1.16.0. see https://github.com/statsmodels/statsmodels/issues?q=_lazywhere
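
A closing note on the scipy pin: the exact `scipy = "1.15.1"` constraint also blocks
future 1.15.x bug-fix releases. Since the linked statsmodels issues point to scipy's
private _lazywhere helper changing in the 1.16 line, a range constraint is a softer
alternative. A sketch in Poetry's constraint syntax, not a tested recommendation:

    [tool.poetry.dependencies]
    # Accept 1.15.x patch releases while excluding the 1.16 line that
    # broke statsmodels' use of the private _lazywhere helper.
    scipy = ">=1.15.1,<1.16"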