Changes from all commits
29 commits
7fabae8
First commit
sarakallis May 24, 2024
fb4c959
Fix formatting
sarakallis May 24, 2024
3867624
Create utils_input_data.py
sarakallis May 28, 2024
d80bc94
Dataloaders
sarakallis May 28, 2024
4e6a57c
Update README.md
sarakallis May 28, 2024
5b82f02
Add data check
sarakallis May 29, 2024
215faa7
New config
sarakallis May 29, 2024
d2b1732
Create train_model.py
sarakallis May 29, 2024
d015d75
Update README.md
sarakallis May 29, 2024
0175364
Typo
sarakallis May 29, 2024
5492817
Update README.md
sarakallis May 29, 2024
5c69173
Merge branch 'main' into black_lodge
sarakallis Jun 17, 2024
179a77b
Update utils_input_data.py
sarakallis Jun 17, 2024
a5d5023
Update configs
sarakallis Jun 18, 2024
2ad7d9a
New config name
sarakallis Jun 18, 2024
469aa34
Typo and small fixes
sarakallis Jun 18, 2024
a0d73a9
Create main.py
sarakallis Jun 18, 2024
c15209f
Management scripts
sarakallis Jun 18, 2024
d4faee1
Merge branch 'main' into black_lodge
sarakallis Jun 19, 2024
7a9c0b1
Merge branch 'main' into black_lodge
sarakallis Jun 20, 2024
fcf0dd7
Merge branch 'main' into black_lodge
sarakallis Jun 20, 2024
22220f5
Correct script location
sarakallis Jun 20, 2024
b922a64
Update to-dos
sarakallis Jun 20, 2024
61c09c2
Update main.py
sarakallis Jun 20, 2024
c7a6ab8
Add .gitkeep for folders
sarakallis Jun 24, 2024
834bf1c
latest template
xiaolong0728 Jul 31, 2024
6bed9a0
fix forecasting problems
xiaolong0728 Aug 2, 2024
05d959a
forecasting script
marinamatic Aug 5, 2024
7883a55
Merge branch 'prod-model-template' of https://github.com/prio-data/vi…
marinamatic Aug 5, 2024
12 changes: 4 additions & 8 deletions common_utils/hurdle_model.py
@@ -7,12 +7,9 @@
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBRegressor
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
from xgboost import XGBRegressor, XGBClassifier
from xgboost import XGBRFRegressor, XGBRFClassifier
from lightgbm import LGBMClassifier, LGBMRegressor

@@ -105,5 +102,4 @@ def predict(self, X: Union[np.ndarray, pd.DataFrame]):
return self.clf_.predict_proba(X)[:, 1] * self.reg_.predict(X)
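For orientation, a minimal sketch of the hurdle pattern that predict() combines above: a classifier estimates P(y > 0), a regressor estimates the magnitude (commonly fitted on the positive cases only), and the final prediction is their product. The estimator choices and the synthetic data below are illustrative assumptions, not taken from this file.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

rng = np.random.default_rng(0)
X = rng.random((200, 3))
y = rng.poisson(0.3, size=200).astype(float)                          # zero-inflated target

clf = RandomForestClassifier(random_state=0).fit(X, y > 0)            # stage 1: P(y > 0)
reg = RandomForestRegressor(random_state=0).fit(X[y > 0], y[y > 0])   # stage 2: magnitude, positives only
y_pred = clf.predict_proba(X)[:, 1] * reg.predict(X)                  # hurdle prediction, as in predict() above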





37 changes: 24 additions & 13 deletions common_utils/utils_cli_parser.py
@@ -1,3 +1,4 @@

import sys
import argparse

@@ -32,6 +33,13 @@ def parse_args():
'Note: If --sweep is specified, --evaluate will also automatically be flagged. '
'Cannot be used with --run_type forecasting.')

parser.add_argument('-f', '--forecast',
action='store_true',
help='Flag to indicate if the model should produce predictions. '
'Note: If --sweep is specified, --forecast will also automatically be flagged. '
'Can only be used with --run_type forecasting.')


parser.add_argument('-a', '--artifact_name',
type=str,
help='Specify the name of the model artifact to be used for evaluation. '
@@ -43,27 +51,32 @@ def parse_args():
return parser.parse_args()

def validate_arguments(args):
if args.sweep:
if args.run_type != 'calibration':
print("Error: Sweep runs must have --run_type set to 'calibration'. Exiting.")
print("To fix: Use --run_type calibration when --sweep is flagged.")
sys.exit(1)

if args.run_type in ['testing', 'forecasting'] and args.sweep:
print("Error: Sweep cannot be performed with testing or forecasting run types. Exiting.")
print("To fix: Remove --sweep flag or set --run_type to 'calibration'.")
if args.sweep and args.run_type != 'calibration':
print("Error: Sweep runs must have --run_type set to 'calibration'. Exiting.")
print("To fix: Use --run_type calibration when --sweep is flagged.")
sys.exit(1)

if args.run_type == 'forecasting' and args.evaluate:
if args.evaluate and args.run_type == 'forecasting':
print("Error: Forecasting runs cannot evaluate. Exiting.")
print("To fix: Remove --evaluate flag when --run_type is 'forecasting'.")
sys.exit(1)

if args.run_type in ['calibration', 'testing'] and not args.train and not args.evaluate and not args.sweep:
if (args.run_type in ['calibration', 'testing', 'forecasting']
and not args.train and not args.evaluate and not args.forecast and not args.sweep):
print(f"Error: Run type is {args.run_type} but neither --train, --evaluate, nor --sweep flag is set. Nothing to do... Exiting.")
print("To fix: Add --train and/or --evaluate flag. Or use --sweep to run both training and evaluation in a WadnB sweep loop.")
sys.exit(1)

if args.train and args.artifact_name:
print("Error: Both --train and --artifact_name flags are set. Exiting.")
print("To fix: Remove --artifact_name if --train is set, or vice versa.")
sys.exit(1)

if args.forecast and args.run_type != 'forecasting':
print("Error: --forecast flag can only be used with --run_type forecasting. Exiting.")
print("To fix: Set --run_type to forecasting if --forecast is flagged.")
sys.exit(1)


# notes on stepshifted models:
# There will be some thinking here in regards to how we store, denote (naming convention), and retrieve the model artifacts from stepshifted models.
@@ -72,5 +85,3 @@ def validate_arguments(args):
# And the rest of the code is made in a way that handles this naming convention without any issues. Could be a simple fix.
# Alternatively, we could store the model artifacts in a subfolder for each stepshifted model. This would make it easier to handle the artifacts, but it would also make it harder to retrieve the latest artifact for a given run type.
# Lastly, the solution Xiaolong is working on might allow us to store multiple models (steps) in one artifact, which would make this whole discussion obsolete and be the best solution.
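For context, a usage sketch of how the updated parser and validation are expected to be wired together; the main-script wiring and the import path are assumptions for illustration, not part of this diff.

# e.g.  python main.py --run_type forecasting --forecast
from utils_cli_parser import parse_args, validate_arguments   # assumed import path

if __name__ == "__main__":
    args = parse_args()
    validate_arguments(args)      # prints an error and exits on invalid flag combinations
    if args.forecast:             # only valid together with --run_type forecasting
        pass                      # produce and store forecasts here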


75 changes: 74 additions & 1 deletion common_utils/utils_evaluation_metrics.py
@@ -1,6 +1,16 @@

from dataclasses import dataclass
from typing import Optional
import pandas as pd
from statistics import mean, stdev, median

import properscoring as ps
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error, brier_score_loss, average_precision_score, roc_auc_score
from views_forecasts.extensions import *



# MUST BE ALIGNED WITH THE METRICS WE DECIDE TO USE IN THE WORKSHOP!!!!

@dataclass
class EvaluationMetrics:
@@ -75,4 +85,67 @@ def evaluation_dict_to_dataframe(evaluation_dict: dict) -> pd.DataFrame:
"""
return pd.DataFrame.from_dict(evaluation_dict, orient='index')

# TBD: Align with metrics discussed in workshop
@staticmethod
def calculate_aggregate_metrics(evaluation_dict: dict) -> dict:
metrics_aggregate = {
'mean': {},
'std': {},
'median': {}
}

for metric in EvaluationMetrics.__annotations__.keys():
metric_values = [getattr(evaluation, metric) for evaluation in evaluation_dict.values() if getattr(evaluation, metric) is not None]
if metric_values:
metrics_aggregate['mean'][metric] = mean(metric_values)
metrics_aggregate['std'][metric] = stdev(metric_values)
metrics_aggregate['median'][metric] = median(metric_values)
else:
metrics_aggregate['mean'][metric] = None
metrics_aggregate['std'][metric] = None
metrics_aggregate['median'][metric] = None

return metrics_aggregate

@staticmethod
def output_metrics(evaluation_dict):
aggregate = EvaluationMetrics.calculate_aggregate_metrics(evaluation_dict)
step_metrics_dict = {step: vars(metrics) for step, metrics in evaluation_dict.items()}
step_metrics_dict['mean'] = aggregate['mean']
step_metrics_dict['std'] = aggregate['std']
step_metrics_dict['median'] = aggregate['median']
return step_metrics_dict


def generate_metric_dict(df, config):
"""
Generates a dictionary of evaluation metrics for a given forecasting configuration and dataset.

Args:
df (pd.DataFrame): A pandas DataFrame containing the forecasted values and ground truth.
config (dict): A dictionary containing the forecasting configuration parameters.

Returns:
evaluation_dict (dict): A dictionary of EvaluationMetrics instances for each forecasting step.
df_evaluation_dict (pd.DataFrame): A pandas DataFrame containing the evaluation metrics for each forecasting step.

Note:
! This function is temporary for the stepshifter model.
! Change the metrics to those discussed previously.
For logged targets, calculating MSE is actually MSLE.
KLD and Jeffreys divergence are measures used to quantify the difference between two probability distributions. Why do we calculate these metrics in the context of forecasting?
Brier score is used for binary and categorical outcomes that can be structured as true or false
There are no classes in data, so we cannot calculate roc_auc_score, ap_score
"""

evaluation_dict = EvaluationMetrics.make_evaluation_dict(steps=config.steps[-1])
for step in config.steps:
evaluation_dict[f"step{str(step).zfill(2)}"].MSE = mean_squared_error(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict[f"step{str(step).zfill(2)}"].MAE = mean_absolute_error(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].MSLE = mean_squared_log_error(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict[f"step{str(step).zfill(2)}"].CRPS = ps.crps_ensemble(df[config.depvar], df[f"step_pred_{step}"]).mean()
# evaluation_dict[f"step{str(step).zfill(2)}"].Brier = brier_score_loss(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].AUC = roc_auc_score(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].AP = average_precision_score(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict = EvaluationMetrics.output_metrics(evaluation_dict)
df_evaluation_dict = EvaluationMetrics.evaluation_dict_to_dataframe(evaluation_dict)
return evaluation_dict, df_evaluation_dict
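A hypothetical call to generate_metric_dict, assuming the module is importable in the repo environment; the toy data, the depvar name, and the SimpleNamespace config are illustrative assumptions.

import pandas as pd
from types import SimpleNamespace
from utils_evaluation_metrics import generate_metric_dict   # assumed import path

df = pd.DataFrame({
    "ged_sb":      [0.0, 1.0, 3.0, 0.0],   # assumed dependent variable (ground truth)
    "step_pred_1": [0.2, 0.8, 2.5, 0.1],   # step-1 predictions
    "step_pred_2": [0.3, 0.7, 2.0, 0.2],   # step-2 predictions
})
config = SimpleNamespace(steps=[1, 2], depvar="ged_sb")
evaluation_dict, df_evaluation = generate_metric_dict(df, config)
print(df_evaluation)   # per-step rows plus mean/std/median aggregate rows

At least two steps are used here because the std aggregate relies on statistics.stdev, which needs two or more values.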
17 changes: 17 additions & 0 deletions common_utils/utils_input_data.py
@@ -0,0 +1,17 @@
import numpy as np

def ensure_float64(df):
"""
Check if the DataFrame only contains np.float64 types. If not, raise a warning
and convert the DataFrame to use np.float64 for all its numeric columns.
"""

non_float64_cols = df.select_dtypes(include=['number']).columns[df.select_dtypes(include=['number']).dtypes != np.float64]

if len(non_float64_cols) > 0:
print(f"Warning: DataFrame contains non-np.float64 numeric columns. Converting the following columns: {', '.join(non_float64_cols)}")

for col in non_float64_cols:
df[col] = df[col].astype(np.float64)

return df
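A small usage sketch of the check above; the import path is an assumption.

import pandas as pd
from utils_input_data import ensure_float64   # assumed import path

df = pd.DataFrame({"a": [1, 2, 3], "b": [0.5, 1.5, 2.5]})   # 'a' is int64, 'b' is float64
df = ensure_float64(df)   # warns about 'a' and casts it to np.float64
print(df.dtypes)          # both columns now report float64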
45 changes: 40 additions & 5 deletions common_utils/utils_model_outputs.py
@@ -2,6 +2,11 @@
from typing import List, Optional
import pandas as pd


# we need to figure out if we are storing logged fatalities or not
# And this is also a good place to decide on the uncertainty quantification. Right now var, but maybe HDI or something else.
# you might also want a non-step-specific list of pgm? So you can recreate the full df from here? Otherwise this could turn into a mess

@dataclass
class ModelOutputs:
"""
@@ -17,7 +22,7 @@ class ModelOutputs:
pg_id (Optional[List[int]]): The priogrid id.
c_id (Optional[List[int]]): The country id.
month_id (Optional[List[int]]): The month id.
step (Optional[List[int]]): The step ahead forecast.
out_sample_month (Optional[List[int]]): The out-of-sample month of the step-ahead forecast.
"""

y_score: Optional[List[float]] = field(default_factory=list)
@@ -29,7 +34,7 @@ class ModelOutputs:
pg_id: Optional[List[int]] = field(default_factory=list)
c_id: Optional[List[int]] = field(default_factory=list)
month_id: Optional[List[int]] = field(default_factory=list)
step: Optional[List[int]] = field(default_factory=list)
out_sample_month: Optional[List[int]] = field(default_factory=list)

@classmethod
def make_output_dict(cls, steps=36) -> dict:
@@ -103,6 +108,36 @@ def output_dict_to_dataframe(dict_of_outputs) -> pd.DataFrame:

return df

# we need to figure out if we are storing logged fatalities or not
# And this is also a good place to decide on the uncertainty quantification. Right now var, but maybe HDI or something else.
# you might also want a non-step-specific list of pgm? So you can recreate the full df from here? Otherwise this could turn into a mess

def generate_output_dict(df, config):
"""
Generate a dictionary of ModelOutputs instances and a DataFrame from a DataFrame of model predictions.

This function takes a DataFrame of model predictions and a configuration object, and generates a dictionary of ModelOutputs instances

Args:
df (pd.DataFrame): A DataFrame containing model predictions.
config (dict): A configuration object containing model settings.

Returns:
output_dict (dict): A dictionary where each key is a step label and each value is an instance of ModelOutputs.
df_output_dict (pd.DataFrame): A DataFrame of model outputs.

Note:
! This is temporary for stepshifter model
"""
output_dict = ModelOutputs.make_output_dict(steps=config.steps[-1])
for step in config.steps:
df_step = df[[config.depvar, f"step_pred_{step}"]]
output_dict[f"step{str(step).zfill(2)}"].y_true = df_step[config.depvar].to_list()
output_dict[f"step{str(step).zfill(2)}"].y_score = df_step[f"step_pred_{step}"].to_list()
output_dict[f"step{str(step).zfill(2)}"].month_id = df_step.index.get_level_values("month_id").to_list()
if df.index.names[1] == "priogrid_gid":
output_dict[f"step{str(step).zfill(2)}"].pg_id = df_step.index.get_level_values("priogrid_gid").to_list()
elif df.index.names[1] == "country_id":
output_dict[f"step{str(step).zfill(2)}"].c_id = df_step.index.get_level_values("country_id").to_list()
output_dict[f"step{str(step).zfill(2)}"].out_sample_month = step
df_output_dict = ModelOutputs.output_dict_to_dataframe(output_dict)
df_output_dict = df_output_dict.reset_index()
df_output_dict = df_output_dict.drop(columns=df_output_dict.columns[0])
return output_dict, df_output_dict
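A hypothetical call to generate_output_dict at the priogrid level; the index names follow the checks in the function above, while the dependent-variable name and the toy values are illustrative assumptions.

import pandas as pd
from types import SimpleNamespace
from utils_model_outputs import generate_output_dict   # assumed import path

idx = pd.MultiIndex.from_product([[500, 501], [1001, 1002]],
                                 names=["month_id", "priogrid_gid"])
df = pd.DataFrame({"ged_sb":      [0.0, 2.0, 1.0, 0.0],    # assumed dependent variable
                   "step_pred_1": [0.1, 1.7, 0.9, 0.2]},
                  index=idx)
config = SimpleNamespace(steps=[1], depvar="ged_sb")
output_dict, df_output = generate_output_dict(df, config)   # keys like 'step01' map to ModelOutputs instances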
4 changes: 2 additions & 2 deletions common_utils/views_stepshift/run.py
@@ -132,12 +132,12 @@ def future_point_predict(self, time: int, data: pd.DataFrame, keep_specific: boo
if proba:
predictions = self._models.predict_proba(
data.loc[time - self._models._steps_extent: time],
combine=True
combine=False
)
else:
predictions = self._models.predict(
data.loc[time - self._models._steps_extent: time],
combine = True
combine=False
)

if not keep_specific:
2 changes: 2 additions & 0 deletions common_utils/views_stepshifter_darts/__init__.py
@@ -0,0 +1,2 @@
from .stepshifter_darts import StepshifterModel
from darts.models import LightGBMModel, XGBModel, RandomForest