Changes from all commits
29 commits
7fabae8
First commit
sarakallis May 24, 2024
fb4c959
Fix formatting
sarakallis May 24, 2024
3867624
Create utils_input_data.py
sarakallis May 28, 2024
d80bc94
Dataloaders
sarakallis May 28, 2024
4e6a57c
Update README.md
sarakallis May 28, 2024
5b82f02
Add data check
sarakallis May 29, 2024
215faa7
New config
sarakallis May 29, 2024
d2b1732
Create train_model.py
sarakallis May 29, 2024
d015d75
Update README.md
sarakallis May 29, 2024
0175364
Typo
sarakallis May 29, 2024
5492817
Update README.md
sarakallis May 29, 2024
5c69173
Merge branch 'main' into black_lodge
sarakallis Jun 17, 2024
179a77b
Update utils_input_data.py
sarakallis Jun 17, 2024
a5d5023
Update configs
sarakallis Jun 18, 2024
2ad7d9a
New config name
sarakallis Jun 18, 2024
469aa34
Typo and small fixes
sarakallis Jun 18, 2024
a0d73a9
Create main.py
sarakallis Jun 18, 2024
c15209f
Management scripts
sarakallis Jun 18, 2024
d4faee1
Merge branch 'main' into black_lodge
sarakallis Jun 19, 2024
7a9c0b1
Merge branch 'main' into black_lodge
sarakallis Jun 20, 2024
fcf0dd7
Merge branch 'main' into black_lodge
sarakallis Jun 20, 2024
22220f5
Correct script location
sarakallis Jun 20, 2024
b922a64
Update to-dos
sarakallis Jun 20, 2024
61c09c2
Update main.py
sarakallis Jun 20, 2024
c7a6ab8
Add .gitkeep for folders
sarakallis Jun 24, 2024
834bf1c
latest template
xiaolong0728 Jul 31, 2024
6bed9a0
fix forecasting problems
xiaolong0728 Aug 2, 2024
05d959a
forecasting script
marinamatic Aug 5, 2024
7883a55
Merge branch 'prod-model-template' of https://github.com/prio-data/vi…
marinamatic Aug 5, 2024
12 changes: 4 additions & 8 deletions common_utils/hurdle_model.py
@@ -7,12 +7,9 @@
from sklearn.utils.estimator_checks import check_estimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from xgboost import XGBRegressor
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
from xgboost import XGBRegressor, XGBClassifier
from xgboost import XGBRFRegressor, XGBRFClassifier
from lightgbm import LGBMClassifier, LGBMRegressor

@@ -105,5 +102,4 @@ def predict(self, X: Union[np.ndarray, pd.DataFrame]):
return self.clf_.predict_proba(X)[:, 1] * self.reg_.predict(X)
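For orientation, a minimal sketch of the hurdle pattern that predict() combines above: a classifier estimates P(y > 0), a regressor estimates the magnitude (commonly fitted on the positive cases only), and the final prediction is their product. The estimator choices and the synthetic data below are illustrative assumptions, not taken from this file.

import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

rng = np.random.default_rng(0)
X = rng.random((200, 3))
y = rng.poisson(0.3, size=200).astype(float)                          # zero-inflated target

clf = RandomForestClassifier(random_state=0).fit(X, y > 0)            # stage 1: P(y > 0)
reg = RandomForestRegressor(random_state=0).fit(X[y > 0], y[y > 0])   # stage 2: magnitude, positives only
y_pred = clf.predict_proba(X)[:, 1] * reg.predict(X)                  # hurdle prediction, as in predict() above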





37 changes: 24 additions & 13 deletions common_utils/utils_cli_parser.py
@@ -1,3 +1,4 @@

import sys
import argparse

@@ -32,6 +33,13 @@ def parse_args():
'Note: If --sweep is specified, --evaluate will also automatically be flagged. '
'Cannot be used with --run_type forecasting.')

parser.add_argument('-f', '--forecast',
action='store_true',
help='Flag to indicate if the model should produce predictions. '
'Note: If --sweep is specified, --forecast will also automatically be flagged. '
'Can only be used with --run_type forecasting.')


parser.add_argument('-a', '--artifact_name',
type=str,
help='Specify the name of the model artifact to be used for evaluation. '
@@ -43,27 +51,32 @@ def parse_args():
return parser.parse_args()

def validate_arguments(args):
if args.sweep:
if args.run_type != 'calibration':
print("Error: Sweep runs must have --run_type set to 'calibration'. Exiting.")
print("To fix: Use --run_type calibration when --sweep is flagged.")
sys.exit(1)

if args.run_type in ['testing', 'forecasting'] and args.sweep:
print("Error: Sweep cannot be performed with testing or forecasting run types. Exiting.")
print("To fix: Remove --sweep flag or set --run_type to 'calibration'.")
if args.sweep and args.run_type != 'calibration':
print("Error: Sweep runs must have --run_type set to 'calibration'. Exiting.")
print("To fix: Use --run_type calibration when --sweep is flagged.")
sys.exit(1)

if args.run_type == 'forecasting' and args.evaluate:
if args.evaluate and args.run_type == 'forecasting':
print("Error: Forecasting runs cannot evaluate. Exiting.")
print("To fix: Remove --evaluate flag when --run_type is 'forecasting'.")
sys.exit(1)

if args.run_type in ['calibration', 'testing'] and not args.train and not args.evaluate and not args.sweep:
if (args.run_type in ['calibration', 'testing', 'forecasting']
and not args.train and not args.evaluate and not args.forecast and not args.sweep):
print(f"Error: Run type is {args.run_type} but neither --train, --evaluate, nor --sweep flag is set. Nothing to do... Exiting.")
print("To fix: Add --train and/or --evaluate flag. Or use --sweep to run both training and evaluation in a WadnB sweep loop.")
sys.exit(1)

if args.train and args.artifact_name:
print("Error: Both --train and --artifact_name flags are set. Exiting.")
print("To fix: Remove --artifact_name if --train is set, or vice versa.")
sys.exit(1)

if args.forecast and args.run_type != 'forecasting':
print("Error: --forecast flag can only be used with --run_type forecasting. Exiting.")
print("To fix: Set --run_type to forecasting if --forecast is flagged.")
sys.exit(1)


# notes on stepshifted models:
# There will be some thinking here in regards to how we store, denote (naming convention), and retrieve the model artifacts from stepshifted models.
@@ -72,5 +85,3 @@ def validate_arguments(args):
# And the rest of the code is made in a way that handles this naming convention without any issues. Could be a simple fix.
# Alternatively, we could store the model artifacts in a subfolder for each stepshifted model. This would make it easier to handle the artifacts, but it would also make it harder to retrieve the latest artifact for a given run type.
# Lastly, the solution Xiaolong is working on might allow us to store multiple models (steps) in one artifact, which would make this whole discussion obsolete and be the best solution.
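For context, a usage sketch of how the updated parser and validation are expected to be wired together; the main-script wiring and the import path are assumptions for illustration, not part of this diff.

# e.g.  python main.py --run_type forecasting --forecast
from utils_cli_parser import parse_args, validate_arguments   # assumed import path

if __name__ == "__main__":
    args = parse_args()
    validate_arguments(args)      # prints an error and exits on invalid flag combinations
    if args.forecast:             # only valid together with --run_type forecasting
        pass                      # produce and store forecasts here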


75 changes: 74 additions & 1 deletion common_utils/utils_evaluation_metrics.py
@@ -1,6 +1,16 @@

from dataclasses import dataclass
from typing import Optional
import pandas as pd
from statistics import mean, stdev, median

import properscoring as ps
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error, brier_score_loss, average_precision_score, roc_auc_score
from views_forecasts.extensions import *



# MUST BE ALIGNED WITH THE METRICS WE DECIDE TO USE IN THE WORKSHOP!!!!

@dataclass
class EvaluationMetrics:
@@ -75,4 +85,67 @@ def evaluation_dict_to_dataframe(evaluation_dict: dict) -> pd.DataFrame:
"""
return pd.DataFrame.from_dict(evaluation_dict, orient='index')

# TBD: Align with metrics discussed in workshop
@staticmethod
def calculate_aggregate_metrics(evaluation_dict: dict) -> dict:
metrics_aggregate = {
'mean': {},
'std': {},
'median': {}
}

for metric in EvaluationMetrics.__annotations__.keys():
metric_values = [getattr(evaluation, metric) for evaluation in evaluation_dict.values() if getattr(evaluation, metric) is not None]
if metric_values:
metrics_aggregate['mean'][metric] = mean(metric_values)
metrics_aggregate['std'][metric] = stdev(metric_values)
metrics_aggregate['median'][metric] = median(metric_values)
else:
metrics_aggregate['mean'][metric] = None
metrics_aggregate['std'][metric] = None
metrics_aggregate['median'][metric] = None

return metrics_aggregate

@staticmethod
def output_metrics(evaluation_dict):
aggregate = EvaluationMetrics.calculate_aggregate_metrics(evaluation_dict)
step_metrics_dict = {step: vars(metrics) for step, metrics in evaluation_dict.items()}
step_metrics_dict['mean'] = aggregate['mean']
step_metrics_dict['std'] = aggregate['std']
step_metrics_dict['median'] = aggregate['median']
return step_metrics_dict


def generate_metric_dict(df, config):
"""
Generates a dictionary of evaluation metrics for a given forecasting configuration and dataset.

Args:
df (pd.DataFrame): A pandas DataFrame containing the forecasted values and ground truth.
config (dict): A dictionary containing the forecasting configuration parameters.

Returns:
evaluation_dict (dict): A dictionary of EvaluationMetrics instances for each forecasting step.
df_evaluation_dict (pd.DataFrame): A pandas DataFrame containing the evaluation metrics for each forecasting step.

Note:
! This function is temporary for the stepshifter model.
! Change the metrics to those discussed previously.
For logged targets, calculating MSE is actually MSLE.
KLD and Jeffreys divergence are measures used to quantify the difference between two probability distributions. Why do we calculate these metrics in the context of forecasting?
Brier score is used for binary and categorical outcomes that can be structured as true or false
There are no classes in data, so we cannot calculate roc_auc_score, ap_score
"""

evaluation_dict = EvaluationMetrics.make_evaluation_dict(steps=config.steps[-1])
for step in config.steps:
evaluation_dict[f"step{str(step).zfill(2)}"].MSE = mean_squared_error(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict[f"step{str(step).zfill(2)}"].MAE = mean_absolute_error(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].MSLE = mean_squared_log_error(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict[f"step{str(step).zfill(2)}"].CRPS = ps.crps_ensemble(df[config.depvar], df[f"step_pred_{step}"]).mean()
# evaluation_dict[f"step{str(step).zfill(2)}"].Brier = brier_score_loss(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].AUC = roc_auc_score(df[config.depvar], df[f"step_pred_{step}"])
# evaluation_dict[f"step{str(step).zfill(2)}"].AP = average_precision_score(df[config.depvar], df[f"step_pred_{step}"])
evaluation_dict = EvaluationMetrics.output_metrics(evaluation_dict)
df_evaluation_dict = EvaluationMetrics.evaluation_dict_to_dataframe(evaluation_dict)
return evaluation_dict, df_evaluation_dict
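A hypothetical call to generate_metric_dict, assuming the module is importable in the repo environment; the toy data, the depvar name, and the SimpleNamespace config are illustrative assumptions.

import pandas as pd
from types import SimpleNamespace
from utils_evaluation_metrics import generate_metric_dict   # assumed import path

df = pd.DataFrame({
    "ged_sb":      [0.0, 1.0, 3.0, 0.0],   # assumed dependent variable (ground truth)
    "step_pred_1": [0.2, 0.8, 2.5, 0.1],   # step-1 predictions
    "step_pred_2": [0.3, 0.7, 2.0, 0.2],   # step-2 predictions
})
config = SimpleNamespace(steps=[1, 2], depvar="ged_sb")
evaluation_dict, df_evaluation = generate_metric_dict(df, config)
print(df_evaluation)   # per-step rows plus mean/std/median aggregate rows

At least two steps are used here because the std aggregate relies on statistics.stdev, which needs two or more values.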
17 changes: 17 additions & 0 deletions common_utils/utils_input_data.py
@@ -0,0 +1,17 @@
import numpy as np

def ensure_float64(df):
"""
Check if the DataFrame only contains np.float64 types. If not, raise a warning
and convert the DataFrame to use np.float64 for all its numeric columns.
"""

non_float64_cols = df.select_dtypes(include=['number']).columns[df.select_dtypes(include=['number']).dtypes != np.float64]

if len(non_float64_cols) > 0:
print(f"Warning: DataFrame contains non-np.float64 numeric columns. Converting the following columns: {', '.join(non_float64_cols)}")

for col in non_float64_cols:
df[col] = df[col].astype(np.float64)

return df
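A small usage sketch of the check above; the import path is an assumption.

import pandas as pd
from utils_input_data import ensure_float64   # assumed import path

df = pd.DataFrame({"a": [1, 2, 3], "b": [0.5, 1.5, 2.5]})   # 'a' is int64, 'b' is float64
df = ensure_float64(df)   # warns about 'a' and casts it to np.float64
print(df.dtypes)          # both columns now report float64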
45 changes: 40 additions & 5 deletions common_utils/utils_model_outputs.py
@@ -2,6 +2,11 @@
from typing import List, Optional
import pandas as pd


# we need to figure out if we are storing logged fatalities or not
# And this is also a good place to decide on the uncertainty quantification. Right now var, but maybe HDI or something else.
# you might also want a non-step-specific list of pgm? So you can recreate the full df from here? Otherwise this could turn into a mess

@dataclass
class ModelOutputs:
"""
@@ -17,7 +22,7 @@ class ModelOutputs:
pg_id (Optional[List[int]]): The priogrid id.
c_id (Optional[List[int]]): The country id.
month_id (Optional[List[int]]): The month id.
step (Optional[List[int]]): The step ahead forecast.
out_sample_month (Optional[List[int]]): The out-of-sample month of the step-ahead forecast.
"""

y_score: Optional[List[float]] = field(default_factory=list)
@@ -29,7 +34,7 @@ class ModelOutputs:
pg_id: Optional[List[int]] = field(default_factory=list)
c_id: Optional[List[int]] = field(default_factory=list)
month_id: Optional[List[int]] = field(default_factory=list)
step: Optional[List[int]] = field(default_factory=list)
out_sample_month: Optional[List[int]] = field(default_factory=list)

@classmethod
def make_output_dict(cls, steps=36) -> dict:
@@ -103,6 +108,36 @@ def output_dict_to_dataframe(dict_of_outputs) -> pd.DataFrame:

return df

# we need to figure out if we are storing logged fatalities or not
# And this is also a good place to decide on the uncertainty quantification. Right now var, but maybe HDI or something else.
# you might also want a non-step-specific list of pgm? So you can recreate the full df from here? Otherwise this could turn into a mess

def generate_output_dict(df, config):
"""
Generate a dictionary of ModelOutputs instances and a DataFrame from a DataFrame of model predictions.

This function takes a DataFrame of model predictions and a configuration object, and generates a dictionary of ModelOutputs instances

Args:
df (pd.DataFrame): A DataFrame containing model predictions.
config (dict): A configuration object containing model settings.

Returns:
output_dict (dict): A dictionary where each key is a step label and each value is an instance of ModelOutputs.
df_output_dict (pd.DataFrame): A DataFrame of model outputs.

Note:
! This is temporary for stepshifter model
"""
output_dict = ModelOutputs.make_output_dict(steps=config.steps[-1])
for step in config.steps:
df_step = df[[config.depvar, f"step_pred_{step}"]]
output_dict[f"step{str(step).zfill(2)}"].y_true = df_step[config.depvar].to_list()
output_dict[f"step{str(step).zfill(2)}"].y_score = df_step[f"step_pred_{step}"].to_list()
output_dict[f"step{str(step).zfill(2)}"].month_id = df_step.index.get_level_values("month_id").to_list()
if df.index.names[1] == "priogrid_gid":
output_dict[f"step{str(step).zfill(2)}"].pg_id = df_step.index.get_level_values("priogrid_gid").to_list()
elif df.index.names[1] == "country_id":
output_dict[f"step{str(step).zfill(2)}"].c_id = df_step.index.get_level_values("country_id").to_list()
output_dict[f"step{str(step).zfill(2)}"].out_sample_month = step
df_output_dict = ModelOutputs.output_dict_to_dataframe(output_dict)
df_output_dict = df_output_dict.reset_index()
df_output_dict = df_output_dict.drop(columns=df_output_dict.columns[0])
return output_dict, df_output_dict
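A hypothetical call to generate_output_dict at the priogrid level; the index names follow the checks in the function above, while the dependent-variable name and the toy values are illustrative assumptions.

import pandas as pd
from types import SimpleNamespace
from utils_model_outputs import generate_output_dict   # assumed import path

idx = pd.MultiIndex.from_product([[500, 501], [1001, 1002]],
                                 names=["month_id", "priogrid_gid"])
df = pd.DataFrame({"ged_sb":      [0.0, 2.0, 1.0, 0.0],    # assumed dependent variable
                   "step_pred_1": [0.1, 1.7, 0.9, 0.2]},
                  index=idx)
config = SimpleNamespace(steps=[1], depvar="ged_sb")
output_dict, df_output = generate_output_dict(df, config)   # keys like 'step01' map to ModelOutputs instances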
4 changes: 2 additions & 2 deletions common_utils/views_stepshift/run.py
@@ -132,12 +132,12 @@ def future_point_predict(self, time: int, data: pd.DataFrame, keep_specific: boo
if proba:
predictions = self._models.predict_proba(
data.loc[time - self._models._steps_extent: time],
combine=True
combine=False
)
else:
predictions = self._models.predict(
data.loc[time - self._models._steps_extent: time],
combine = True
combine=False
)

if not keep_specific:
2 changes: 2 additions & 0 deletions common_utils/views_stepshifter_darts/__init__.py
@@ -0,0 +1,2 @@
from .stepshifter_darts import StepshifterModel
from darts.models import LightGBMModel, XGBModel, RandomForest