From ade69a4d4c6ca79abd1db391f601e8649ee22f37 Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 31 Jan 2026 23:26:15 +0100 Subject: [PATCH 1/3] mtd --- README.md | 28 +++++++- .../evaluation/metric_calculators.py | 68 +++++++++++++++++++ views_evaluation/evaluation/metrics.py | 1 + 3 files changed, 94 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e077671..e758e01 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,31 @@ VIEWS Evaluation ensures **forecasting accuracy and model robustness** as the ** * **Step-wise evaluation**: groups and evaluates predictions by the respective steps from all models. * **Time-series-wise evaluation**: evaluates predictions for each time-series. * **Month-wise evaluation**: groups and evaluates predictions at a monthly level. -* **Support for Mulyiple Metrics** - * **Point Evaluation Metrics**: RMSLE, CRPS, Average Precision (Brier Score, Jeffreys Divergence, Pearson Correlation, Sinkhorn/Earth-mover Distance & pEMDiv and Variogram to be added). - * **Uncertainty Evaluation Metrics**: CRPS (and more to be added in the future). 
+* **Support for Multiple Metrics** (see table below for details) + +### **Available Metrics** + +| Metric | Key | Description | Available | Supports Distributions | +|--------|-----|-------------|:---------:|:----------------------:| +| Mean Squared Error | `MSE` | Average of squared differences between predictions and actuals | ✅ | ❌ | +| Mean Squared Log Error | `MSLE` | MSE computed on log-transformed values | ✅ | ❌ | +| Root Mean Squared Log Error | `RMSLE` | Square root of MSLE | ✅ | ❌ | +| Mean Tweedie Deviance | `MTD` | Tweedie deviance with power=1.5, ideal for zero-inflated data | ✅ | ❌ | +| Average Precision | `AP` | Area under precision-recall curve for binary classification | ✅ | ❌ | +| Pearson Correlation | `Pearson` | Linear correlation between predictions and actuals | ✅ | ❌ | +| Earth Mover's Distance | `EMD` | Wasserstein distance between predicted and actual distributions | ✅ | ✅ | +| Continuous Ranked Probability Score | `CRPS` | Measures calibration and sharpness of probabilistic forecasts | ✅ | ✅ | +| Mean Interval Score | `MIS` | Evaluates prediction interval width and coverage | ✅ | ✅ | +| Ignorance Score | `Ignorance` | Logarithmic scoring rule for probabilistic predictions | ✅ | ✅ | +| Coverage | `Coverage` | Proportion of actuals falling within prediction intervals | ✅ | ✅ | +| Mean Prediction | `y_hat_bar` | Average of all predicted values | ✅ | ✅ | +| Sinkhorn Distance | `SD` | Regularized optimal transport distance | ❌ | ✅ | +| pseudo-Earth Mover Divergence | `pEMDiv` | Efficient EMD approximation | ❌ | ✅ | +| Variogram | `Variogram` | Spatial/temporal correlation structure score | ❌ | ❌ | +| Brier Score | `Brier` | Accuracy of probabilistic predictions | ❌ | ✅ | +| Jeffreys Divergence | `Jeffreys` | Symmetric measure of distribution difference | ❌ | ✅ | + +> **Note:** Metrics marked with ✅ in "Supports Distributions" can be used for uncertainty evaluation with ensemble/sample-based predictions. 
* **Data Integrity Checks**: Ensures that input DataFrames conform to expected structures before evaluation based on point and uncertainty evaluation. * **Automatic Index Matching**: Aligns actual and predicted values based on MultiIndex structures. * **Planned Enhancements**: diff --git a/views_evaluation/evaluation/metric_calculators.py b/views_evaluation/evaluation/metric_calculators.py index 1bc7523..c877262 100644 --- a/views_evaluation/evaluation/metric_calculators.py +++ b/views_evaluation/evaluation/metric_calculators.py @@ -7,6 +7,7 @@ mean_squared_error, mean_squared_log_error, average_precision_score, + mean_tweedie_deviance, ) from scipy.stats import wasserstein_distance, pearsonr @@ -408,6 +409,72 @@ def _calculate_ignorance_score(predictions, observed, n, all_bins): return np.mean(scores) +def calculate_mtd( + matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str, power: float = 1.5 +) -> float: + """ + Calculate Mean Tweedie Deviance (MTD) between actual and predicted values. + + The Tweedie deviance is a family of loss functions parameterized by a power parameter `p`. + It generalizes several common loss functions: + - p = 0: Equivalent to Mean Squared Error (Gaussian distribution) + - p = 1: Equivalent to Poisson deviance (count data) + - p = 2: Equivalent to Gamma deviance (positive continuous data) + - 1 < p < 2: Compound Poisson-Gamma distribution (zero-inflated positive continuous data) + + With the default power of 1.5 (compound Poisson-Gamma), this metric is particularly + well-suited for conflict forecasting data which typically exhibits: + - Right-skewness (many small values, few large values) + - Zero-inflation (many observations with zero fatalities) + - Non-negative continuous outcomes + + The Tweedie deviance for a single observation is defined as: + d(y, μ) = 2 * (y^(2-p)/((1-p)*(2-p)) - y*μ^(1-p)/(1-p) + μ^(2-p)/(2-p)) + where y is the actual value and μ is the predicted value. 
+ + Lower values indicate better model performance. + + Args: + matched_actual (pd.DataFrame): DataFrame containing actual values with the target column. + The target column should contain numpy arrays or lists of actual observations. + matched_pred (pd.DataFrame): DataFrame containing predictions with the `pred_{target}` column. + The prediction column should contain numpy arrays or lists of predicted values. + target (str): The target column name (without the 'pred_' prefix). + power (float): The power parameter for the Tweedie distribution. Must be + <= 0 or >= 1 (values strictly inside (0, 1) are rejected by scikit-learn). Default is 1.5, which corresponds to the compound Poisson-Gamma + distribution, ideal for zero-inflated positive continuous data. + + Returns: + float: The Mean Tweedie Deviance score. Lower values indicate better predictions. + + Raises: + ValueError: If predictions contain negative values when power > 0, or if + actual values are negative when power >= 1. + + Example: + >>> mtd_score = calculate_mtd(actual_df, pred_df, "ln_sb_best") + >>> print(f"Mean Tweedie Deviance: {mtd_score:.4f}") + + References: + - Jørgensen, B. (1987). Exponential Dispersion Models. + Journal of the Royal Statistical Society. Series B, 49(2), 127-162. + - Tweedie, M. C. K. (1984). An index which distinguishes between some important + exponential families. Statistics: Applications and New Directions. + + See Also: + - sklearn.metrics.mean_tweedie_deviance: The underlying implementation. + - calculate_mse: Mean Squared Error (equivalent to MTD with power=0). 
+ """ + actual_values = np.concatenate(matched_actual[target].values) + pred_values = np.concatenate(matched_pred[f"pred_{target}"].values) + + actual_expanded = np.repeat( + actual_values, [len(x) for x in matched_pred[f"pred_{target}"]] + ) + + return mean_tweedie_deviance(actual_expanded, pred_values, power=power) + + def calculate_mean_prediction( matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str ) -> float: @@ -428,6 +495,7 @@ def calculate_mean_prediction( "pEMDiv": calculate_pEMDiv, "Pearson": calculate_pearson, "Variogram": calculate_variogram, + "MTD": calculate_mtd, "y_hat_bar": calculate_mean_prediction, } diff --git a/views_evaluation/evaluation/metrics.py b/views_evaluation/evaluation/metrics.py index a7dcf33..ba8ebd1 100644 --- a/views_evaluation/evaluation/metrics.py +++ b/views_evaluation/evaluation/metrics.py @@ -128,6 +128,7 @@ class PointEvaluationMetrics(BaseEvaluationMetrics): pEMDiv: Optional[float] = None Pearson: Optional[float] = None Variogram: Optional[float] = None + MTD: Optional[float] = None y_hat_bar: Optional[float] = None From 27f5a7b255cb536d018be03cc9e7b33293ac568a Mon Sep 17 00:00:00 2001 From: Dylan Date: Sat, 31 Jan 2026 23:28:56 +0100 Subject: [PATCH 2/3] rem refs --- views_evaluation/evaluation/metric_calculators.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/views_evaluation/evaluation/metric_calculators.py b/views_evaluation/evaluation/metric_calculators.py index c877262..28ba5cb 100644 --- a/views_evaluation/evaluation/metric_calculators.py +++ b/views_evaluation/evaluation/metric_calculators.py @@ -455,12 +455,6 @@ def calculate_mtd( >>> mtd_score = calculate_mtd(actual_df, pred_df, "ln_sb_best") >>> print(f"Mean Tweedie Deviance: {mtd_score:.4f}") - References: - - Jørgensen, B. (1987). Exponential Dispersion Models. - Journal of the Royal Statistical Society. Series B, 49(2), 127-162. - - Tweedie, M. C. K. (1984). An index which distinguishes between some important - exponential families. 
Statistics: Applications and New Directions. - See Also: - sklearn.metrics.mean_tweedie_deviance: The underlying implementation. - calculate_mse: Mean Squared Error (equivalent to MTD with power=0). From e8c65f7ee2e3637260a30bdec8bb0ef69ec1e62a Mon Sep 17 00:00:00 2001 From: Dylan <52908667+smellycloud@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:00:38 +0100 Subject: [PATCH 3/3] tests --- tests/test_metric_calculators.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/test_metric_calculators.py b/tests/test_metric_calculators.py index 31872a2..bd5321b 100644 --- a/tests/test_metric_calculators.py +++ b/tests/test_metric_calculators.py @@ -11,6 +11,7 @@ calculate_coverage, calculate_ignorance_score, calculate_mean_interval_score, + calculate_mtd, POINT_METRIC_FUNCTIONS, UNCERTAINTY_METRIC_FUNCTIONS, ) @@ -94,6 +95,28 @@ def test_calculate_pearson(sample_data): assert -1 <= result <= 1 +def test_calculate_mtd(sample_data): + """Test Mean Tweedie Deviance calculation.""" + actual, pred = sample_data + result = calculate_mtd(actual, pred, 'target') + assert isinstance(result, float) + assert result >= 0 + + +def test_calculate_mtd_with_power(sample_data): + """Test Mean Tweedie Deviance calculation with different power values.""" + actual, pred = sample_data + # Test with power=1.5 (compound Poisson-Gamma) + result_15 = calculate_mtd(actual, pred, 'target', power=1.5) + assert isinstance(result_15, float) + assert result_15 >= 0 + + # Test with power=2 (Gamma) + result_2 = calculate_mtd(actual, pred, 'target', power=2.0) + assert isinstance(result_2, float) + assert result_2 >= 0 + + def test_calculate_coverage_uncertainty(sample_uncertainty_data): """Test Coverage calculation.""" actual, pred = sample_uncertainty_data @@ -121,7 +144,7 @@ def test_calculate_mis_uncertainty(sample_uncertainty_data): def test_point_metric_functions(): """Test that all point metric functions are available.""" expected_metrics = [ - 
"RMSLE", "CRPS", "AP", "EMD", "SD", "pEMDiv", "Pearson", "Variogram" + "MSE", "MSLE", "RMSLE", "CRPS", "AP", "EMD", "SD", "pEMDiv", "Pearson", "Variogram", "MTD", "y_hat_bar" ] for metric in expected_metrics: