28 changes: 25 additions & 3 deletions README.md
@@ -55,9 +55,31 @@ VIEWS Evaluation ensures **forecasting accuracy and model robustness** as the **
* **Step-wise evaluation**: groups and evaluates predictions by the respective steps from all models.
* **Time-series-wise evaluation**: evaluates predictions for each time-series.
* **Month-wise evaluation**: groups and evaluates predictions at a monthly level.
- * **Support for Mulyiple Metrics**
-   * **Point Evaluation Metrics**: RMSLE, CRPS, Average Precision (Brier Score, Jeffreys Divergence, Pearson Correlation, Sinkhorn/Earth-mover Distance & pEMDiv and Variogram to be added).
-   * **Uncertainty Evaluation Metrics**: CRPS (and more to be added in the future).
+ * **Support for Multiple Metrics** (see table below for details)

### **Available Metrics**

| Metric | Key | Description | Available | Supports Distributions |
|--------|-----|-------------|:---------:|:----------------------:|
| Mean Squared Error | `MSE` | Average of squared differences between predictions and actuals | ✅ | ❌ |
| Mean Squared Log Error | `MSLE` | MSE computed on log-transformed values | ✅ | ❌ |
| Root Mean Squared Log Error | `RMSLE` | Square root of MSLE | ✅ | ❌ |
| Mean Tweedie Deviance | `MTD` | Tweedie deviance with power=1.5, ideal for zero-inflated data | ✅ | ❌ |
| Average Precision | `AP` | Area under precision-recall curve for binary classification | ✅ | ❌ |
| Pearson Correlation | `Pearson` | Linear correlation between predictions and actuals | ✅ | ❌ |
| Earth Mover's Distance | `EMD` | Wasserstein distance between predicted and actual distributions | ✅ | ✅ |
| Continuous Ranked Probability Score | `CRPS` | Measures calibration and sharpness of probabilistic forecasts | ✅ | ✅ |
| Mean Interval Score | `MIS` | Evaluates prediction interval width and coverage | ✅ | ✅ |
| Ignorance Score | `Ignorance` | Logarithmic scoring rule for probabilistic predictions | ✅ | ✅ |
| Coverage | `Coverage` | Proportion of actuals falling within prediction intervals | ✅ | ✅ |
| Mean Prediction | `y_hat_bar` | Average of all predicted values | ✅ | ✅ |
| Sinkhorn Distance | `SD` | Regularized optimal transport distance | ❌ | ✅ |
| pseudo-Earth Mover Divergence | `pEMDiv` | Efficient EMD approximation | ❌ | ✅ |
| Variogram | `Variogram` | Spatial/temporal correlation structure score | ❌ | ❌ |
| Brier Score | `Brier` | Accuracy of probabilistic predictions | ❌ | ✅ |
| Jeffreys Divergence | `Jeffreys` | Symmetric measure of distribution difference | ❌ | ✅ |

> **Note:** Metrics marked with ✅ in "Supports Distributions" can be used for uncertainty evaluation with ensemble/sample-based predictions.
* **Data Integrity Checks**: Ensures that input DataFrames conform to the expected structures for point and uncertainty evaluation before metrics are computed.
* **Automatic Index Matching**: Aligns actual and predicted values based on MultiIndex structures.
* **Planned Enhancements**:
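To make the table concrete, here is a minimal sketch of how a metric can be looked up by its key and applied. It assumes the `POINT_METRIC_FUNCTIONS` registry and the one-array-per-row DataFrame layout introduced in the `metric_calculators.py` diff below; the column name `target` and the sample values are purely illustrative.

```python
import numpy as np
import pandas as pd

from views_evaluation.evaluation.metric_calculators import POINT_METRIC_FUNCTIONS

# Each cell holds an array: one observed value per row in the actuals,
# and one or more predicted samples per row in the predictions.
matched_actual = pd.DataFrame({"target": [np.array([0.0]), np.array([3.0])]})
matched_pred = pd.DataFrame(
    {"pred_target": [np.array([0.1, 0.2]), np.array([2.5, 4.0])]}
)

# Look up the calculator by the key listed in the table above.
mtd = POINT_METRIC_FUNCTIONS["MTD"](matched_actual, matched_pred, "target")
print(f"MTD: {mtd:.4f}")
```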
25 changes: 24 additions & 1 deletion tests/test_metric_calculators.py
@@ -11,6 +11,7 @@
    calculate_coverage,
    calculate_ignorance_score,
    calculate_mean_interval_score,
    calculate_mtd,
    POINT_METRIC_FUNCTIONS,
    UNCERTAINTY_METRIC_FUNCTIONS,
)
@@ -94,6 +95,28 @@ def test_calculate_pearson(sample_data):
    assert -1 <= result <= 1


def test_calculate_mtd(sample_data):
    """Test Mean Tweedie Deviance calculation."""
    actual, pred = sample_data
    result = calculate_mtd(actual, pred, 'target')
    assert isinstance(result, float)
    assert result >= 0


def test_calculate_mtd_with_power(sample_data):
    """Test Mean Tweedie Deviance calculation with different power values."""
    actual, pred = sample_data
    # Test with power=1.5 (compound Poisson-Gamma)
    result_15 = calculate_mtd(actual, pred, 'target', power=1.5)
    assert isinstance(result_15, float)
    assert result_15 >= 0

    # Test with power=2 (Gamma)
    result_2 = calculate_mtd(actual, pred, 'target', power=2.0)
    assert isinstance(result_2, float)
    assert result_2 >= 0


def test_calculate_coverage_uncertainty(sample_uncertainty_data):
    """Test Coverage calculation."""
    actual, pred = sample_uncertainty_data
Expand Down Expand Up @@ -121,7 +144,7 @@ def test_calculate_mis_uncertainty(sample_uncertainty_data):
def test_point_metric_functions():
    """Test that all point metric functions are available."""
    expected_metrics = [
-        "RMSLE", "CRPS", "AP", "EMD", "SD", "pEMDiv", "Pearson", "Variogram"
+        "MSE", "MSLE", "RMSLE", "CRPS", "AP", "EMD", "SD", "pEMDiv", "Pearson", "Variogram", "MTD", "y_hat_bar"
    ]

    for metric in expected_metrics:
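The `sample_data` fixture itself sits outside this diff. A minimal stand-in consistent with how `calculate_mtd` consumes its inputs (see `metric_calculators.py` below) might look like this sketch; the column name and values are hypothetical, and the actuals are kept strictly positive so that the `power=2.0` (Gamma) case remains valid.

```python
import numpy as np
import pandas as pd
import pytest


@pytest.fixture
def sample_data():
    """Hypothetical fixture: one observed value and one predicted sample per row."""
    actual = pd.DataFrame(
        {"target": [np.array([1.0]), np.array([2.0]), np.array([5.0])]}
    )
    pred = pd.DataFrame(
        {"pred_target": [np.array([0.8]), np.array([2.4]), np.array([4.6])]}
    )
    return actual, pred
```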
62 changes: 62 additions & 0 deletions views_evaluation/evaluation/metric_calculators.py
@@ -7,6 +7,7 @@
    mean_squared_error,
    mean_squared_log_error,
    average_precision_score,
    mean_tweedie_deviance,
)
from scipy.stats import wasserstein_distance, pearsonr

@@ -408,6 +409,66 @@ def _calculate_ignorance_score(predictions, observed, n, all_bins):
    return np.mean(scores)


def calculate_mtd(
    matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str, power: float = 1.5
) -> float:
    """
    Calculate Mean Tweedie Deviance (MTD) between actual and predicted values.

    The Tweedie deviance is a family of loss functions parameterized by a power parameter `p`.
    It generalizes several common loss functions:
    - p = 0: Equivalent to Mean Squared Error (Gaussian distribution)
    - p = 1: Equivalent to Poisson deviance (count data)
    - p = 2: Equivalent to Gamma deviance (positive continuous data)
    - 1 < p < 2: Compound Poisson-Gamma distribution (zero-inflated positive continuous data)

    With the default power of 1.5 (compound Poisson-Gamma), this metric is particularly
    well-suited for conflict forecasting data, which typically exhibits:
    - Right-skewness (many small values, few large values)
    - Zero-inflation (many observations with zero fatalities)
    - Non-negative continuous outcomes

    The Tweedie deviance for a single observation is defined as:
        d(y, μ) = 2 * (y^(2-p) / ((1-p)*(2-p)) - y*μ^(1-p) / (1-p) + μ^(2-p) / (2-p))
    where y is the actual value and μ is the predicted value.

    Lower values indicate better model performance.

    Args:
        matched_actual (pd.DataFrame): DataFrame containing actual values with the target column.
            The target column should contain numpy arrays or lists of actual observations.
        matched_pred (pd.DataFrame): DataFrame containing predictions with the `pred_{target}` column.
            The prediction column should contain numpy arrays or lists of predicted values.
        target (str): The target column name (without the 'pred_' prefix).
        power (float): The power parameter for the Tweedie distribution. Must be <= 0 or
            >= 1; values in (0, 1) are not supported. Default is 1.5, which corresponds
            to the compound Poisson-Gamma distribution, ideal for zero-inflated positive
            continuous data.

    Returns:
        float: The Mean Tweedie Deviance score. Lower values indicate better predictions.

    Raises:
        ValueError: If predictions are not strictly positive when power >= 1, or if
            actual values are negative when 1 <= power < 2 (or non-positive when power >= 2).

    Example:
        >>> mtd_score = calculate_mtd(actual_df, pred_df, "ln_sb_best")
        >>> print(f"Mean Tweedie Deviance: {mtd_score:.4f}")

    See Also:
        - sklearn.metrics.mean_tweedie_deviance: The underlying implementation.
        - calculate_mse: Mean Squared Error (equivalent to MTD with power=0).
    """
    actual_values = np.concatenate(matched_actual[target].values)
    pred_values = np.concatenate(matched_pred[f"pred_{target}"].values)

    # Repeat each actual value once per predicted sample so the two arrays align.
    actual_expanded = np.repeat(
        actual_values, [len(x) for x in matched_pred[f"pred_{target}"]]
    )

    return mean_tweedie_deviance(actual_expanded, pred_values, power=power)


def calculate_mean_prediction(
    matched_actual: pd.DataFrame, matched_pred: pd.DataFrame, target: str
) -> float:
@@ -428,6 +489,7 @@ def calculate_mean_prediction(
"pEMDiv": calculate_pEMDiv,
"Pearson": calculate_pearson,
"Variogram": calculate_variogram,
"MTD": calculate_mtd,
"y_hat_bar": calculate_mean_prediction,
}

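The special cases listed in the `calculate_mtd` docstring can be checked directly against scikit-learn: at `power=0` the Tweedie deviance reduces to plain MSE, and at `power=1` it matches the Poisson deviance. The sample values below are illustrative only.

```python
import numpy as np
from sklearn.metrics import (
    mean_poisson_deviance,
    mean_squared_error,
    mean_tweedie_deviance,
)

y_true = np.array([0.0, 1.0, 4.0, 9.0])
y_pred = np.array([0.5, 1.2, 3.5, 8.0])

# power=0 reduces to mean squared error (Gaussian case).
assert np.isclose(
    mean_tweedie_deviance(y_true, y_pred, power=0),
    mean_squared_error(y_true, y_pred),
)

# power=1 matches the Poisson deviance (zeros in y_true are allowed).
assert np.isclose(
    mean_tweedie_deviance(y_true, y_pred, power=1),
    mean_poisson_deviance(y_true, y_pred),
)

# The default power=1.5 tolerates zero actuals but needs strictly positive predictions.
print(mean_tweedie_deviance(y_true, y_pred, power=1.5))
```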
1 change: 1 addition & 0 deletions views_evaluation/evaluation/metrics.py
@@ -128,6 +128,7 @@ class PointEvaluationMetrics(BaseEvaluationMetrics):
    pEMDiv: Optional[float] = None
    Pearson: Optional[float] = None
    Variogram: Optional[float] = None
    MTD: Optional[float] = None
    y_hat_bar: Optional[float] = None


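Assuming `PointEvaluationMetrics` behaves like a plain dataclass over the Optional fields shown above (its base class sits outside this diff), the new field would be populated roughly as in this hypothetical sketch:

```python
from views_evaluation.evaluation.metrics import PointEvaluationMetrics

# Hypothetical usage: keyword construction with only the fields shown in the diff.
metrics = PointEvaluationMetrics(MTD=0.4213, Pearson=0.87)
print(metrics.MTD)        # 0.4213
print(metrics.Variogram)  # None (metric not yet implemented)
```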