brightbandtech · rodrigoalmeida94 · Jan 13, 2026 · Jan 14, 2026
diff --git a/src/extremeweatherbench/metrics.py b/src/extremeweatherbench/metrics.py
@@ -658,6 +658,57 @@ def _compute_metric(
         return transformed.accuracy()
 
 
+class ReceiverOperatingCharacteristic(ThresholdMetric):
+    """Receiver Operating Characteristic metric."""
+
+    def __init__(self, name: str = "ReceiverOperatingCharacteristic", *args, **kwargs):
+        super().__init__(name, *args, **kwargs)
+
+    def _compute_metric(
+        self,
+        forecast: xr.DataArray,
+        target: xr.DataArray,
+        **kwargs: Any,
+    ) -> Any:
+        preserve_dims = kwargs.get("preserve_dims", self.preserve_dims)
+        op_func = utils.maybe_get_operator(kwargs.get("op_func", operator.ge))
+
+        # Binarize forecast and target using thresholds
+        binary_forecast = utils.maybe_densify_dataarray(
+            op_func(forecast, kwargs.get("forecast_threshold", self.forecast_threshold))
+        ).astype(float)
+        binary_target = utils.maybe_densify_dataarray(
+            op_func(target, kwargs.get("target_threshold", self.target_threshold))
+        ).astype(float)
+
+        return scores.probability.roc_curve_data(
+            binary_forecast,
+            binary_target,
+            thresholds="auto",
+            preserve_dims=preserve_dims,
+            weights=None,
+        )
+
+
+class ReceiverOperatingCharacteristicSkillScore(ReceiverOperatingCharacteristic):
+    """Receiver Operating Characteristic Skill Score metric."""
+
+    def __init__(
+        self, name: str = "ReceiverOperatingCharacteristicSkillScore", *args, **kwargs
+    ):
+        super().__init__(name, *args, **kwargs)
+
+    def _compute_metric(
+        self,
+        forecast: xr.DataArray,
+        target: xr.DataArray,
+        auc_reference: float = 0.5,
+        **kwargs: Any,
+    ) -> Any:
+        roc_curve_data = super()._compute_metric(forecast, target, **kwargs)
+        return (roc_curve_data["AUC"] - auc_reference) / (1 - auc_reference)
+
+
 class MeanSquaredError(BaseMetric):
     """Mean Squared Error metric.
 

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -250,6 +250,15 @@ def test_accuracy_threshold_metric(self):
         assert acc_metric.forecast_threshold == 15000
         assert acc_metric.target_threshold == 0.3
 
+    def test_roc_threshold_metric(self):
+        """Test ROC threshold metric instantiation and properties."""
+        roc_metric = metrics.ReceiverOperatingCharacteristic(
+            forecast_threshold=15000, target_threshold=0.3
+        )
+        assert isinstance(roc_metric, metrics.ThresholdMetric)
+        assert roc_metric.forecast_threshold == 15000
+        assert roc_metric.target_threshold == 0.3
+
     def test_threshold_metric_instance_interface(self):
         """Test that instance callable interface works."""
         # Create test data
@@ -1979,6 +1988,90 @@ def test_compute_metric(self):
         assert isinstance(result, xr.DataArray)
 
 
+class TestROCSS:
+    """Tests for the ROCSS metric."""
+
+    def test_instantiation(self):
+        """Test that ROCSS can be instantiated."""
+        metric = metrics.ReceiverOperatingCharacteristicSkillScore()
+        assert isinstance(metric, metrics.ReceiverOperatingCharacteristic)
+
+    def test_compute_metric(self):
+        """Test ROCSS computation."""
+        metric = metrics.ReceiverOperatingCharacteristicSkillScore(
+            forecast_threshold=0.5, target_threshold=0.5
+        )
+
+        forecast = xr.DataArray(
+            data=[0.8, 0.3, 0.7, 0.2],
+            dims=["lead_time"],
+            coords={"lead_time": [0, 1, 2, 3]},
+        )
+        target = xr.DataArray(
+            data=[0.9, 0.1, 0.8, 0.6],
+            dims=["lead_time"],
+            coords={"lead_time": [0, 1, 2, 3]},
+        )
+
+        result = metric._compute_metric(forecast, target)
+        assert isinstance(result, xr.DataArray)
+
+    def test_skill_score_zero_when_auc_matches_reference(self):
+        """ROCSS should be zero when AUC equals the reference value."""
+        metric = metrics.ReceiverOperatingCharacteristicSkillScore(
+            forecast_threshold=0.5, target_threshold=0.5, preserve_dims=None
+        )
+
+        forecast = xr.DataArray(
+            data=[0.8, 0.3, 0.7, 0.2],
+            dims=["sample"],
+            coords={"sample": [0, 1, 2, 3]},
+        )
+        target = xr.DataArray(
+            data=[0.9, 0.1, 0.8, 0.6],
+            dims=["sample"],
+            coords={"sample": [0, 1, 2, 3]},
+        )
+
+        roc_metric = metrics.ReceiverOperatingCharacteristic(
+            forecast_threshold=0.5, target_threshold=0.5, preserve_dims=None
+        )
+        roc_curve_data = roc_metric._compute_metric(forecast, target)
+        auc = roc_curve_data["AUC"]
+
+        auc_reference = float(auc)
+        result = metric._compute_metric(forecast, target, auc_reference=auc_reference)
+
+        xr.testing.assert_allclose(result, xr.zeros_like(auc))
+
+    def test_skill_score_scales_auc_above_reference(self):
+        """ROCSS scales the AUC improvement over the reference."""
+        forecast = xr.DataArray(
+            data=[0.9, 0.7, 0.6, 0.2],
+            dims=["sample"],
+            coords={"sample": [0, 1, 2, 3]},
+        )
+        target = xr.DataArray(
+            data=[0.8, 0.4, 0.9, 0.3],
+            dims=["sample"],
+            coords={"sample": [0, 1, 2, 3]},
+        )
+
+        roc_metric = metrics.ReceiverOperatingCharacteristic(
+            forecast_threshold=0.6, target_threshold=0.5, preserve_dims=None
+        )
+        roc_curve_data = roc_metric._compute_metric(forecast, target)
+        auc = roc_curve_data["AUC"]
+
+        metric = metrics.ReceiverOperatingCharacteristicSkillScore(
+            forecast_threshold=0.6, target_threshold=0.5, preserve_dims=None
+        )
+        result = metric._compute_metric(forecast, target, auc_reference=0.5)
+
+        expected = (auc - 0.5) / (1 - 0.5)
+        xr.testing.assert_allclose(result, expected)
+
+
 class TestMetricIntegration:
     """Integration tests for metric classes."""