Skip to content

Commit 4f305e8

Browse files
committed
Add CL and DR estimators for cf_calibration
- Implement conditional loss (CL) estimator using outcome model
- Implement doubly robust (DR) estimator combining IPW and CL
- Change default estimator from IPW to DR for consistency with other functions
- Add tests for new estimators (29 tests now pass)
- Update documentation with estimator descriptions
1 parent c652dbc commit 4f305e8

5 files changed

Lines changed: 225 additions & 48 deletions

File tree

R/cf_calibration.R

Lines changed: 86 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,29 @@
2222
#' @details
2323
#' The counterfactual calibration curve estimates the relationship between
2424
#' predicted risk and observed risk under the counterfactual intervention.
25-
#' This is done by applying inverse probability weights to the calibration
26-
#' curve estimation.
25+
#'
26+
#' The function implements three estimators:
27+
#'
28+
#' **IPW Estimator**: Weights observations by the inverse probability of
29+
#' receiving the counterfactual treatment. Requires a correctly specified
30+
#' propensity score model.
31+
#'
32+
#' **Conditional Loss (CL) Estimator**: Uses the fitted outcome model
33+
#' E[Y|X, A=a] to estimate calibration over all observations. Requires a
34+
#' correctly specified outcome model.
35+
#'
36+
#' **Doubly Robust (DR) Estimator**: Combines CL and IPW approaches. Consistent
37+
#' if either the propensity or outcome model is correctly specified.
2738
#'
2839
#' @references
2940
#' Boyer, C. B., Dahabreh, I. J., & Steingrimsson, J. A. (2025).
3041
#' "Estimating and evaluating counterfactual prediction models."
3142
#' *Statistics in Medicine*, 44(23-24), e70287. \doi{10.1002/sim.70287}
3243
#'
44+
#' Steingrimsson, J. A., Gatsonis, C., Li, B., & Dahabreh, I. J. (2023).
45+
#' "Transporting a prediction model for use in a new target population."
46+
#' *American Journal of Epidemiology*, 192(2), 296-304.
47+
#'
3348
#' @seealso [cf_mse()], [cf_auc()], [plot.cf_calibration()]
3449
#'
3550
#' @export
@@ -43,22 +58,32 @@
4358
#' y <- rbinom(n, 1, plogis(-1 + x - 0.5 * a))
4459
#' pred <- plogis(-1 + 0.8 * x)
4560
#'
46-
#' # Estimate counterfactual calibration curve
47-
#' result <- cf_calibration(
61+
#' # Estimate counterfactual calibration curve with different estimators
62+
#' result_ipw <- cf_calibration(
63+
#' predictions = pred,
64+
#' outcomes = y,
65+
#' treatment = a,
66+
#' covariates = data.frame(x = x),
67+
#' treatment_level = 0,
68+
#' estimator = "ipw"
69+
#' )
70+
#'
71+
#' result_dr <- cf_calibration(
4872
#' predictions = pred,
4973
#' outcomes = y,
5074
#' treatment = a,
5175
#' covariates = data.frame(x = x),
52-
#' treatment_level = 0
76+
#' treatment_level = 0,
77+
#' estimator = "dr"
5378
#' )
54-
#' print(result)
55-
#' # plot(result) # If ggplot2 is available
79+
#' print(result_dr)
80+
#' # plot(result_dr) # If ggplot2 is available
5681
cf_calibration <- function(predictions,
5782
outcomes,
5883
treatment,
5984
covariates,
6085
treatment_level = 0,
61-
estimator = c("ipw", "cl"),
86+
estimator = c("dr", "ipw", "cl"),
6287
propensity_model = NULL,
6388
outcome_model = NULL,
6489
smoother = c("loess", "binned"),
@@ -78,47 +103,76 @@ cf_calibration <- function(predictions,
78103

79104
n <- length(outcomes)
80105

81-
# Fit propensity model if needed for IPW
82-
if (estimator == "ipw" && is.null(propensity_model)) {
83-
ps_data <- cbind(A = treatment, as.data.frame(covariates))
106+
# Convert covariates to data frame if needed
107+
if (!is.data.frame(covariates)) {
108+
covariates <- as.data.frame(covariates)
109+
}
110+
111+
# Fit propensity model if needed for IPW or DR
112+
if (estimator %in% c("ipw", "dr") && is.null(propensity_model)) {
113+
ps_data <- cbind(A = treatment, covariates)
84114
propensity_model <- glm(A ~ ., data = ps_data, family = binomial())
85115
}
86116

117+
# Fit outcome model if needed for CL or DR
118+
if (estimator %in% c("cl", "dr") && is.null(outcome_model)) {
119+
subset_idx <- treatment == treatment_level
120+
outcome_data <- cbind(Y = outcomes, covariates)[subset_idx, ]
121+
outcome_model <- glm(Y ~ ., data = outcome_data, family = binomial())
122+
}
123+
87124
# Get propensity scores
88-
if (estimator == "ipw") {
89-
ps <- .predict_nuisance(propensity_model, as.data.frame(covariates), type = "response")
125+
if (estimator %in% c("ipw", "dr")) {
126+
ps <- .predict_nuisance(propensity_model, covariates, type = "response")
90127
if (treatment_level == 0) {
91128
ps <- 1 - ps
92129
}
130+
# Truncate extreme propensities for stability
131+
ps <- pmax(pmin(ps, 0.975), 0.025)
132+
}
133+
134+
# Get outcome model predictions E[Y|X, A=a]
135+
if (estimator %in% c("cl", "dr")) {
136+
mu_hat <- .predict_nuisance(outcome_model, covariates, type = "response")
93137
}
94138

95139
# Indicator for treatment level
96-
I_a <- treatment == treatment_level
140+
I_a <- as.numeric(treatment == treatment_level)
97141

98-
# Compute weights for IPW calibration
142+
# Compute pseudo-outcomes based on estimator
99143
if (estimator == "ipw") {
100-
weights <- rep(0, n)
101-
weights[I_a] <- 1 / ps[I_a]
144+
# IPW: weight observations in treatment group
145+
# Use only observations with A = a
146+
pred_use <- predictions[I_a == 1]
147+
pseudo_outcomes <- outcomes[I_a == 1]
148+
weights <- 1 / ps[I_a == 1]
102149
# Normalize weights
103-
weights <- weights / sum(weights) * sum(I_a)
104-
} else {
150+
weights <- weights / mean(weights)
151+
152+
} else if (estimator == "cl") {
153+
# CL: use outcome model predictions for all observations
154+
pred_use <- predictions
155+
pseudo_outcomes <- mu_hat
105156
weights <- rep(1, n)
106-
}
107157

108-
# Subset to counterfactual treatment group
109-
pred_sub <- predictions[I_a]
110-
out_sub <- outcomes[I_a]
111-
w_sub <- weights[I_a]
158+
} else if (estimator == "dr") {
159+
# DR: augmented IPW over all observations
160+
# Pseudo-outcome: mu_hat + I(A=a)/ps * (Y - mu_hat)
161+
pred_use <- predictions
162+
augmentation <- I_a / ps * (outcomes - mu_hat)
163+
pseudo_outcomes <- mu_hat + augmentation
164+
weights <- rep(1, n)
165+
}
112166

113167
# Compute calibration curve
114168
if (smoother == "loess") {
115-
fit <- loess(out_sub ~ pred_sub, weights = w_sub, span = span)
116-
predicted <- sort(unique(pred_sub))
169+
fit <- loess(pseudo_outcomes ~ pred_use, weights = weights, span = span)
170+
predicted <- sort(unique(pred_use))
117171
observed <- predict(fit, newdata = predicted)
118172
} else if (smoother == "binned") {
119-
bins <- cut(pred_sub, breaks = n_bins, include.lowest = TRUE)
120-
predicted <- tapply(pred_sub, bins, mean)
121-
observed <- tapply(out_sub * w_sub, bins, sum) / tapply(w_sub, bins, sum)
173+
bins <- cut(pred_use, breaks = n_bins, include.lowest = TRUE)
174+
predicted <- tapply(pred_use, bins, mean)
175+
observed <- tapply(pseudo_outcomes * weights, bins, sum) / tapply(weights, bins, sum)
122176
}
123177

124178
# Compute calibration metrics
@@ -131,17 +185,18 @@ cf_calibration <- function(predictions,
131185
result <- list(
132186
predicted = predicted,
133187
observed = observed,
134-
weights = w_sub,
188+
weights = weights,
135189
smoother = smoother,
136190
estimator = estimator,
137191
metric = "calibration",
138192
treatment_level = treatment_level,
139-
n_obs = sum(I_a),
193+
n_obs = if (estimator == "ipw") sum(I_a) else n,
140194
ici = ici,
141195
e50 = e50,
142196
e90 = e90,
143197
emax = emax,
144198
propensity_model = propensity_model,
199+
outcome_model = outcome_model,
145200
call = match.call()
146201
)
147202

man/cf_calibration.Rd

Lines changed: 33 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/cfperformance-package.Rd

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-cf_calibration.R

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,98 @@ test_that("cf_calibration requires binary outcomes", {
8585
"binary"
8686
)
8787
})
88+
89+
90+
test_that("cf_calibration CL estimator works", {
  # Simulate confounded data: treatment probability and outcome risk
  # both depend on the single covariate x.
  set.seed(42)
  n_sim <- 300
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.3 * a))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0,
    estimator = "cl"
  )

  expect_s3_class(fit, "cf_calibration")
  expect_equal(fit$estimator, "cl")
  # The conditional-loss estimator scores every observation via the
  # outcome model, not just the A = 0 subset.
  expect_equal(fit$n_obs, n_sim)
  expect_true(fit$ici >= 0)
})
112+
113+
114+
test_that("cf_calibration DR estimator works", {
  # Same confounded data-generating process as the CL test.
  set.seed(42)
  n_sim <- 300
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.3 * a))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0,
    estimator = "dr"
  )

  expect_s3_class(fit, "cf_calibration")
  expect_equal(fit$estimator, "dr")
  # The doubly robust estimator augments over all n observations.
  expect_equal(fit$n_obs, n_sim)
  expect_true(fit$ici >= 0)
  # DR needs both nuisance models, so both should be fitted and returned.
  expect_true(!is.null(fit$propensity_model))
  expect_true(!is.null(fit$outcome_model))
})
138+
139+
140+
test_that("cf_calibration DR is default estimator", {
  # Randomized treatment (p = 0.5), so any estimator is valid here;
  # this test only pins down the default choice.
  set.seed(42)
  n_sim <- 200
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, 0.5)
  y <- rbinom(n_sim, 1, plogis(-1 + x))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0
  )

  # Omitting `estimator` must select the doubly robust estimator.
  expect_equal(fit$estimator, "dr")
})
158+
159+
160+
test_that("cf_calibration all three estimators produce reasonable results", {
  # Larger sample so the estimators have a chance to agree.
  set.seed(123)
  n_sim <- 500
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.5 * a))
  risk <- plogis(-1 + 0.8 * x)
  covs <- data.frame(x = x)

  # Fit the calibration curve once per estimator (no RNG is consumed
  # after data generation, so call order does not matter).
  fits <- lapply(
    c("ipw", "cl", "dr"),
    function(est) {
      cf_calibration(risk, y, a, covs, treatment_level = 0, estimator = est)
    }
  )

  # Each estimator should yield an ICI inside [0, 1].
  ici_values <- vapply(fits, function(f) f$ici, numeric(1))
  for (ici in ici_values) {
    expect_true(ici >= 0 && ici <= 1)
  }

  # Loose agreement check: all three target the same estimand, so their
  # ICI values should fall within a reasonable band of each other.
  expect_true(max(ici_values) - min(ici_values) < 0.2)
})

0 commit comments

Comments
 (0)