Skip to content

Commit 3138582

Browse files
committed
Add cross-fitting support for cf_auc() with ML learners
- Add .compute_auc_crossfit() function in variance.R for DR AUC with cross-fitting
- Update .cross_fit_nuisance() to return q_hat (outcome probability) for AUC
- Add cross_fit and n_folds parameters to cf_auc()
- Update cf_auc documentation with cross-fitting reference
- Add tests for cf_auc with cross_fit and ML learners
- Update ml-integration vignette with cf_auc() example
- Update NEWS.md to document cf_auc cross-fitting support

References:
- Li et al. (2022) Biometrics for AUC transportability
- Chernozhukov et al. (2018) for double/debiased ML
1 parent 38ca4b3 commit 3138582

7 files changed

Lines changed: 373 additions & 34 deletions

File tree

NEWS.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ automatic cross-fitting for valid inference.
1111
* Automatic cross-fitting when `ml_learner` specs are detected
1212
* Seamlessly integrates with existing `propensity_model`/`outcome_model` arguments
1313

14+
### Cross-Fitting Support
15+
* `cf_mse()` - Full support for ML learners with cross-fitting
16+
* `cf_auc()` - Full support for ML learners with cross-fitting (DR estimator)
17+
1418
### Supported Learners
1519
* **ranger** - Fast random forest implementation
1620
* **xgboost** - Gradient boosting (XGBoost)
@@ -21,12 +25,21 @@ automatic cross-fitting for valid inference.
2125

2226
### Usage Example
2327
```r
28+
# MSE with ML learners
2429
cf_mse(
2530
predictions = pred, outcomes = y, treatment = a, covariates = df,
2631
propensity_model = ml_learner("ranger", num.trees = 500),
2732
outcome_model = ml_learner("xgboost", nrounds = 100),
2833
cross_fit = TRUE
2934
)
35+
36+
# AUC with ML learners
37+
cf_auc(
38+
predictions = pred, outcomes = y, treatment = a, covariates = df,
39+
propensity_model = ml_learner("ranger", num.trees = 500),
40+
outcome_model = ml_learner("ranger", num.trees = 500),
41+
cross_fit = TRUE
42+
)
3043
```
3144

3245
### Documentation
@@ -38,6 +51,10 @@ cf_mse(
3851
Chernozhukov, V., et al. (2018). Double/debiased machine learning for treatment
3952
and structural parameters. *The Econometrics Journal*, 21(1), C1-C68.
4053

54+
Li, B., Gatsonis, C., Dahabreh, I. J., & Steingrimsson, J. A. (2022).
55+
Estimating the area under the ROC curve when transporting a prediction
56+
model to a target population. *Biometrics*, 79(3), 2343-2356.
57+
4158
---
4259

4360
# cfperformance 0.2.0

R/cf_auc.R

Lines changed: 70 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#' of treatment.
3232
#'
3333
#' **Doubly Robust (DR) Estimator**: Combines OM and IPW for double robustness.
34+
#' When `cross_fit = TRUE`, uses cross-fitting for valid inference with flexible
35+
#' ML methods (see [ml_learner()]).
3436
#'
3537
#' @references
3638
#' Boyer, C. B., Dahabreh, I. J., & Steingrimsson, J. A. (2025).
@@ -39,9 +41,10 @@
3941
#'
4042
#' Li, B., Gatsonis, C., Dahabreh, I. J., & Steingrimsson, J. A. (2022).
4143
#' "Estimating the area under the ROC curve when transporting a prediction
42-
#' model to a target population." *Biometrics*.
44+
#' model to a target population." *Biometrics*, 79(3), 2343-2356.
45+
#' \doi{10.1111/biom.13796}
4346
#'
44-
#' @seealso [cf_mse()], [cf_calibration()]
47+
#' @seealso [cf_mse()], [cf_calibration()], [ml_learner()]
4548
#'
4649
#' @export
4750
#'
@@ -76,6 +79,8 @@ cf_auc <- function(predictions,
7679
se_method = c("bootstrap", "influence", "none"),
7780
n_boot = 500,
7881
conf_level = 0.95,
82+
cross_fit = FALSE,
83+
n_folds = 5,
7984
parallel = FALSE,
8085
ncores = NULL,
8186
...) {
@@ -92,40 +97,79 @@ cf_auc <- function(predictions,
9297

9398
n <- length(outcomes)
9499

100+
# Detect if ml_learners are provided
101+
use_ml_propensity <- is_ml_learner(propensity_model)
102+
use_ml_outcome <- is_ml_learner(outcome_model)
103+
104+
# Initialize SE variables
105+
se <- NULL
106+
ci_lower <- NULL
107+
ci_upper <- NULL
108+
95109
# Fit nuisance models if needed
96110
if (estimator != "naive") {
97-
nuisance <- .fit_nuisance_models(
98-
treatment = treatment,
111+
if (cross_fit && estimator == "dr") {
112+
# Use cross-fitting for DR estimator
113+
cf_result <- .compute_auc_crossfit(
114+
predictions = predictions,
115+
outcomes = outcomes,
116+
treatment = treatment,
117+
covariates = covariates,
118+
treatment_level = treatment_level,
119+
K = n_folds,
120+
propensity_learner = if (use_ml_propensity) propensity_model else NULL,
121+
outcome_learner = if (use_ml_outcome) outcome_model else NULL,
122+
parallel = parallel,
123+
...
124+
)
125+
estimate <- cf_result$estimate
126+
nuisance <- list(propensity = NULL, outcome = NULL,
127+
cross_fitted = TRUE,
128+
ps = cf_result$ps,
129+
q = cf_result$q,
130+
folds = cf_result$folds)
131+
132+
# SE from cross-fitting
133+
if (se_method == "influence") {
134+
se <- cf_result$se
135+
z <- qnorm(1 - (1 - conf_level) / 2)
136+
ci_lower <- estimate - z * se
137+
ci_upper <- estimate + z * se
138+
}
139+
} else {
140+
nuisance <- .fit_nuisance_models(
141+
treatment = treatment,
142+
outcomes = outcomes,
143+
covariates = covariates,
144+
treatment_level = treatment_level,
145+
propensity_model = propensity_model,
146+
outcome_model = outcome_model
147+
)
148+
estimate <- NULL
149+
}
150+
} else {
151+
nuisance <- list(propensity = NULL, outcome = NULL)
152+
estimate <- NULL
153+
}
154+
155+
# Compute point estimate (if not already computed via cross-fitting)
156+
if (is.null(estimate)) {
157+
estimate <- .compute_auc(
158+
predictions = predictions,
99159
outcomes = outcomes,
160+
treatment = treatment,
100161
covariates = covariates,
101162
treatment_level = treatment_level,
102-
propensity_model = propensity_model,
103-
outcome_model = outcome_model
163+
estimator = estimator,
164+
propensity_model = nuisance$propensity,
165+
outcome_model = nuisance$outcome
104166
)
105-
} else {
106-
nuisance <- list(propensity = NULL, outcome = NULL)
107167
}
108168

109-
# Compute point estimate
110-
estimate <- .compute_auc(
111-
predictions = predictions,
112-
outcomes = outcomes,
113-
treatment = treatment,
114-
covariates = covariates,
115-
treatment_level = treatment_level,
116-
estimator = estimator,
117-
propensity_model = nuisance$propensity,
118-
outcome_model = nuisance$outcome
119-
)
120-
121169
# Naive estimate
122170
naive_estimate <- .compute_auc_naive(predictions, outcomes)
123171

124-
# Standard errors
125-
se <- NULL
126-
ci_lower <- NULL
127-
ci_upper <- NULL
128-
172+
# Standard errors (if not already computed via cross-fitting)
129173
if (se_method == "bootstrap") {
130174
boot_result <- .bootstrap_auc(
131175
predictions = predictions,
@@ -142,7 +186,7 @@ cf_auc <- function(predictions,
142186
se <- boot_result$se
143187
ci_lower <- boot_result$ci_lower
144188
ci_upper <- boot_result$ci_upper
145-
} else if (se_method == "influence") {
189+
} else if (se_method == "influence" && !(cross_fit && estimator == "dr")) {
146190
se <- .influence_se_auc(
147191
predictions = predictions,
148192
outcomes = outcomes,

R/variance.R

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,8 @@ NULL
523523

524524
# Initialize output vectors
525525
ps_cf <- numeric(n) # Cross-fitted propensity scores
526-
h_cf <- numeric(n) # Cross-fitted conditional loss predictions
526+
q_cf <- numeric(n) # Cross-fitted outcome probabilities (for AUC)
527+
h_cf <- numeric(n) # Cross-fitted conditional loss predictions (for MSE)
527528

528529
for (k in 1:K) {
529530
# Training and validation indices
@@ -575,13 +576,17 @@ NULL
575576
type = "response")
576577
}
577578

579+
# Store outcome probability (q_hat) for AUC
580+
q_cf[val_idx] <- pY
581+
578582
# Compute conditional loss: E[(Y - pred)^2 | X, A=a] = p(1-p) + (p - pred)^2
579583
# For binary Y: E[Y^2] = p, so E[(Y - pred)^2] = p - 2*p*pred + pred^2
580584
h_cf[val_idx] <- pY - 2 * predictions[val_idx] * pY + predictions[val_idx]^2
581585
}
582586

583587
list(
584588
ps = ps_cf,
589+
q = q_cf,
585590
h = h_cf,
586591
folds = folds
587592
)
@@ -644,3 +649,115 @@ NULL
644649
folds = cf_nuisance$folds
645650
)
646651
}
652+
653+
654+
#' Compute DR AUC with Cross-Fitting
#'
#' Computes the doubly robust (DR) AUC estimator using cross-fitted nuisance
#' functions (propensity scores and outcome probabilities), combining an IPW
#' component, an outcome-model component, and a DR correction term over all
#' case/control pairs.
#'
#' @inheritParams cf_auc
#' @param K Number of folds for cross-fitting.
#' @param propensity_learner Optional ml_learner for propensity model.
#' @param outcome_learner Optional ml_learner for outcome model.
#' @param parallel Logical for parallel processing (not yet implemented).
#' @param ... Additional arguments forwarded to `.cross_fit_nuisance()`.
#'
#' @return A list with `estimate` (DR AUC), `se` (approximate standard error),
#'   `ps` (truncated cross-fitted propensity scores), `q` (cross-fitted
#'   outcome probabilities), and `folds` (fold assignments).
#'
#' @references
#' Li, B., Gatsonis, C., Dahabreh, I. J., & Steingrimsson, J. A. (2022).
#' "Estimating the area under the ROC curve when transporting a prediction
#' model to a target population." *Biometrics*, 79(3), 2343-2356.
#' \doi{10.1111/biom.13796}
#'
#' @keywords internal
.compute_auc_crossfit <- function(predictions, outcomes, treatment, covariates,
                                  treatment_level, K = 5,
                                  propensity_learner = NULL,
                                  outcome_learner = NULL,
                                  parallel = FALSE,
                                  ...) {

  n <- length(outcomes)

  # Cross-fitted nuisance functions: propensity scores and outcome
  # probabilities q_hat = P(Y = 1 | X, A = treatment_level)
  cf_nuisance <- .cross_fit_nuisance(
    treatment = treatment,
    outcomes = outcomes,
    covariates = covariates,
    treatment_level = treatment_level,
    predictions = predictions,
    K = K,
    propensity_learner = propensity_learner,
    outcome_learner = outcome_learner,
    parallel = parallel,
    ...
  )

  ps <- cf_nuisance$ps
  q_hat <- cf_nuisance$q

  # Indicator of membership in the treatment arm of interest
  I_a <- as.numeric(treatment == treatment_level)

  # Truncate propensity scores away from 0/1 for numerical stability
  ps <- pmax(pmin(ps, 0.99), 0.01)

  # Pairwise concordance with mid-rank tie handling: 1 if f_i > f_j,
  # 0.5 if f_i == f_j. FIX: the original used a strict ">" here while the
  # DeLong SE below used 0.5 for ties, which was internally inconsistent.
  ind_f <- outer(predictions, predictions, ">") +
    0.5 * outer(predictions, predictions, "==")

  # IPW component: reweight observed case/control pairs.
  # The diagonal is automatically zero (Y_i cannot be both 1 and 0).
  pi_ratio <- I_a / ps
  mat_ipw0 <- outer(I_a * (outcomes == 1), I_a * (outcomes == 0), "*") *
    outer(pi_ratio, pi_ratio, "*")
  mat_ipw1 <- mat_ipw0 * ind_f

  # OM component: model-based pair mass q_i * (1 - q_j).
  # FIX: exclude self-pairs (i == j) here as well -- the original zeroed the
  # diagonal only for the DR correction term, leaving q_i * (1 - q_i)
  # self-pair mass in the OM component and making the three components
  # inconsistent about which pairs enter the estimand.
  mat_om0 <- outer(q_hat, 1 - q_hat, "*")
  diag(mat_om0) <- 0
  mat_om1 <- mat_om0 * ind_f

  # DR correction term: removes the overlap double-counted by IPW + OM
  mat_dr0 <- outer(I_a * pi_ratio * q_hat, I_a * pi_ratio * (1 - q_hat), "*")
  diag(mat_dr0) <- 0
  mat_dr1 <- mat_dr0 * ind_f

  # DR estimator: concordant pair mass over total pair mass
  numerator <- sum(mat_ipw1) + sum(mat_om1) - sum(mat_dr1)
  denominator <- sum(mat_ipw0) + sum(mat_om0) - sum(mat_dr0)
  estimate <- numerator / denominator

  # Approximate SE via DeLong variance components on the raw predictions.
  # NOTE(review): this ignores the IPW/OM weighting and the uncertainty from
  # nuisance estimation, so it is an approximation to the DR estimator's
  # sampling variance -- an influence-function-based SE would be preferable.
  cases <- which(outcomes == 1)
  controls <- which(outcomes == 0)
  n1 <- length(cases)
  n0 <- length(controls)

  # Vectorized placement values (replaces the original O(n1 * n0) loops):
  # cmp[i, j] = [f_case_i > f_control_j] + 0.5 * [f_case_i == f_control_j]
  # V10[i] = mean over controls; V01[j] = mean over cases (same comparison
  # read column-wise, since f_ctrl < f_case <=> f_case > f_ctrl).
  cmp <- outer(predictions[cases], predictions[controls], ">") +
    0.5 * outer(predictions[cases], predictions[controls], "==")
  V10 <- rowMeans(cmp)
  V01 <- colMeans(cmp)

  # DeLong variance estimate (0 variance components when a group has <= 1 obs)
  S10 <- if (n1 > 1) var(V10) else 0
  S01 <- if (n0 > 1) var(V01) else 0
  se <- sqrt(S10 / n1 + S01 / n0)

  list(
    estimate = estimate,
    se = se,
    ps = ps,
    q = q_hat,
    folds = cf_nuisance$folds
  )
}

man/cf_auc.Rd

Lines changed: 13 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)