Skip to content

Commit 4f305e8

Browse files
committed
Add CL and DR estimators for cf_calibration
- Implement conditional loss (CL) estimator using outcome model
- Implement doubly robust (DR) estimator combining IPW and CL
- Change default estimator from IPW to DR for consistency with other functions
- Add tests for new estimators (29 tests now pass)
- Update documentation with estimator descriptions
1 parent c652dbc commit 4f305e8

5 files changed

Lines changed: 225 additions & 48 deletions

File tree

R/cf_calibration.R

Lines changed: 86 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,29 @@
2222
#' @details
2323
#' The counterfactual calibration curve estimates the relationship between
2424
#' predicted risk and observed risk under the counterfactual intervention.
25-
#' This is done by applying inverse probability weights to the calibration
26-
#' curve estimation.
25+
#'
26+
#' The function implements three estimators:
27+
#'
28+
#' **IPW Estimator**: Weights observations by the inverse probability of
29+
#' receiving the counterfactual treatment. Requires a correctly specified
30+
#' propensity score model.
31+
#'
32+
#' **Conditional Loss (CL) Estimator**: Uses the fitted outcome model
33+
#' E[Y|X, A=a] to estimate calibration over all observations. Requires a
34+
#' correctly specified outcome model.
35+
#'
36+
#' **Doubly Robust (DR) Estimator**: Combines CL and IPW approaches. Consistent
37+
#' if either the propensity or outcome model is correctly specified.
2738
#'
2839
#' @references
2940
#' Boyer, C. B., Dahabreh, I. J., & Steingrimsson, J. A. (2025).
3041
#' "Estimating and evaluating counterfactual prediction models."
3142
#' *Statistics in Medicine*, 44(23-24), e70287. \doi{10.1002/sim.70287}
3243
#'
44+
#' Steingrimsson, J. A., Gatsonis, C., Li, B., & Dahabreh, I. J. (2023).
45+
#' "Transporting a prediction model for use in a new target population."
46+
#' *American Journal of Epidemiology*, 192(2), 296-304.
47+
#'
3348
#' @seealso [cf_mse()], [cf_auc()], [plot.cf_calibration()]
3449
#'
3550
#' @export
@@ -43,22 +58,32 @@
4358
#' y <- rbinom(n, 1, plogis(-1 + x - 0.5 * a))
4459
#' pred <- plogis(-1 + 0.8 * x)
4560
#'
46-
#' # Estimate counterfactual calibration curve
47-
#' result <- cf_calibration(
61+
#' # Estimate counterfactual calibration curve with different estimators
62+
#' result_ipw <- cf_calibration(
63+
#' predictions = pred,
64+
#' outcomes = y,
65+
#' treatment = a,
66+
#' covariates = data.frame(x = x),
67+
#' treatment_level = 0,
68+
#' estimator = "ipw"
69+
#' )
70+
#'
71+
#' result_dr <- cf_calibration(
4872
#' predictions = pred,
4973
#' outcomes = y,
5074
#' treatment = a,
5175
#' covariates = data.frame(x = x),
52-
#' treatment_level = 0
76+
#' treatment_level = 0,
77+
#' estimator = "dr"
5378
#' )
54-
#' print(result)
55-
#' # plot(result) # If ggplot2 is available
79+
#' print(result_dr)
80+
#' # plot(result_dr) # If ggplot2 is available
5681
cf_calibration <- function(predictions,
5782
outcomes,
5883
treatment,
5984
covariates,
6085
treatment_level = 0,
61-
estimator = c("ipw", "cl"),
86+
estimator = c("dr", "ipw", "cl"),
6287
propensity_model = NULL,
6388
outcome_model = NULL,
6489
smoother = c("loess", "binned"),
@@ -78,47 +103,76 @@ cf_calibration <- function(predictions,
78103

79104
n <- length(outcomes)
80105

81-
# Fit propensity model if needed for IPW
82-
if (estimator == "ipw" && is.null(propensity_model)) {
83-
ps_data <- cbind(A = treatment, as.data.frame(covariates))
106+
# Convert covariates to data frame if needed
107+
if (!is.data.frame(covariates)) {
108+
covariates <- as.data.frame(covariates)
109+
}
110+
111+
# Fit propensity model if needed for IPW or DR
112+
if (estimator %in% c("ipw", "dr") && is.null(propensity_model)) {
113+
ps_data <- cbind(A = treatment, covariates)
84114
propensity_model <- glm(A ~ ., data = ps_data, family = binomial())
85115
}
86116

117+
# Fit outcome model if needed for CL or DR
118+
if (estimator %in% c("cl", "dr") && is.null(outcome_model)) {
119+
subset_idx <- treatment == treatment_level
120+
outcome_data <- cbind(Y = outcomes, covariates)[subset_idx, ]
121+
outcome_model <- glm(Y ~ ., data = outcome_data, family = binomial())
122+
}
123+
87124
# Get propensity scores
88-
if (estimator == "ipw") {
89-
ps <- .predict_nuisance(propensity_model, as.data.frame(covariates), type = "response")
125+
if (estimator %in% c("ipw", "dr")) {
126+
ps <- .predict_nuisance(propensity_model, covariates, type = "response")
90127
if (treatment_level == 0) {
91128
ps <- 1 - ps
92129
}
130+
# Truncate extreme propensities for stability
131+
ps <- pmax(pmin(ps, 0.975), 0.025)
132+
}
133+
134+
# Get outcome model predictions E[Y|X, A=a]
135+
if (estimator %in% c("cl", "dr")) {
136+
mu_hat <- .predict_nuisance(outcome_model, covariates, type = "response")
93137
}
94138

95139
# Indicator for treatment level
96-
I_a <- treatment == treatment_level
140+
I_a <- as.numeric(treatment == treatment_level)
97141

98-
# Compute weights for IPW calibration
142+
# Compute pseudo-outcomes based on estimator
99143
if (estimator == "ipw") {
100-
weights <- rep(0, n)
101-
weights[I_a] <- 1 / ps[I_a]
144+
# IPW: weight observations in treatment group
145+
# Use only observations with A = a
146+
pred_use <- predictions[I_a == 1]
147+
pseudo_outcomes <- outcomes[I_a == 1]
148+
weights <- 1 / ps[I_a == 1]
102149
# Normalize weights
103-
weights <- weights / sum(weights) * sum(I_a)
104-
} else {
150+
weights <- weights / mean(weights)
151+
152+
} else if (estimator == "cl") {
153+
# CL: use outcome model predictions for all observations
154+
pred_use <- predictions
155+
pseudo_outcomes <- mu_hat
105156
weights <- rep(1, n)
106-
}
107157

108-
# Subset to counterfactual treatment group
109-
pred_sub <- predictions[I_a]
110-
out_sub <- outcomes[I_a]
111-
w_sub <- weights[I_a]
158+
} else if (estimator == "dr") {
159+
# DR: augmented IPW over all observations
160+
# Pseudo-outcome: mu_hat + I(A=a)/ps * (Y - mu_hat)
161+
pred_use <- predictions
162+
augmentation <- I_a / ps * (outcomes - mu_hat)
163+
pseudo_outcomes <- mu_hat + augmentation
164+
weights <- rep(1, n)
165+
}
112166

113167
# Compute calibration curve
114168
if (smoother == "loess") {
115-
fit <- loess(out_sub ~ pred_sub, weights = w_sub, span = span)
116-
predicted <- sort(unique(pred_sub))
169+
fit <- loess(pseudo_outcomes ~ pred_use, weights = weights, span = span)
170+
predicted <- sort(unique(pred_use))
117171
observed <- predict(fit, newdata = predicted)
118172
} else if (smoother == "binned") {
119-
bins <- cut(pred_sub, breaks = n_bins, include.lowest = TRUE)
120-
predicted <- tapply(pred_sub, bins, mean)
121-
observed <- tapply(out_sub * w_sub, bins, sum) / tapply(w_sub, bins, sum)
173+
bins <- cut(pred_use, breaks = n_bins, include.lowest = TRUE)
174+
predicted <- tapply(pred_use, bins, mean)
175+
observed <- tapply(pseudo_outcomes * weights, bins, sum) / tapply(weights, bins, sum)
122176
}
123177

124178
# Compute calibration metrics
@@ -131,17 +185,18 @@ cf_calibration <- function(predictions,
131185
result <- list(
132186
predicted = predicted,
133187
observed = observed,
134-
weights = w_sub,
188+
weights = weights,
135189
smoother = smoother,
136190
estimator = estimator,
137191
metric = "calibration",
138192
treatment_level = treatment_level,
139-
n_obs = sum(I_a),
193+
n_obs = if (estimator == "ipw") sum(I_a) else n,
140194
ici = ici,
141195
e50 = e50,
142196
e90 = e90,
143197
emax = emax,
144198
propensity_model = propensity_model,
199+
outcome_model = outcome_model,
145200
call = match.call()
146201
)
147202

man/cf_calibration.Rd

Lines changed: 33 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/cfperformance-package.Rd

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-cf_calibration.R

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,98 @@ test_that("cf_calibration requires binary outcomes", {
8585
"binary"
8686
)
8787
})
88+
89+
90+
test_that("cf_calibration CL estimator works", {
  # Simulate confounded data: treatment probability and outcome risk
  # both depend on the single covariate x.
  set.seed(42)
  n_sim <- 300
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.3 * a))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0,
    estimator = "cl"
  )

  expect_s3_class(fit, "cf_calibration")
  expect_equal(fit$estimator, "cl")
  # The conditional-loss estimator scores every observation via the
  # outcome model, not just the A = 0 subset.
  expect_equal(fit$n_obs, n_sim)
  expect_true(fit$ici >= 0)
})
112+
113+
114+
test_that("cf_calibration DR estimator works", {
  # Same confounded data-generating process as the CL test.
  set.seed(42)
  n_sim <- 300
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.3 * a))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0,
    estimator = "dr"
  )

  expect_s3_class(fit, "cf_calibration")
  expect_equal(fit$estimator, "dr")
  # The doubly robust estimator augments over all n observations.
  expect_equal(fit$n_obs, n_sim)
  expect_true(fit$ici >= 0)
  # DR needs both nuisance models, so both should be fitted and returned.
  expect_true(!is.null(fit$propensity_model))
  expect_true(!is.null(fit$outcome_model))
})
138+
139+
140+
test_that("cf_calibration DR is default estimator", {
  # Randomized treatment (p = 0.5), so any estimator is valid here;
  # this test only pins down the default choice.
  set.seed(42)
  n_sim <- 200
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, 0.5)
  y <- rbinom(n_sim, 1, plogis(-1 + x))
  risk <- plogis(-1 + 0.9 * x)

  fit <- cf_calibration(
    predictions = risk,
    outcomes = y,
    treatment = a,
    covariates = data.frame(x = x),
    treatment_level = 0
  )

  # Omitting `estimator` must select the doubly robust estimator.
  expect_equal(fit$estimator, "dr")
})
158+
159+
160+
test_that("cf_calibration all three estimators produce reasonable results", {
  # Larger sample so the estimators have a chance to agree.
  set.seed(123)
  n_sim <- 500
  x <- rnorm(n_sim)
  a <- rbinom(n_sim, 1, plogis(-0.5 + 0.5 * x))
  y <- rbinom(n_sim, 1, plogis(-1 + x - 0.5 * a))
  risk <- plogis(-1 + 0.8 * x)
  covs <- data.frame(x = x)

  # Fit the calibration curve once per estimator (no RNG is consumed
  # after data generation, so call order does not matter).
  fits <- lapply(
    c("ipw", "cl", "dr"),
    function(est) {
      cf_calibration(risk, y, a, covs, treatment_level = 0, estimator = est)
    }
  )

  # Each estimator should yield an ICI inside [0, 1].
  ici_values <- vapply(fits, function(f) f$ici, numeric(1))
  for (ici in ici_values) {
    expect_true(ici >= 0 && ici <= 1)
  }

  # Loose agreement check: all three target the same estimand, so their
  # ICI values should fall within a reasonable band of each other.
  expect_true(max(ici_values) - min(ici_values) < 0.2)
})

0 commit comments

Comments
 (0)