From 8eee5d68f9e0e4709edd48d6fa9dc03c601e31bd Mon Sep 17 00:00:00 2001 From: Sean Long Date: Wed, 5 Nov 2025 14:23:32 -0800 Subject: [PATCH 1/3] Add extract_rxc_precinct function for precinct-level EI estimates --- NAMESPACE | 1 + NEWS.md | 12 ++++ R/extract_rxc_precinct.R | 118 ++++++++++++++++++++++++++++++++++++ README.md | 26 ++++++++ man/extract_rxc_precinct.Rd | 64 +++++++++++++++++++ 5 files changed, 221 insertions(+) create mode 100644 R/extract_rxc_precinct.R create mode 100644 man/extract_rxc_precinct.Rd diff --git a/NAMESPACE b/NAMESPACE index 8993f3c..c1483e2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(ei_rc_good_table) export(ei_reg_bayes_conf_int) export(ei_rxc) export(elect_algebra) +export(extract_rxc_precinct) export(fips_extract) export(get_multi_barreled_surnames) export(get_special_character_surnames) diff --git a/NEWS.md b/NEWS.md index 0019e75..8689d73 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,15 @@ +# eiCompare 3.0.6 + +## added function 11/5/25 + +* included extract_rxc_precinct() function to extract precinct level estimates from ei_rxc() + +# eiCompare 3.0.5 + +## Package changes 10/27/25 + +* added add_rpv_normalize and removed wru dependency + # eiCompare 3.0.4 ## Package changes diff --git a/R/extract_rxc_precinct.R b/R/extract_rxc_precinct.R new file mode 100644 index 0000000..a8bc745 --- /dev/null +++ b/R/extract_rxc_precinct.R @@ -0,0 +1,118 @@ +#' Extract Precinct-Level Estimates from ei.MD.bayes Object +#' +#' Extracts precinct-specific ecological inference estimates from ei_rxc() output. +#' Uses exact string matching to handle variation column names +#' +#' @param eivote ei_rxc() output object containing stat_objects +#' @param cand_cols Character vector of candidate column names (e.g., c("pct_cand_A", "pct_cand_B")) +#' @param race_cols Character vector of race column names (e.g., c("pct_black", "pct_white")) +#' @param dat Original data frame used in ei_rxc() call +#' @param uniq Column name for precinct identifier (must exist in dat) +#' +#' @return Data frame with precinct IDs and race×candidate estimate columns +#' +#' @details +#' The function extracts md_out$draws$Beta from the ei_rxc() output, which contains +#' MCMC draws for each precinct-race-candidate combination. Beta column names follow +#' the format "beta.race_name.cand_name.precinct_idx". The function computes posterior +#' means across MCMC iterations for each precinct. +#' +#' Output columns follow expand.grid(cand, race) ordering, with column names formatted +#' as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). +#' +#' @examples +#' \donttest{ +#' +#' # library(eiCompare) +#' # data(gwinnett_ei) +#' # +#' # gwinnett_ei$precinct <- 1:nrow(gwinnett_ei) +#' # +#' # eivote <- ei_rxc( #this will take some time +#' # data = gwinnett_ei, +#' # cand_cols = c("kemp", "abrams", "metz"), +#' # race_cols = c("white", "black", "other"), +#' # totals_col = "turnout", +#' # seed = 12345 +#' #) +#' +#' # # Extract precinct-level estimates +#' # precinct_results <- extract_rxc_precinct( +#' # eivote = eivote, +#' # cand_cols = c("kemp", "abrams"), +#' # race_cols = c("white", "black", "other"), +#' # dat = gwinnett_ei, +#' # uniq = "precinct" +#' #) +#' +#' #head(precinct_results) +#' } +#' +#' @export +extract_rxc_precinct <- function(eivote, cand_cols, race_cols, dat, uniq) { + + # Extract md_out object from ei_rxc wrapper + eiMD_object <- eivote$stat_objects[[1]] + + # Extract Beta matrix (MCMC iterations × beta parameters) + Beta <- eiMD_object$draws$Beta + + # Check that uniq column exists in dat + if(!uniq %in% colnames(dat)) { + stop(paste0("Column '", uniq, "' not found in dat. ", + "Available columns: ", paste(colnames(dat), collapse = ", "))) + } + + n_precincts <- nrow(dat) + beta_colnames <- colnames(Beta) + + # Initialize result matrix (precincts × race-candidate combinations) + result_matrix <- matrix(NA, + nrow = n_precincts, + ncol = length(race_cols) * length(cand_cols)) + + # Loop through race-candidate combinations and extract precinct estimates + col_idx <- 1 + for(race in race_cols) { + for(cand in cand_cols) { + + # Build expected prefix pattern for exact matching + # Format: beta.race.cand.precinct_number + expected_prefix <- paste0("beta.", race, ".", cand, ".") + + # Find Beta columns matching this race-candidate pair + matching_cols <- grep(paste0("^", gsub("\\.", "\\\\.", expected_prefix)), + beta_colnames, + value = FALSE) + + # Validation - should have exactly n_precincts matches + if(length(matching_cols) != n_precincts) { + stop(paste0("Column matching error for race='", race, "', cand='", cand, + "': found ", length(matching_cols), " columns but expected ", + n_precincts, " precincts")) + } + + # Extract precinct indices and reorder to match dat row order + precinct_nums <- sub(expected_prefix, "", beta_colnames[matching_cols]) + precinct_order <- order(as.numeric(precinct_nums)) + matching_cols_ordered <- matching_cols[precinct_order] + + # Calculate mean across MCMC iterations for each precinct + result_matrix[, col_idx] <- colMeans(Beta[, matching_cols_ordered]) + col_idx <- col_idx + 1 + } + } + + # Create column names (race + candidate, matching expand.grid order) + col_names_df <- expand.grid(cand = cand_cols, race = race_cols) + col_names <- paste0(col_names_df$race, col_names_df$cand) + + # Convert to data frame with column names + result_df <- as.data.frame(result_matrix) + colnames(result_df) <- col_names + + # Attach precinct IDs from original data as first column + result_df <- cbind(dat[, uniq, drop = FALSE], result_df) + + return(result_df) +} diff --git a/README.md b/README.md index 6353efe..a3b321c 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,32 @@ ## News +# eiCompare 3.0.6 + +## New function + +* included extract_rxc_precinct() function to extract precinct level estimates from ei_rxc() + +# eiCompare 3.0.5 + +## Package changes 10/27/25 + +* added add_rpv_normalize and removed wru dependency + +# eiCompare 3.0.4 + +## Package changes + +* incorporated rpv_coef_plot() and rpv_toDF() functions from eiExpand package +* edited ei_iter() to have flexible CI parameters (default is 0.95) using bayestestR for calculation and updated column naming, and to use reproducible parallel processing (.inorder=TRUE) +* edited ei_rxc() with repdocuible parallel processing and changed column naming to fit ei_iter() +* Fixed summary.eiCompare() print behavior +* Added viridis to imports for color visualiztion and updated RoxygenNote to 7.3.2 + +### eiCompare 3.0.3 + +Updated + ### eiCompare 3.0.2 #### Package changes diff --git a/man/extract_rxc_precinct.Rd b/man/extract_rxc_precinct.Rd new file mode 100644 index 0000000..9f16473 --- /dev/null +++ b/man/extract_rxc_precinct.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract_rxc_precinct.R +\name{extract_rxc_precinct} +\alias{extract_rxc_precinct} +\title{Extract Precinct-Level Estimates from ei.MD.bayes Object} +\usage{ +extract_rxc_precinct(eivote, cand_cols, race_cols, dat, uniq) +} +\arguments{ +\item{eivote}{ei_rxc() output object containing stat_objects} + +\item{cand_cols}{Character vector of candidate column names (e.g., c("pct_cand_A", "pct_cand_B"))} + +\item{race_cols}{Character vector of race column names (e.g., c("pct_black", "pct_white"))} + +\item{dat}{Original data frame used in ei_rxc() call} + +\item{uniq}{Column name for precinct identifier (must exist in dat)} +} +\value{ +Data frame with precinct IDs and race×candidate estimate columns +} +\description{ +Extracts precinct-specific ecological inference estimates from ei_rxc() output. +Uses exact string matching to handle variation column names +} +\details{ +The function extracts md_out$draws$Beta from the ei_rxc() output, which contains +MCMC draws for each precinct-race-candidate combination. Beta column names follow +the format "beta.race_name.cand_name.precinct_idx". The function computes posterior +means across MCMC iterations for each precinct. + +Output columns follow expand.grid(cand, race) ordering, with column names formatted +as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). +} +\examples{ +\donttest{ + +# library(eiCompare) +# data(gwinnett_ei) +# +# gwinnett_ei$precinct <- 1:nrow(gwinnett_ei) +# +# eivote <- ei_rxc( #this will take some time +# data = gwinnett_ei, +# cand_cols = c("kemp", "abrams", "metz"), +# race_cols = c("white", "black", "other"), +# totals_col = "turnout", +# seed = 12345 +#) + +# # Extract precinct-level estimates +# precinct_results <- extract_rxc_precinct( +# eivote = eivote, +# cand_cols = c("kemp", "abrams"), +# race_cols = c("white", "black", "other"), +# dat = gwinnett_ei, +# uniq = "precinct" +#) + +#head(precinct_results) +} + +} From 4e1707e04ced3be82fed6c13f02daebe663120c7 Mon Sep 17 00:00:00 2001 From: Sean Long Date: Tue, 11 Nov 2025 13:38:41 -0800 Subject: [PATCH 2/3] Address PR feedback: fix formatting, rename parameter, correct versioning --- DESCRIPTION | 2 +- NEWS.md | 13 ++++--------- R/extract_rxc_precinct.R | 34 +++++++++++++++++----------------- README.md | 11 +++-------- man/extract_rxc_precinct.Rd | 26 +++++++++++++------------- 5 files changed, 38 insertions(+), 48 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a2da2ea..fc2fa40 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: eiCompare Type: Package Title: Compares Different Ecological Inference Methods -Version: 3.0.4 +Version: 3.0.5 Authors@R: c(person(given = "Loren", family = "Collingwood", diff --git a/NEWS.md b/NEWS.md index 8689d73..c6b3785 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,19 +1,14 @@ -# eiCompare 3.0.6 - -## added function 11/5/25 - -* included extract_rxc_precinct() function to extract precinct level estimates from ei_rxc() - # eiCompare 3.0.5 -## Package changes 10/27/25 +## added function 11/11/25 -* added add_rpv_normalize and removed wru dependency +* included extract_rxc_precinct() function to extract precinct level estimates from ei_rxc() # eiCompare 3.0.4 ## Package changes - +* added add_rpv_normalize() function +* removed wru dependency * incorporated rpv_coef_plot() and rpv_toDF() functions from eiExpand package * edited ei_iter() to have flexible CI parameters (default is 0.95) using bayestestR for calculation and updated column naming, and to use reproducible parallel processing (.inorder=TRUE) * edited ei_rxc() with repdocuible parallel processing and changed column naming to fit ei_iter() diff --git a/R/extract_rxc_precinct.R b/R/extract_rxc_precinct.R index a8bc745..e1de4f1 100644 --- a/R/extract_rxc_precinct.R +++ b/R/extract_rxc_precinct.R @@ -3,22 +3,22 @@ #' Extracts precinct-specific ecological inference estimates from ei_rxc() output. #' Uses exact string matching to handle variation column names #' -#' @param eivote ei_rxc() output object containing stat_objects -#' @param cand_cols Character vector of candidate column names (e.g., c("pct_cand_A", "pct_cand_B")) -#' @param race_cols Character vector of race column names (e.g., c("pct_black", "pct_white")) -#' @param dat Original data frame used in ei_rxc() call -#' @param uniq Column name for precinct identifier (must exist in dat) +#' @param eivote `ei_rxc()` output object containing `stat_objects` +#' @param cand_cols Character vector of candidate column names (e.g., `c("pct_cand_A", "pct_cand_B")`) +#' @param race_cols Character vector of race column names (e.g., `c("pct_black", "pct_white")`) +#' @param dat Original data frame used in `ei_rxc()` call +#' @param precinct_id Column name for precinct identifier (must exist in `dat`) #' #' @return Data frame with precinct IDs and race×candidate estimate columns #' #' @details -#' The function extracts md_out$draws$Beta from the ei_rxc() output, which contains +#' The function extracts `md_out$draws$Beta` from the `ei_rxc()` output, which contains #' MCMC draws for each precinct-race-candidate combination. Beta column names follow -#' the format "beta.race_name.cand_name.precinct_idx". The function computes posterior +#' the format `"beta.race_name.cand_name.precinct_idx"`. The function computes posterior #' means across MCMC iterations for each precinct. #' -#' Output columns follow expand.grid(cand, race) ordering, with column names formatted -#' as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). +#' Output columns follow `expand.grid(cand, race)` ordering, with column names formatted +#' as `paste0(race, cand)` (e.g., `"pct_blackpct_cand_A"`). #' #' @examples #' \donttest{ @@ -30,8 +30,8 @@ #' # #' # eivote <- ei_rxc( #this will take some time #' # data = gwinnett_ei, -#' # cand_cols = c("kemp", "abrams", "metz"), -#' # race_cols = c("white", "black", "other"), +#' # cand_cols = c("kemp", "abrams", "metz"), +#' # race_cols = c("white", "black", "other"), #' # totals_col = "turnout", #' # seed = 12345 #' #) @@ -42,14 +42,14 @@ #' # cand_cols = c("kemp", "abrams"), #' # race_cols = c("white", "black", "other"), #' # dat = gwinnett_ei, -#' # uniq = "precinct" +#' # precinct_id = "precinct" #' #) #' #' #head(precinct_results) #' } #' #' @export -extract_rxc_precinct <- function(eivote, cand_cols, race_cols, dat, uniq) { +extract_rxc_precinct <- function(eivote, cand_cols, race_cols, dat, precinct_id) { # Extract md_out object from ei_rxc wrapper eiMD_object <- eivote$stat_objects[[1]] @@ -57,9 +57,9 @@ extract_rxc_precinct <- function(eivote, cand_cols, race_cols, dat, uniq) { # Extract Beta matrix (MCMC iterations × beta parameters) Beta <- eiMD_object$draws$Beta - # Check that uniq column exists in dat - if(!uniq %in% colnames(dat)) { - stop(paste0("Column '", uniq, "' not found in dat. ", + # Check that precinct_id column exists in dat + if(!precinct_id %in% colnames(dat)) { + stop(paste0("Column '", precinct_id, "' not found in dat. ", "Available columns: ", paste(colnames(dat), collapse = ", "))) } @@ -112,7 +112,7 @@ extract_rxc_precinct <- function(eivote, cand_cols, race_cols, dat, uniq) { colnames(result_df) <- col_names # Attach precinct IDs from original data as first column - result_df <- cbind(dat[, uniq, drop = FALSE], result_df) + result_df <- cbind(dat[, precinct_id, drop = FALSE], result_df) return(result_df) } diff --git a/README.md b/README.md index a3b321c..1936cfe 100644 --- a/README.md +++ b/README.md @@ -13,22 +13,17 @@ ## News -# eiCompare 3.0.6 +# eiCompare 3.0.5 ## New function * included extract_rxc_precinct() function to extract precinct level estimates from ei_rxc() -# eiCompare 3.0.5 - -## Package changes 10/27/25 - -* added add_rpv_normalize and removed wru dependency - # eiCompare 3.0.4 ## Package changes - +* added add_rpv_normalize() function +* removed wru dependency * incorporated rpv_coef_plot() and rpv_toDF() functions from eiExpand package * edited ei_iter() to have flexible CI parameters (default is 0.95) using bayestestR for calculation and updated column naming, and to use reproducible parallel processing (.inorder=TRUE) * edited ei_rxc() with repdocuible parallel processing and changed column naming to fit ei_iter() diff --git a/man/extract_rxc_precinct.Rd b/man/extract_rxc_precinct.Rd index 9f16473..b77b3cf 100644 --- a/man/extract_rxc_precinct.Rd +++ b/man/extract_rxc_precinct.Rd @@ -4,18 +4,18 @@ \alias{extract_rxc_precinct} \title{Extract Precinct-Level Estimates from ei.MD.bayes Object} \usage{ -extract_rxc_precinct(eivote, cand_cols, race_cols, dat, uniq) +extract_rxc_precinct(eivote, cand_cols, race_cols, dat, precinct_id) } \arguments{ -\item{eivote}{ei_rxc() output object containing stat_objects} +\item{eivote}{`ei_rxc()` output object containing `stat_objects`} -\item{cand_cols}{Character vector of candidate column names (e.g., c("pct_cand_A", "pct_cand_B"))} +\item{cand_cols}{Character vector of candidate column names (e.g., `c("pct_cand_A", "pct_cand_B")`)} -\item{race_cols}{Character vector of race column names (e.g., c("pct_black", "pct_white"))} +\item{race_cols}{Character vector of race column names (e.g., `c("pct_black", "pct_white")`)} -\item{dat}{Original data frame used in ei_rxc() call} +\item{dat}{Original data frame used in `ei_rxc()` call} -\item{uniq}{Column name for precinct identifier (must exist in dat)} +\item{precinct_id}{Column name for precinct identifier (must exist in `dat`)} } \value{ Data frame with precinct IDs and race×candidate estimate columns @@ -25,13 +25,13 @@ Extracts precinct-specific ecological inference estimates from ei_rxc() output. Uses exact string matching to handle variation column names } \details{ -The function extracts md_out$draws$Beta from the ei_rxc() output, which contains +The function extracts `md_out$draws$Beta` from the `ei_rxc()` output, which contains MCMC draws for each precinct-race-candidate combination. Beta column names follow -the format "beta.race_name.cand_name.precinct_idx". The function computes posterior +the format `"beta.race_name.cand_name.precinct_idx"`. The function computes posterior means across MCMC iterations for each precinct. -Output columns follow expand.grid(cand, race) ordering, with column names formatted -as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). +Output columns follow `expand.grid(cand, race)` ordering, with column names formatted +as `paste0(race, cand)` (e.g., `"pct_blackpct_cand_A"`). } \examples{ \donttest{ @@ -43,8 +43,8 @@ as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). # # eivote <- ei_rxc( #this will take some time # data = gwinnett_ei, -# cand_cols = c("kemp", "abrams", "metz"), -# race_cols = c("white", "black", "other"), +# cand_cols = c("kemp", "abrams", "metz"), +# race_cols = c("white", "black", "other"), # totals_col = "turnout", # seed = 12345 #) @@ -55,7 +55,7 @@ as paste0(race, cand) (e.g., "pct_blackpct_cand_A"). # cand_cols = c("kemp", "abrams"), # race_cols = c("white", "black", "other"), # dat = gwinnett_ei, -# uniq = "precinct" +# precinct_id = "precinct" #) #head(precinct_results) From 76e3533dc4c45a0a89fcfd9d0b30548f7db597b1 Mon Sep 17 00:00:00 2001 From: Sean Long Date: Thu, 13 Nov 2025 16:10:20 -0800 Subject: [PATCH 3/3] Fix typo in NEWS.md: rpv_normalize not add_rpv_normalize --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c6b3785..e529c2b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ # eiCompare 3.0.4 ## Package changes -* added add_rpv_normalize() function +* added rpv_normalize() function * removed wru dependency * incorporated rpv_coef_plot() and rpv_toDF() functions from eiExpand package * edited ei_iter() to have flexible CI parameters (default is 0.95) using bayestestR for calculation and updated column naming, and to use reproducible parallel processing (.inorder=TRUE)