From 25e7772398cb6b5a86af9f93aa234801debc45f3 Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 12:01:01 -0400
Subject: [PATCH 01/10] HT function

---
 R/direct_estimation.R | 2 ++
 R/direct_variance.R   | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/R/direct_estimation.R b/R/direct_estimation.R
index 204516dd..411ca850 100644
--- a/R/direct_estimation.R
+++ b/R/direct_estimation.R
@@ -104,6 +104,7 @@ direct <- function(y,
                    design = NULL,
                    threshold = NULL,
                    var = FALSE, 
+                   HT = FALSE, 
                    boot_type = "naive", 
                    B = 50,
                    seed = 123,
@@ -161,6 +162,7 @@ direct <- function(y,
                            smp_data = framework$smp_data,  
                            smp_domains = framework$smp_domains_vec,
                            design = design,
+                           HT = HT, 
                            bootType = boot_type, 
                            B = B,
                            seed = seed,
diff --git a/R/direct_variance.R b/R/direct_variance.R
index 61497ccf..c9703a69 100644
--- a/R/direct_variance.R
+++ b/R/direct_variance.R
@@ -5,7 +5,8 @@ direct_variance <- function(direct_estimator,
                             smp_data,
                             smp_domains,
                             design,
-                            indicator, 
+                            indicator,
+                            HT, 
                             bootType, 
                             B = B,
                             seed,
@@ -50,6 +51,12 @@ direct_variance <- function(direct_estimator,
     B <- as.integer(B[1])
   }
   
+  # separate code for HT estimation 
+  if HT=="TRUE" {
+    browser() 
+  }
+  
+  
   bootType #<- match.arg(bootType)
   
   # if calibrate bootstrap is selected

From c6453b46c8c8c054e81baaa2da8945d5dc5a6b0f Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 16:10:07 -0400
Subject: [PATCH 02/10] Update direct_variance.R

Add HT direct estimates
---
 R/direct_variance.R | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/R/direct_variance.R b/R/direct_variance.R
index c9703a69..60067ff4 100644
--- a/R/direct_variance.R
+++ b/R/direct_variance.R
@@ -51,11 +51,7 @@ direct_variance <- function(direct_estimator,
     B <- as.integer(B[1])
   }
   
-  # separate code for HT estimation 
-  if HT=="TRUE" {
-    browser() 
-  }
-  
+
   
   bootType #<- match.arg(bootType)
   
@@ -80,6 +76,38 @@ direct_variance <- function(direct_estimator,
   smp_data$weight <- weights
   smp_data$Domain <- smp_domains
   
+  # separate code for HT estimation 
+  if (HT == TRUE) {
+    
+    
+    
+    domain_var <- function(df) {
+      tosum <-df[,2]*(df[,2]-1)*df[,1]^2 
+      nrow(df)^-2*sum(tosum)
+    }
+      
+
+    
+  if (indicator_name == "Mean") {
+    smp_data$indicator <- smp_data$y
+  }
+  else if (indicator_name == "Head_Count") {
+  smp_data$indicator <- as.integer(smp_data$y<threshold)
+}
+  else {
+    smp_data$indicator <- NA 
+  }
+ 
+    var <- as.vector(by(data=smp_data[c("indicator","weight")],INDICES=smp_data$Domain,FUN=domain_var))
+    
+    
+    varByDomain <- data.frame(Domain = rs, var = var)
+    indicator$varMethod <- "HT"
+  }
+  
+  else {
+  
+  
   # set seed for bootstrap
   if (!is.null(seed)) {
     set.seed(seed)
@@ -104,7 +132,6 @@ direct_variance <- function(direct_estimator,
                      envir = envir,
                      indicator_name = indicator_name)
                      #, ...)
-
   # if variance is calculated by domain
   if (byDomain) {
     var <- apply(b$t, 2, var, na.rm = TRUE)
@@ -113,9 +140,11 @@ direct_variance <- function(direct_estimator,
   } else {
     var <- var(b$t[, 1], na.rm = TRUE)
   }
+ 
   
   # preparation of return
   indicator$varMethod <- "bootstrap"
+  } # close HT else loop 
   indicator$var <- var
   if (byDomain) {
     indicator$varByDomain <- varByDomain

From a27830cdbf80b22b799a96ffba082fe835ca310e Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 16:54:38 -0400
Subject: [PATCH 03/10] Add HT option to direct variance

---
 R/direct_estimation.R          |    5 +-
 R/prepost_estimation_helpers.R | 1139 ++++++++++++++++++++++++++++++++
 2 files changed, 1142 insertions(+), 2 deletions(-)
 create mode 100644 R/prepost_estimation_helpers.R

diff --git a/R/direct_estimation.R b/R/direct_estimation.R
index 411ca850..10ba4af3 100644
--- a/R/direct_estimation.R
+++ b/R/direct_estimation.R
@@ -111,9 +111,10 @@ direct <- function(y,
                    X_calib = NULL, 
                    totals = NULL,
                    custom_indicator = NULL,
-                   na.rm = FALSE){
-  
+                   na.rm = FALSE,
+                   HT = FALSE){
   
+  smp_data <- as.data.frame(smp_data)
   
   direct_check(y = y, smp_data = smp_data, smp_domains = smp_domains, 
                 weights = weights, design = design, threshold = threshold, 
diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
new file mode 100644
index 00000000..b89e867b
--- /dev/null
+++ b/R/prepost_estimation_helpers.R
@@ -0,0 +1,1139 @@
+#' Create Descriptive Statistics for Small Area Estimation Report
+#'
+#' This function estimates the coefficient of variation at level specified, basic
+#' statistics such number of units, regions and target areas as well as the
+#' threshold on which SAE is applied and the outcome indicator of interest
+#' (i.e. poverty line and poverty rate). These indicators are all expressed for
+#' the census and survey
+#'
+#' @param ebp_object the EBP object produced from by EMDI from unit model estimation
+#' the object is of class "ebp emdi"
+#' @param smp_weights the sample weight variable in the household dataset
+#' (i.e. the training data), include column of 1s (DO NOT LEAVE UNSPECIFIED)
+#' @param pop_weights the population weight variable in the census (or synthetic)
+#' census dataset (i.e. the test data), include column of 1s (NO NOT LEAVE
+#' UNSPECIFIED)
+#' @param repvar the variable level at which Coefficient of Variation should be
+#' computed
+#' @param welfare the welfare aggregate variable or outcome variable of interest
+#' @param smp_data the survey/training data
+#' @param pop_data the population/census/training data
+#' @param threshold the poverty line or threshold specified
+#' @param pop_domains the target area variable within `pop_data`
+#' @param smp_domains the target area variable within `smp_data`
+#' 
+#' @examples 
+#' data("eusilcA_pop2")
+#' data("eusilcA_smp2")
+#' #### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'
+#'### model estimation
+#'ebp_reportdescriptives(ebp_object = emdi_model,
+#'                       smp_weights = "weight",
+#'                       pop_weights = "popweights",
+#'                       repvar = "state",
+#'                       welfare = "eqIncome",
+#'                       smp_data = eusilcA_smp2, 
+#'                       pop_data = eusilcA_pop2, 
+#'                       threshold = 11000, 
+#'                       pop_domains = "district", 
+#'                       smp_domains = "district")
+#'                       
+#'
+#'
+#' @export
+#'
+
+ebp_reportdescriptives <- function(ebp_object,
+                                   smp_weights,
+                                   pop_weights,
+                                   repvar,
+                                   welfare,
+                                   smp_data,
+                                   pop_data,
+                                   threshold,
+                                   pop_domains,
+                                   smp_domains,
+                                   HT = TRUE){
+  
+  ###get list of variables
+  hh_varlist <- colnames(ebp_object$framework$smp_data)
+  pop_varlist <- hh_varlist[!(hh_varlist %in% c(welfare, smp_weights))]
+
+  ### subset the survey and census data
+  smp_df <- smp_data[complete.cases(smp_data[,hh_varlist]), 
+                     c(hh_varlist, repvar, smp_weights)]
+  pop_df <- pop_data[complete.cases(pop_data[,pop_varlist]),
+                     c(pop_varlist, repvar, pop_weights)]
+  
+  
+  
+  ### ---------- Estimate CV for census and survey at repvar level --------- ###
+  
+  ##### create a dataset with headcounts, MSEs for survey and census information
+  smp_doms <- unique(smp_df[[smp_domains]])
+  
+  ##### quickly rename column names in the MSE section of the EBP object
+  colnames(ebp_object$MSE)[!grepl("Domain",
+                                  colnames(ebp_object$MSE))] <-
+    paste0("MSE_", colnames(ebp_object$MSE)[!grepl("Domain",
+                                                   colnames(ebp_object$MSE))])
+  
+  df <- merge(x = ebp_object$MSE[, c("Domain", "MSE_Head_Count")],
+              y = ebp_object$ind[, c("Domain", "Head_Count")],
+              by = "Domain")
+  
+  
+  
+  df$in_sample <- ifelse(df$Domain %in% smp_doms, 1, 0)
+  
+  # df$Domain <- as.integer(as.character(df$Domain))
+  
+  add_df <-
+    data.frame(Domain = names(tapply(smp_df[[smp_weights]],
+                                     smp_df[[smp_domains]],
+                                     sum,
+                                     na.rm = TRUE)),
+               smp_weights = tapply(smp_df[[smp_weights]],
+                                    smp_df[[smp_domains]],
+                                    sum,
+                                    na.rm = TRUE))
+  
+  df <- merge(x = df, y = add_df, by = "Domain")
+  
+  add_df <-
+    data.frame(Domain = names(tapply(pop_df[[pop_weights]],
+                                             pop_df[[pop_domains]],
+                                             sum,
+                                             na.rm = TRUE)),
+               pop_weights = tapply(pop_df[[pop_weights]],
+                                    pop_df[[pop_domains]],
+                                    sum,
+                                    na.rm = TRUE))
+  
+  df <- merge(x = df, y = add_df, by = "Domain")
+  
+  ### add the repvar variable to df as well
+  pop_df$Domain <- pop_df[[pop_domains]]
+  
+  add_df <- unique(pop_df[, c("Domain", repvar)])
+  
+  df <- merge(x = df,
+              y = add_df[, c("Domain", repvar)],
+              by = "Domain")
+  
+  df$CV <- df$MSE_Head_Count / df$Head_Count
+  
+  ### compute the cvs for census and survey at repvar level
+  naivevar_dt <- direct(y = welfare,
+                        smp_data = ebp_object$framework$smp_data,
+                        smp_domains = smp_domains,
+                        design = designvar,
+                        weights = smp_weights,
+                        threshold = threshold,
+                        var = TRUE)
+  
+  naivevar_dt$ind$Direct_Head_Count_CV <- sqrt(naivevar_dt$MSE$Head_Count) / naivevar_dt$ind$Head_Count
+  
+  
+  add_df <- data.frame(unique(df[[repvar]]))
+  
+  colnames(add_df) <- repvar
+  
+  add_df$sum_smp_weights <- tapply(X = df$smp_weights,
+                                   INDEX = df[[repvar]],
+                                   FUN = sum,
+                                   na.rm = TRUE)
+  add_df$sum_pop_weights <- tapply(X = df$pop_weights,
+                                   INDEX = df[[repvar]],
+                                   FUN = sum,
+                                   na.rm = TRUE)
+  df <- merge(x = df, y = add_df, by = repvar)
+  
+  povrate <- weighted.mean(x = df$Head_Count,
+                           w = df$smp_weights,
+                           na.rm = TRUE)
+  
+  
+  df$smp_weights <- df$smp_weights / df$sum_smp_weights
+  df$pop_weights <- df$pop_weights / df$sum_pop_weights
+  
+  df <- merge(x = df,
+              y = naivevar_dt$ind[, c("Domain", "Direct_Head_Count_CV")],
+              by = "Domain")
+  
+  
+  cv_df <-
+    data.frame(indicator = paste0("CV for Area: ", unique(df[[repvar]])),
+               ebp_cv = tapply(X = df$CV * df$pop_weights,
+                               INDEX = df[[repvar]],
+                               FUN = sum,
+                               na.rm = TRUE),
+               direct_cv = tapply(X = df$Direct_Head_Count_CV * df$smp_weights,
+                               INDEX = df[[repvar]],
+                               FUN = sum,
+                               na.rm = TRUE))
+  
+  #### ----------------- add other elements of the table ----------------- ####
+  ##### compute number of households in census and survey
+  basic_df <-
+    data.frame(indicator = c("Number of Units", "Number of Regions",
+                             "Number of Target Areas"),
+               census = c(round(sum(pop_df[[pop_weights]], na.rm = TRUE)),
+                          length(unique(pop_df[[repvar]][is.na(pop_df[[repvar]]) == FALSE])),
+                          length(unique(pop_df[[pop_domains]][is.na(pop_df[[smp_domains]]) == FALSE]))),
+               survey = c(ebp_object$framework$N_smp,
+                          length(unique(smp_df[[repvar]][is.na(smp_df[[repvar]]) == FALSE])),
+                          length(unique(smp_df[[smp_domains]][is.na(smp_df[[smp_domains]]) == FALSE]))))
+  
+  basic_df$census <- as.integer(basic_df$census)
+  basic_df$survey <- as.integer(basic_df$survey)
+  
+  ##### compute poverty numbers
+  smp_data$poor <- ifelse(smp_data[[welfare]] < threshold, 1, 0)
+  
+  smp_data[[smp_weights]] <-
+    smp_data[[smp_weights]] / sum(smp_data[[smp_weights]],
+                                  na.rm = TRUE)
+  
+  pov_df <-
+    data.frame(indicator = c("National Poverty Rate", "National Poverty Line"),
+               model = c(povrate, threshold),
+               survey = c(sum(smp_data$poor * smp_data[[smp_weights]]),
+                          threshold))
+  
+  row.names(cv_df) <- NULL
+  
+  return(list(cv_table = cv_df,
+              basicinfo_df = basic_df,
+              poverty_df = format(pov_df, scientific = FALSE)))
+  
+  
+}
+
+#' Perform test for difference between survey and census means
+#'
+#' This function computes weighted means of the same set of variables within the
+#' census and the survey. A test for difference of the means are performed for
+#' each variable with two-tailed p-values returned.
+#'
+#' @param smp_data the survey/training data
+#' @param pop_data the population/census/training data
+#' @param varlist character vector, the set of variables of interest 
+#' (ensure all factor variables are converted into integers to avoid error messages)
+#' @param smp_weights the sample weight variable in the household dataset
+#' (i.e. the training data), include column of 1s (DO NOT LEAVE UNSPECIFIED)
+#' @param pop_weights the population weight variable in the census (or synthetic)
+#' census dataset (i.e. the test data), include column of 1s (NO NOT LEAVE
+#' UNSPECIFIED)
+#' 
+#' @examples 
+#' data("eusilcA_pop2")
+#' data("eusilcA_smp2")
+#' 
+#' #### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'
+#'### model estimation
+#'ebp_test_means(varlist = variables,
+#'               smp_weights = "weight",
+#'               pop_weights = "popweights",
+#'               smp_data = eusilcA_smp2, 
+#'               pop_data = eusilcA_pop2)
+#' 
+#'
+#' @export
+
+ebp_test_means <- function(smp_data,
+                           pop_data,
+                           varlist,
+                           smp_weights,
+                           pop_weights){
+  
+  ### get the set of complete cases of the variables as
+  ### would have been used in model estimation
+  smp_df <- smp_data[complete.cases(smp_data[,c(varlist, smp_weights)]),
+                     c(varlist, smp_weights)]
+  pop_df <- pop_data[complete.cases(pop_data[,c(varlist, pop_weights)]),
+                     c(varlist, pop_weights)]
+  
+  weighted.sd <- function(x, w){
+    
+    delta_sq <- (x - mean(x))^2 ##square deviation of the xs
+    
+    nzero_w <- (length(w[w > 0]) - 1) / length(w[w > 0])
+    
+    result <- sqrt(sum(w * (delta_sq)) / (nzero_w * sum(w)))
+    
+    return(result)
+  }
+  
+  smp_means_df <- data.frame(smp_means = apply(X = smp_df[,varlist],
+                                               MARGIN = 2,
+                                               FUN = weighted.mean,
+                                               w = smp_df[[smp_weights]]),
+                             smp_sd = apply(X = smp_df[,varlist],
+                                            MARGIN = 2,
+                                            FUN = weighted.sd,
+                                            w = smp_df[[smp_weights]]),
+                             variable = varlist)
+  
+  pop_means_df <- data.frame(pop_means = apply(X = pop_df[,varlist],
+                                               MARGIN = 2,
+                                               FUN = weighted.mean,
+                                               w = pop_df[[pop_weights]]),
+                             pop_sd = apply(X = pop_df[,varlist],
+                                            MARGIN = 2,
+                                            FUN = weighted.sd,
+                                            w = pop_df[[pop_weights]]),
+                             variable = varlist)
+  
+  means_df <- merge(smp_means_df, pop_means_df, by = "variable")
+  
+  means_df$diff_sd <- sqrt((means_df$smp_sd)^2 + (means_df$pop_sd)^2)
+  
+  means_df$diff <- means_df$pop_means - means_df$smp_means
+  
+  means_df$zscore <- means_df$diff / means_df$diff_sd
+  
+  means_df$pvalue <- 2 * (1 - pnorm(abs(means_df$zscore)))
+  
+  return(means_df[, c("variable", "smp_means", "pop_means", "diff", "pvalue")])
+  
+}
+
+#' Produce coefficient table for reporting
+#'
+#' This function takes the object of class 'ebp' to present the regression
+#' model results having specified the number of decimal places.
+#'
+#' @param ebp_object the EBP object produced from by EMDI from unit model estimation
+#' the object is of class "ebp emdi"
+#' @param decimals the number of decimals to report on coefficient estimates
+#'
+#' @examples 
+#' data("eusilcA_pop")
+#' data("eusilcA_smp")
+#' 
+#' #### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'
+#'ebp_reportcoef_table(emdi_model, 4)
+#'
+#' @export
+
+
+ebp_reportcoef_table <- function(ebp_object,
+                                 decimals = 3) {
+  
+  specify_decimal <- function(x, k) trimws(format(round(x, k), nsmall=k)) ### round to decimal place
+  
+  options(scipen = 999) ##drop the scientific notation
+  
+  varname_dt <- as.data.frame(rownames(coef(summary(ebp_object$model))))
+  
+  colnames(varname_dt) <- "Variable"
+  
+  coef_dt <- as.data.frame(coef(summary(ebp_object$model)))
+  
+  coef_dt <- cbind(varname_dt, coef_dt)
+  
+  coef_dt$Sig <- ifelse(coef_dt$`p-value` < 0.001, "***",
+                        ifelse(coef_dt$`p-value` < 0.05 &
+                                 coef_dt$`p-value` >= 0.001, "**",
+                               ifelse(coef_dt$`p-value` < 0.01 &
+                                        coef_dt$`p-value` >= 0.05, "*", "")))
+  
+  # coef_dt[,Sig := ifelse(`p-value` < 0.001, "***",
+  #                        ifelse(`p-value` < 0.05 & `p-value` >= 0.001, "**",
+  #                               ifelse(`p-value` < 0.01 & `p-value` >= 0.05, "*", "")))]
+  
+  coef_dt$Value <- ifelse(coef_dt$Value < abs(0.0004999999),
+                          signif(coef_dt$Value, 2),
+                          specify_decimal(coef_dt$Value, decimals))
+  
+  # coef_dt[,Value := ifelse(Value < abs(0.0004999999),
+  #                          signif(Value, 2),
+  #                          specify_decimal(Value, decimals))]
+  
+  coef_dt$StdError <- ifelse(coef_dt$Std.Error < abs(0.0004999999),
+                             signif(coef_dt$Std.Error, 2),
+                             specify_decimal(coef_dt$Std.Error, decimals))
+  
+  # coef_dt[,StdError := ifelse(Std.Error < abs(0.0004999999),
+  #                              signif(Std.Error, 2),
+  #                              specify_decimal(Std.Error, decimals))]
+  
+  coef_dt$Value <- paste0(coef_dt$Value, coef_dt$Sig)
+  
+  ### quick relabellings
+  colnames(coef_dt)[colnames(coef_dt) %in% c("Value", "StdError")] <-
+    c("coeff", "std_error")
+  
+  rownames(coef_dt) <- seq(nrow(coef_dt))
+  
+  return(coef_dt[, c("Variable", "coeff", "std_error")])
+  
+}
+
+#' Produce EBP Head Count Population/Rate by Rank
+#'
+#' This function combines the ebp object with the census data to produce report
+#' tables that rank head count estimates either by population of poor or the
+#' head count rates themselves in descending order. The function allows the user
+#' to select the first/last "x" number of areas by name as well.
+#'
+#' @param ebp_object the EBP object produced from by EMDI from unit model estimation
+#' the object is of class "ebp emdi"
+#' @param pop_data the population/census/training data
+#' @param pop_domains the target area variable within `pop_data`
+#' @param pop_domnames the population domain names
+#' @param pop_weights the population weight variable in the census
+#' @param byrank_indicator if argument is "count", the function ranks the product
+#' of Head_Count (from object of class `ebp`) and `pop_weights`, otherwise it
+#' the function simply ranks Head_Count output within `ebp` object
+#' @param number_to_list an integer, the first `number_to_list` number of
+#' target areas to produce from `byrank_indicator` ordering.
+#' @param head a logical, if `TRUE` the top `number_to_list` results will be returned
+#' and if `FALSE` the bottom `number_to_list` will be returned
+#' 
+#' @examples 
+#' data("eusilcA_pop2")
+#' data("eusilcA_smp2")
+#' 
+#' #### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'                            
+#'### full data of highest population below threshold by rank (descending order)
+#'ebp_report_byrank(ebp_object = emdi_model,
+#'                  pop_data = eusilcA_pop2,
+#'                  pop_domnames = "district",
+#'                  pop_weights = "popweights")
+#'
+#'### full data of highest rate below threshold by rank (descending order)                   
+#'ebp_report_byrank(ebp_object = emdi_model,
+#'                  pop_data = eusilcA_pop2, 
+#'                  pop_domains = "district",
+#'                  pop_weights = "popweights",
+#'                  byrank_indicator = "rate")
+#'
+#'### bottom 10 poverty count below threshold by rank (in ascending order)                  
+#'ebp_report_byrank(ebp_object = emdi_model,
+#'                  pop_data = eusilcA_pop2,
+#'                  pop_domains = "district",
+#'                  pop_weights = "popweights",
+#'                  number_to_list = 10,
+#'                  head = FALSE)
+#'                    
+#'                                    
+#'
+#' @export
+
+
+ebp_report_byrank <- function(ebp_object,
+                              pop_data,
+                              pop_domains,
+                              pop_weights,
+                              byrank_indicator = "count",
+                              number_to_list = NULL,
+                              head = TRUE){
+  
+  ### compute population totals
+  pop_data <- pop_data[, c(pop_domains, pop_weights)]
+  
+  result_dt <- tapply(X = pop_data[[pop_weights]],
+                      INDEX = pop_data[[pop_domains]],
+                      FUN = sum,
+                      na.rm = TRUE)
+  
+  result_dt <- as.data.frame(result_dt)
+  
+  result_dt <- data.frame(domain = rownames(result_dt),
+                          population = result_dt[[1]])
+  
+  pop_data[[pop_domains]] <- as.character(pop_data[[pop_domains]])
+  
+  ### include the EBP Head_Count
+  
+  result_dt <- merge(x = result_dt,
+                     y = ebp_object$ind[, c("Domain", "Head_Count")],
+                     by.x = "domain",
+                     by.y = "Domain")
+  
+  result_dt$poor_count <- result_dt$Head_Count * result_dt$population
+  
+  ### rank order the table as requested
+  if (byrank_indicator == "count") {
+    
+    result_dt <- result_dt[order(-result_dt$poor_count),]
+    
+  } else {
+    
+    result_dt <- result_dt[order(-result_dt$Head_Count),]
+    
+  }
+  
+  if (is.null(number_to_list)){
+    
+    number_to_list <- nrow(result_dt)
+    
+  }
+  
+  if (head == TRUE) {
+    
+    result_dt <- head(result_dt, number_to_list)
+    
+  } else if (head == FALSE) {
+    
+    result_dt <- tail(result_dt, number_to_list)
+  }
+  
+  
+  return(result_dt)
+  
+}
+
+#' Coefficient of Variation (CV) estimations for Unit EBP Model Headcount Estimates
+#' 
+#' Function \code{ebp_compute_cv} estimates CVs for the headcount of the unit model
+#' EBP functions using three different methods. CV, by definition, is the ratio of
+#' mean square error of the head count to the  head count estimates. Therefore, the 
+#' CV types are distinguished by the method of estimating the mean square. 
+#' 
+#' Method 1 uses the calibrated/naive bootstrapping of the MSE which allows to calibrate 
+#' each bootstrap sample on auxiliary information using the \code{direct} function. 
+#' Calibrated bootstrap improves on the bias of the naive bootstrap when used in the 
+#' complex survey context (see \cite{Rao and Wu (1988)}) for more details.
+#' 
+#' Method 2 employs the Horowitz Thompson variance estimation technique to compute 
+#' MSE i.e. each household is assigned the probability selection within the sample
+#' under a given sampling scheme. The computation employs \code{sae::direct} function.
+#'
+#' Method 3 finally uses the design effect adjusted naive calibrated MSE. The design
+#' effect is estimated using the \code{survey::svydesign} function.  
+#' 
+#' @param ebp_object the EBP object produced from by EMDI from unit model estimation
+#' the object is of class "ebp emdi"
+#' @param smp_data the survey/training data
+#' @param welfare the welfare aggregate variable or outcome variable of interest
+#' @param calibvar the calibration variable to be used in method 1
+#' @param domainvar the target area variable
+#' @param boot_type the bootstrap type "calibrated" or "naive" to be used in method 1
+#' @param designvar the survey design variable to be used in estimating the design 
+#' effect for method 3.
+#' @param smp_weights the weight variable
+#' @param threshold the poverty line or threshold specified
+#' 
+#' @examples 
+#' data("eusilcA_pop2")
+#' data("eusilcA_smp2")
+#' 
+#'#### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'                            
+#'### full data of highest population below threshold by rank (descending order)
+#'ebp_report_byrank(ebp_object = emdi_model,
+#'                  smp_data = eusilcA_smp2,
+#'                  welfare = "eqIncome",
+#'                  calibvar = "state",
+#'                  domainvar = "district",
+#'                  threshold = 11000,
+#'                  smp_weights = "weight")           
+#'                  
+#'                                    
+#'@export
+#'
+
+ebp_compute_cv <- function(ebp_object,
+                           smp_data,
+                           welfare,
+                           calibvar,
+                           domainvar,
+                           boot_type = "calibrate",
+                           designvar = NULL,
+                           threshold,
+                           smp_weights){
+  
+  ## ******************** Direct Estimate : Mean and CV ************************
+  
+  
+  ## computing direct estimate using calibrated bootstrapping (EMDI + LAEKEN) - direct CV1
+  #### first prepare the calibration matrix
+  
+  calibmatrix <- create_calibmatrix(smp_data[[calibvar]])
+  
+  
+  direct_obj <- emdi::direct(y = welfare,
+                             smp_data = ebp_object$framework$smp_data,
+                             smp_domains = domainvar,
+                             weights = smp_weights,
+                             design = designvar,
+                             threshold = threshold,
+                             var = TRUE,
+                             HT = HT, 
+                             boot_type = boot_type,
+                             X_calib = calibmatrix,
+                             totals = NULL,
+                             na.rm = TRUE)
+  
+  direct_obj$ind$Direct_Head_Count_CV <- sqrt(direct_obj$MSE$Head_Count) / direct_obj$ind$Head_Count
+  
+  ## computing direct estimate using the Horowitz Thompson (HT) indicator - direct CV2
+  ### first compute poverty rates
+  poor <- as.integer(ebp_object$framework$smp_data[[welfare]] < threshold)
+  
+  domsize_dt <- as.data.frame(tapply(ebp_object$model$data[[smp_weights]],
+                                     ebp_object$model$data[[domainvar]],
+                                     sum,
+                                     na.rm = TRUE))
+  
+  colnames(domsize_dt) <- "popsize"
+  domsize_dt$Domain <- rownames(domsize_dt)
+  
+  domsize_dt <- domsize_dt[is.na(domsize_dt$popsize) == FALSE,]
+  
+  domsize_dt <- domsize_dt[,c("Domain", "popsize")]
+  
+  ### HT estimator CV for direct estimate
+  directht_dt <- sae::direct(y = poor,
+                             dom = ebp_object$model$data[[domainvar]],
+                             sweight = ebp_object$model$data[[smp_weights]],
+                             domsize = domsize_dt)
+  
+  directht_dt$Domain <- direct_obj$ind$Domain
+  ## Compute design effect controlled direct estimates and CVs. (direct CV3)
+  #### first estimate naive bootstrap
+  #### compute design effect
+  #### include psu list into the ebp data object
+  ebp_object$model$data$domainvar <- ebp_object$model$data[[domainvar]]
+  
+  ebp_object$model$data$poor <- as.integer(ebp_object$model$data[[welfare]] > log(threshold))
+  
+  ebp_object$model$data$weights <- smp_data[[smp_weights]]
+  
+  if(is.null(designvar)){
+    
+    ebpobj_svy <- survey::svydesign(ids = ~1,
+                                    weights = ~weights,
+                                    strata = NULL,
+                                    survey.lonely.psu = "adjust",
+                                    data = ebp_object$model$data)
+    
+  } else {
+    
+    ebp_object$model$data$designvar <- ebp_object$model$data[[designvar]]
+    
+    ebpobj_svy <- survey::svydesign(ids = ~1,
+                                    weights = ~weights,
+                                    strata = ~designvar,
+                                    survey.lonely.psu = "adjust",
+                                    data = ebp_object$model$data)
+    
+  }
+  
+  
+  deff_adjust <- survey::svymean(x = ~poor, ebpobj_svy, na = TRUE, deff = TRUE)
+  deff_adjust <- attr(deff_adjust, "deff")[1,1]
+  
+  ### multiple design effect with naive calibration
+  naivevar_dt <- direct(y = welfare,
+                        smp_data = ebp_object$framework$smp_data,
+                        smp_domains = domainvar,
+                        design = designvar,
+                        weights = smp_weights,
+                        threshold = threshold,
+                        var = TRUE)
+  
+  naivevar_dt$ind$deff_CV <- 
+    sqrt(deff_adjust) * (sqrt(naivevar_dt$MSE$Head_Count) / naivevar_dt$ind$Head_Count)
+  
+  ## ************************ SAE Model Estimates and CV Estimation ****************************
+  
+  ## compute standard CV using the EMDI package estimator function
+  emdi_dt <- emdi::estimators(object = ebp_object,
+                              indicator = "Head_Count",
+                              MSE = FALSE,
+                              CV = TRUE)
+  
+  result_dt <- emdi_dt$ind
+  
+  colnames(result_dt)[colnames(result_dt) %in% c("Head_Count", 
+                                                 "Head_Count_CV")] <- 
+    c("EBP_Head_Count", "EBP_Head_Count_CV")
+  
+  direct_dt <- direct_obj$ind[, c("Domain", "Head_Count", "Direct_Head_Count_CV")]
+  
+  colnames(direct_dt)[colnames(direct_dt) %in% c("Head_Count", 
+                                                 "Direct_Head_Count_CV")] <- 
+    c("Direct_Head_Count", "CB_Head_Count_CV")
+  
+  result_dt <- merge(result_dt, 
+                     direct_dt,
+                     on = "Domain")
+  
+  direct_dt <- directht_dt[, c("Domain", "CV")]
+  
+  direct_dt$CV <- direct_dt$CV / 100
+  
+  direct_dt$Domain <- as.factor(direct_dt$Domain)
+  
+  colnames(direct_dt)[colnames(direct_dt) %in% "CV"] <- "HT_Head_Count_CV"
+  
+  result_dt <- merge(result_dt,
+                     direct_dt,
+                     on = "Domain")
+  
+  direct_dt <- naivevar_dt$ind[, c("Domain", "deff_CV")]
+  
+  colnames(direct_dt)[colnames(direct_dt) %in% "deff_CV"] <- "DesignEffect_CV"
+  
+  result_dt <- merge(result_dt,
+                     direct_dt,
+                     on = "Domain")
+  
+  result_dt <- result_dt[,c("Domain", "Direct_Head_Count", "EBP_Head_Count", 
+                            "HT_Head_Count_CV", "CB_Head_Count_CV", 
+                            "DesignEffect_CV", "EBP_Head_Count_CV")]
+  
+  
+  return(result_dt)
+  
+}
+
+
+create_calibmatrix <- function(x){
+  
+  
+  unique_obs <- unique(x)
+  
+  result <- 
+  lapply(unique_obs,
+         function(y) {
+           
+           z <- as.integer(y == x)
+           
+           return(z)
+           
+         })
+  
+  result <- do.call(cbind, result)
+  
+  colnames(result) <- unique_obs
+  
+  return(result)
+
+}
+
+
+#' Compare estimated and direct parameters in aggregate area levels
+#' 
+#' The function \code{aggregate_saedirect} computes the head count, poverty gap
+#' and gini estimates at higher levels of aggregation than that estimated by
+#' \code{ebp}. This serves to compare at a more representative level the 
+#' model estimates as well as direct survey estimates for each of the three
+#' parameters. 
+#' 
+#' @param ebp_object the EBP object produced from by EMDI from unit model estimation
+#' the object is of class "ebp emdi"
+#' @param smp_data a data.frame; the survey/training dataframe
+#' @param pop_data data.frame; the population/census dataframe
+#' @param welfare character; the welfare aggregate variable or outcome variable of interest
+#' @param pop_domains character; the domain variable within the population/census dataframe
+#' @param smp_domains character; the domain variable within the survey dataframe
+#' @param smp_weights character; the sample weight variable
+#' @param pop_weights character; the population weight variable
+#' @param threshold numeric; the poverty line or threshold specified
+#' @param indicator character; an outcome indicator of interest. Options are "Head_Count",
+#' "Gini" and "Poverty_Gap"
+#' @param in_sample logical, if TRUE only in-sample areas will be used in estimating the 
+#' aggregated EBP model estimates. Otherwise, all areas are included in estimating the means
+#' 
+#' @examples 
+#' data("eusilcA_pop2")
+#' data("eusilcA_smp2")
+#' 
+#'#### set of variables used in model estimation
+#'variables <- c("gender", "eqsize", "cash", "self_empl",
+#'               "unempl_ben", "age_ben", "surv_ben",
+#'               "sick_ben", "dis_ben", "rent", "fam_allow",
+#'               "house_allow", "cap_inv", "tax_adj")
+#'
+#'### estimate a unit model
+#'emdi_model <- emdiplus::ebp(fixed = as.formula(paste("eqIncome ~ ", paste(variables, 
+#'                                                                          collapse= "+"))),
+#'                            pop_data = eusilcA_pop2, 
+#'                            pop_domains = "district", 
+#'                            smp_data = eusilcA_smp2, 
+#'                            smp_domains = "district",
+#'                            na.rm = TRUE,
+#'                            weights = "weight",
+#'                            pop_weights = "popweights",
+#'                            MSE = TRUE,
+#'                            threshold = 11000,
+#'                            B = 2,
+#'                            L = 2)
+#'
+#'  ### estimate aggregated poverty headcounts (EBP and Direct) for regions                            
+#' aggregate_saedirect(ebp_object = emdi_model,
+#'                     smp_data = eusilcA_smp2,
+#'                     pop_data = eusilcA_pop2, 
+#'                     welfare = "eqIncome",
+#'                     smp_domains = "district",
+#'                     pop_domains = "district",
+#'                     pop_weights = "popweights",
+#'                     pop_regions = "state",
+#'                     smp_weights = "weight",
+#'                     threshold = 11000,
+#'                     indicator = "Head_Count",
+#'                     in_sample = FALSE,
+#'                     smp_regions = "state")
+#'                     
+#'  ### estimate aggregated gini (EBP and Direct) for regions                            
+#' aggregate_saedirect(ebp_object = emdi_model,
+#'                     smp_data = eusilcA_smp2,
+#'                     pop_data = eusilcA_pop2, 
+#'                     welfare = "eqIncome",
+#'                     smp_domains = "district",
+#'                     pop_domains = "district",
+#'                     pop_weights = "popweights",
+#'                     pop_regions = "state",
+#'                     smp_weights = "weight",
+#'                     threshold = 11000,
+#'                     indicator = "Gini",
+#'                     in_sample = FALSE,
+#'                     smp_regions = "state")
+#'                     
+#'  ### estimate aggregated Poverty_Gap (EBP and Direct) for regions                            
+#' aggregate_saedirect(ebp_object = emdi_model,
+#'                     smp_data = eusilcA_smp2,
+#'                     pop_data = eusilcA_pop2, 
+#'                     welfare = "eqIncome",
+#'                     smp_domains = "district",
+#'                     pop_domains = "district",
+#'                     pop_weights = "popweights",
+#'                     pop_regions = "state",
+#'                     smp_weights = "weight",
+#'                     threshold = 11000,
+#'                     indicator = "Head_Count",
+#'                     in_sample = FALSE,
+#'                     smp_regions = "state")
+#'                     
+#'  @export
+
+
+aggregate_saedirect <- function(ebp_object,
+                                smp_data,
+                                pop_data,
+                                welfare,
+                                smp_domains,
+                                pop_domains,
+                                pop_weights,
+                                pop_regions,
+                                smp_regions,
+                                smp_weights,
+                                threshold,
+                                indicator,
+                                in_sample = TRUE) {
+  
+  
+  #### compute the small area estimates at regionvar level
+  
+  ### include pop_weights and pop_regions in the ebp indicator dataset
+  pop_df <- pop_data[, c(pop_domains, pop_weights)]
+  
+  pop_df <- as.data.frame(tapply(X = pop_df[[pop_weights]],
+                                 INDEX = pop_df[[pop_domains]],
+                                 FUN = sum,
+                                 na.rm = TRUE))
+  
+  colnames(pop_df) <- "pop_weights"
+  
+  pop_df$Domain <- rownames(pop_df)
+  
+  ### add indicator for in and out of sample
+  ebp_object$ind$in_sample <- ifelse(ebp_object$ind$Domain %in% 
+                                       unique(smp_data[,smp_domains]), 
+                                     1, 
+                                     0)
+  
+  ebp_object$ind <- merge(x = ebp_object$ind,
+                          y = pop_df,
+                          by = "Domain")
+  
+  ebp_object$ind <- merge(x = ebp_object$ind,
+                          y = unique(pop_data[, c(pop_domains, 
+                                                  pop_regions)]),
+                          by.x = "Domain",
+                          by.y = pop_domains)
+  
+  
+  ### quick function to compute weighted mean since weighted.mean 
+  ### is failing for whatever reason
+  wgt.mean <- function(x, na.rm) {
+    
+    x <- x[1:length(x)/2]
+    w <- x[(length(x)/2) + 1 : length(x)]
+    
+    df <- data.frame(x = x,
+                     w = w)
+    
+    df <- df[complete.cases(df),]
+    
+    w <- df$w
+    x <- df$x
+    
+    y <- sum(w * x, na.rm = na.rm) / sum(w, na.rm = na.rm)
+    
+    return(y)
+    
+  }
+  
+  ### regional SAE computation
+  if (in_sample == TRUE){
+    
+    sae_df <- as.data.frame(tapply(X = c(ebp_object$ind[ebp_object$ind$in_sample == 1,
+                                                        indicator],
+                                         ebp_object$ind[ebp_object$ind$in_sample == 1,
+                                                        "pop_weights"]),
+                                   INDEX = rep(ebp_object$ind[ebp_object$ind$in_sample == 1,
+                                                          pop_regions], 2),
+                                   FUN = wgt.mean,
+                                   na.rm = TRUE))
+    
+  } else {
+    
+    sae_df <- as.data.frame(tapply(X = c(ebp_object$ind[[indicator]], 
+                                         ebp_object$ind[["pop_weights"]]),
+                                   INDEX = rep(ebp_object$ind[[pop_regions]], 
+                                               2),
+                                   FUN = wgt.mean,
+                                   na.rm = TRUE))
+    
+  }
+  
+  colnames(sae_df) <- paste0("EBP_", indicator)
+  
+  sae_df[[pop_regions]] <- rownames(sae_df)
+  
+  rownames(sae_df) <- NULL
+  
+  ### compute direct estimates
+  call_df <- data.frame(argument = c("Head_Count", 
+                                     "Gini", 
+                                     "Poverty_Gap"),
+                        call = c("compute_headcount", 
+                                 "compute_gini", 
+                                 "compute_gap"))
+  
+  function_call <- call_df[call_df$argument == indicator, "call"]
+  
+  
+  if (indicator %in% c("Head_Count", "Poverty_Gap")) {
+    
+    direct_df <- as.data.frame(tapply(X = c(smp_data[[welfare]],
+                                            smp_data[[smp_weights]]),
+                                      INDEX = rep(smp_data[[smp_regions]], 2),
+                                      FUN = get(function_call),
+                                      threshold = threshold))
+    
+    
+  } else {
+    
+    direct_df <- as.data.frame(tapply(X = c(smp_data[[welfare]], 
+                                            smp_data[[smp_weights]]),
+                                      INDEX = rep(smp_data[[smp_regions]], 2),
+                                      FUN = get(function_call)))
+    
+  }
+  
+  direct_df <- cbind(data.frame(rownames(direct_df)), direct_df)
+    
+  colnames(direct_df) <- c(smp_regions, paste0("Direct_", indicator))
+  
+  row.names(direct_df) <- NULL
+  
+  ## combine results
+  results_df <- merge(direct_df, sae_df, by = smp_regions)
+  
+  return(results_df)
+  
+}
+
+
+
+#### --------  internals
+
+compute_gini <- function(welfare) {
+
+  
+  welfare <- welfare[1:length(welfare)/2]
+  weight <- welfare[(length(welfare)/2) + 1 : length(welfare)]
+  
+  if (is.null(weight)){
+    weight <- rep(1, length(welfare))
+    }
+
+  df <- data.frame(welfare = welfare,
+                   weight = weight)
+
+  df <- df[complete.cases(df),]
+
+  welfare <- df$welfare
+  weight <- df$weight
+  
+  weight <- weight/sum(weight)
+  
+  order <- order(welfare)
+  welfare <- welfare[order]
+  weight <- weight[order]
+  p <- cumsum(weight)
+  nu <- cumsum(weight * welfare)
+  n <- length(nu)
+  nu <- nu/nu[n]
+  gini <- sum(nu[-1] * p[-n]) - sum(nu[-n] * p[-1])
+
+  return(gini)  
+}
+
+compute_gap <- function(welfare, threshold){
+  
+  welfare <- welfare[1:length(welfare)/2]
+  weight <- welfare[(length(welfare)/2) + 1 : length(welfare)]
+  
+  df <- data.frame(welfare = welfare,
+                   weight = weight)
+  
+  df <- df[complete.cases(df),]
+  
+  welfare <- df$welfare
+  weight <- df$weight
+  
+  
+  pov_status <- (welfare < threshold)
+  
+  relative_distance <- (1 - (welfare[pov_status] / threshold))
+  
+  weight_pov <- weight[pov_status]
+  
+  weight_total <- sum(weight)
+  
+  fgt1 <- sum(relative_distance * weight_pov) / weight_total
+  
+  return(fgt1)
+  
+}
+
+compute_headcount <- function(welfare, threshold){
+  
+  welfare <- welfare[1:length(welfare)/2]
+  weight <- welfare[(length(welfare)/2) + 1 : length(welfare)]
+  
+  df <- data.frame(welfare = welfare,
+                   weight = weight)
+  
+  df <- df[complete.cases(df),]
+  
+  welfare <- df$welfare
+  weight <- df$weight
+  
+  pov_status <- as.integer(welfare < threshold)
+  
+  fgt0 <- sum(pov_status * weight, na.rm = TRUE) / sum(weight, na.rm = TRUE)
+  
+  return(fgt0)
+  
+}
+
+
+

From e90f3df3995188e08fb216e9b3108f52f7f07794 Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 16:57:12 -0400
Subject: [PATCH 04/10] Add HT option to direct variance

---
 R/direct_estimation.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/direct_estimation.R b/R/direct_estimation.R
index 10ba4af3..b53e2c44 100644
--- a/R/direct_estimation.R
+++ b/R/direct_estimation.R
@@ -111,8 +111,7 @@ direct <- function(y,
                    X_calib = NULL, 
                    totals = NULL,
                    custom_indicator = NULL,
-                   na.rm = FALSE,
-                   HT = FALSE){
+                   na.rm = FALSE){
   
   smp_data <- as.data.frame(smp_data)
   

From 0b9a7bb8cbecc8c3c398a70854a81001bea3857a Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 17:08:16 -0400
Subject: [PATCH 05/10] Update prepost_estimation_helpers.R

---
 R/prepost_estimation_helpers.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index b89e867b..c32e73b2 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -73,6 +73,7 @@ ebp_reportdescriptives <- function(ebp_object,
                                    threshold,
                                    pop_domains,
                                    smp_domains,
+                                   design = NULL,
                                    HT = TRUE){
   
   ###get list of variables
@@ -147,7 +148,7 @@ ebp_reportdescriptives <- function(ebp_object,
   naivevar_dt <- direct(y = welfare,
                         smp_data = ebp_object$framework$smp_data,
                         smp_domains = smp_domains,
-                        design = designvar,
+                        design = design,
                         weights = smp_weights,
                         threshold = threshold,
                         var = TRUE)

From a8f6311c79d06239a7475c2137157bd5607cefc9 Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 17:19:27 -0400
Subject: [PATCH 06/10] Update prepost_estimation_helpers.R

---
 R/prepost_estimation_helpers.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index c32e73b2..cad0039f 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -642,7 +642,7 @@ ebp_compute_cv <- function(ebp_object,
                            calibvar,
                            domainvar,
                            boot_type = "calibrate",
-                           designvar = NULL,
+                           design = NULL,
                            threshold,
                            smp_weights){
   
@@ -659,7 +659,7 @@ ebp_compute_cv <- function(ebp_object,
                              smp_data = ebp_object$framework$smp_data,
                              smp_domains = domainvar,
                              weights = smp_weights,
-                             design = designvar,
+                             design = design,
                              threshold = threshold,
                              var = TRUE,
                              HT = HT, 

From c6b99b3cc237598b4e4a6c0676d3cd763a760dae Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Tue, 9 May 2023 17:24:28 -0400
Subject: [PATCH 07/10] Update prepost_estimation_helpers.R

---
 R/prepost_estimation_helpers.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index cad0039f..75a5ac47 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -593,7 +593,7 @@ ebp_report_byrank <- function(ebp_object,
 #' @param calibvar the calibration variable to be used in method 1
 #' @param domainvar the target area variable
 #' @param boot_type the bootstrap type "calibrated" or "naive" to be used in method 1
-#' @param designvar the survey design variable to be used in estimating the design 
+#' @param design the survey design variable to be used in estimating the design 
 #' effect for method 3.
 #' @param smp_weights the weight variable
 #' @param threshold the poverty line or threshold specified
@@ -703,7 +703,7 @@ ebp_compute_cv <- function(ebp_object,
   
   ebp_object$model$data$weights <- smp_data[[smp_weights]]
   
-  if(is.null(designvar)){
+  if(is.null(design)){
     
     ebpobj_svy <- survey::svydesign(ids = ~1,
                                     weights = ~weights,
@@ -713,11 +713,11 @@ ebp_compute_cv <- function(ebp_object,
     
   } else {
     
-    ebp_object$model$data$designvar <- ebp_object$model$data[[designvar]]
+    ebp_object$model$data$design <- ebp_object$model$data[[design]]
     
     ebpobj_svy <- survey::svydesign(ids = ~1,
                                     weights = ~weights,
-                                    strata = ~designvar,
+                                    strata = ~design,
                                     survey.lonely.psu = "adjust",
                                     data = ebp_object$model$data)
     
@@ -731,7 +731,7 @@ ebp_compute_cv <- function(ebp_object,
   naivevar_dt <- direct(y = welfare,
                         smp_data = ebp_object$framework$smp_data,
                         smp_domains = domainvar,
-                        design = designvar,
+                        design = design,
                         weights = smp_weights,
                         threshold = threshold,
                         var = TRUE)

From 8efcaf6ec4f6e36150e9e356d86f9d3a517c128d Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Wed, 10 May 2023 10:11:11 -0400
Subject: [PATCH 08/10] edits to ebp_reportdescriptives

---
 R/prepost_estimation_helpers.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index 75a5ac47..c491e444 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -197,14 +197,15 @@ ebp_reportdescriptives <- function(ebp_object,
   #### ----------------- add other elements of the table ----------------- ####
   ##### compute number of households in census and survey
   basic_df <-
-    data.frame(indicator = c("Number of Units", "Number of Regions",
+    data.frame(indicator = c("Number of Units","Number of Regions",
                              "Number of Target Areas"),
-               census = c(round(sum(pop_df[[pop_weights]], na.rm = TRUE)),
+               census = c(length(pop_df[[pop_weights]][is.na(pop_df[[pop_weights]]) == FALSE]), 
                           length(unique(pop_df[[repvar]][is.na(pop_df[[repvar]]) == FALSE])),
                           length(unique(pop_df[[pop_domains]][is.na(pop_df[[smp_domains]]) == FALSE]))),
                survey = c(ebp_object$framework$N_smp,
                           length(unique(smp_df[[repvar]][is.na(smp_df[[repvar]]) == FALSE])),
                           length(unique(smp_df[[smp_domains]][is.na(smp_df[[smp_domains]]) == FALSE]))))
+  #round(sum(pop_df[[pop_weights]], na.rm = TRUE)),
   
   basic_df$census <- as.integer(basic_df$census)
   basic_df$survey <- as.integer(basic_df$survey)

From d7f55873d6511b4557e51b6569c0a702d5e8d49a Mon Sep 17 00:00:00 2001
From: Ifeanyi Edochie <ifeanyi.edochie@gmail.com>
Date: Wed, 10 May 2023 10:26:26 -0400
Subject: [PATCH 09/10] Update prepost_estimation_helpers.R

just quick fix
---
 R/prepost_estimation_helpers.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index c491e444..f2c3252b 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -190,9 +190,9 @@ ebp_reportdescriptives <- function(ebp_object,
                                FUN = sum,
                                na.rm = TRUE),
                direct_cv = tapply(X = df$Direct_Head_Count_CV * df$smp_weights,
-                               INDEX = df[[repvar]],
-                               FUN = sum,
-                               na.rm = TRUE))
+                                  INDEX = df[[repvar]],
+                                  FUN = sum,
+                                  na.rm = TRUE))
   
   #### ----------------- add other elements of the table ----------------- ####
   ##### compute number of households in census and survey

From a543f9a331a7bd6be28f28b0c943c351770a867e Mon Sep 17 00:00:00 2001
From: davidn10 <david.newhouse@gmail.com>
Date: Wed, 10 May 2023 15:14:12 -0400
Subject: [PATCH 10/10] Update prepost_estimation_helpers.R

add pop domain to pop df
---
 R/prepost_estimation_helpers.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/prepost_estimation_helpers.R b/R/prepost_estimation_helpers.R
index c491e444..bc603a20 100644
--- a/R/prepost_estimation_helpers.R
+++ b/R/prepost_estimation_helpers.R
@@ -84,7 +84,7 @@ ebp_reportdescriptives <- function(ebp_object,
   smp_df <- smp_data[complete.cases(smp_data[,hh_varlist]), 
                      c(hh_varlist, repvar, smp_weights)]
   pop_df <- pop_data[complete.cases(pop_data[,pop_varlist]),
-                     c(pop_varlist, repvar, pop_weights)]
+                     c(pop_varlist, repvar, pop_weights,pop_domains)]