From faf53edd6d69a75d7fed30b323c18f94ae59cd70 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Tue, 30 Sep 2025 22:59:29 +0530 Subject: [PATCH] improve documentation --- R/compute_fgt_new.R | 18 +++----------- R/duckdb_func.R | 17 +++++-------- R/fg_pip.R | 11 --------- R/pip.R | 9 ------- R/pip_grp_logic.R | 12 --------- R/rg_pip.R | 8 ++---- R/utils.R | 51 +-------------------------------------- man/compute_fgt_dt.Rd | 4 +-- man/is_empty.Rd | 2 +- man/load_inter_cache.Rd | 2 +- man/map_fgt.Rd | 2 +- man/pip_agg.Rd | 6 ++--- man/pipapi-package.Rd | 2 +- man/return_if_exists.Rd | 2 +- man/rg_pip.Rd | 2 +- man/ui_hp_countries.Rd | 5 +++- man/ui_pc_charts.Rd | 5 +++- man/ui_pc_regional.Rd | 5 +++- man/update_master_file.Rd | 2 +- 19 files changed, 35 insertions(+), 130 deletions(-) diff --git a/R/compute_fgt_new.R b/R/compute_fgt_new.R index 6c68b139..69d27be7 100644 --- a/R/compute_fgt_new.R +++ b/R/compute_fgt_new.R @@ -1,7 +1,7 @@ # OLD APPROACH WITH MEAN -------------- -# Efficient FGT calculation for a data.table and vector of poverty lines -#' Title +#' Efficient FGT calculation for a data.table and vector of poverty lines +#' #' #' @param dt data frame with `welfare` and `weight` columns #' @param welfare character: welfare variable name @@ -76,8 +76,6 @@ compute_fgt_dt <- function(dt, welfare, weight, povlines, mean_and_med = FALSE) } - - #' Efficient FGT calculation for vectors (No data.table) #' #' @param w character: welfare variable name @@ -125,7 +123,6 @@ compute_fgt <- function(w, wt, povlines) { } } - data.table( povline = povlines, headcount = res[, 1], @@ -166,8 +163,6 @@ DT_fgt_by_rl <- \(x, y, nx, povline) { )] } - - #' jkoin reporting level and lt list into one data.table #' #' @rdname map_fgt @@ -196,11 +191,10 @@ map_lt_to_dt <- \(lt, l_rl_rows, povline) { rbindlist(fill = TRUE) } - #' map over list of data.tables and indices to compute FGT by reporting_level #' #' @param lt list of data.tables with welfare and weight data -#' @param l_rl_rows list of indeces +#' @param l_rl_rows list of indices #' #' @return data.table with all measured #' @keywords internal @@ -273,8 +267,6 @@ load_data_list <- \(metadata) { } - - pov_from_DT <- function(DT, povline, g, cores = 1) { w <- DT$welfare wt <- DT$weight @@ -322,10 +314,6 @@ pov_from_DT <- function(DT, povline, g, cores = 1) { out } - - - - # pov_from_DT2 <- function(DT, povline, g) { # fgt0 <- numeric(length(povline)) # fgt1 <- numeric(length(povline)) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index 12b47e80..edbfc107 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -2,7 +2,7 @@ #' #' @inheritParams subset_lkup #' -#' @return Dataframe +#' @return list with 3 elements data_present_in_master, modified `lkup` value and `povline` #' @export return_if_exists <- function(slkup, povline, @@ -200,7 +200,7 @@ return_if_exists <- function(slkup, #' @param dat Dataframe to be appended #' @param cache_file_path path where cache file is saved #' -#' @return number of rows updated +#' @return a number i.e no. of rows updated #' @export #' update_master_file <- function(dat, @@ -291,7 +291,7 @@ update_master_file <- function(dat, ")) duckdb::dbDisconnect(write_con) - + if (nr > 0 && verbose) message(glue("{target_file} is updated.")) return(nr) @@ -362,7 +362,7 @@ reset_cache <- function(pass = Sys.getenv('PIP_CACHE_LOCAL_KEY'), DBI::dbExecute(write_con, "DELETE from fg_master_file") } duckdb::dbDisconnect(write_con) - + } create_duckdb_file <- function(cache_file_path) { @@ -388,17 +388,14 @@ create_duckdb_file <- function(cache_file_path) { watts DOUBLE )") DBI::dbDisconnect(con) - -} - - +} #' Load Intermediate cache data #' #' @inheritParams return_if_exists #' -#' @return data frame +#' @return cached data frame #' @export load_inter_cache <- function(lkup = NULL, cache_file_path = NULL, @@ -423,8 +420,6 @@ load_inter_cache <- function(lkup = NULL, # connection object if it is not closed More details here # https://app.clickup.com/t/868cdpe3q duckdb::dbDisconnect(con) - - setDT(master_file) } diff --git a/R/fg_pip.R b/R/fg_pip.R index 70111a53..cb305899 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -192,7 +192,6 @@ fg_pip <- function(country, } - #' Remove duplicated rows created during the interpolation process #' #' @param df data.table: Table of results created in `fg_pip()` @@ -239,8 +238,6 @@ fg_remove_duplicates <- function(df, # Ensure that out does not have duplicates df <- unique(df) } - - return(df) } @@ -277,11 +274,6 @@ fg_assign_nas_values_to_dup_cols <- function(df, return(df) } - - - - - #' Create full list for fg data load, not including country-years in cache #' #' @param metadata data table from subset_lkup()$lkup @@ -292,6 +284,3 @@ create_full_list <- function(metadata) { funique() } - - - diff --git a/R/pip.R b/R/pip.R index 44a4d556..be81d7b9 100644 --- a/R/pip.R +++ b/R/pip.R @@ -1,5 +1,3 @@ - - #' Compute PIP statistics #' #' Compute the main PIP poverty and inequality statistics. @@ -97,10 +95,3 @@ pip <- function(country = "ALL", out } - - - - - - - diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index cc23d253..5fadf427 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -2,10 +2,6 @@ #' @rdname pip_agg #' #' @return data.table -#' @examples -#' \dontrun{ -#' # Create lkups -#' } #' @export pip_grp_logic <- function(country = "ALL", year = "ALL", @@ -243,20 +239,14 @@ pip_grp_logic <- function(country = "ALL", } else { ret <- de } - - # add new estimate type - ret <- estimate_type_var(ret,lkup) - - # Censor regional values ----------- We are not censoring at this stage # anymore because we need to show al the years in the homre page, including # nowcast. we are now filtering at the UI and wrappers levels # if (censor) { # ret <- censor_rows(ret, lkup[["censored"]], type = "regions") # } - data.table::setcolorder(ret, names_grp) # Select columns @@ -266,8 +256,6 @@ pip_grp_logic <- function(country = "ALL", #Order rows by country code and reporting year setorder(ret, region_code , reporting_year) - - # ____________________________________________________________________ # Return #### return(ret) diff --git a/R/rg_pip.R b/R/rg_pip.R index a1cce53c..6cd651be 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -3,7 +3,7 @@ #' Compute the main PIP poverty and inequality statistics for survey years. #' #' @inheritParams pip -#' @return data.frame +#' @return list of 2 data.frames, main_data and data_in_cache #' @keywords internal rg_pip <- function(country, year, @@ -78,7 +78,7 @@ rg_pip <- function(country, res <- lapply(lt, process_dt, povline = povline) } rm(lt) - + res <- rbindlist(res, fill = TRUE) @@ -100,9 +100,5 @@ rg_pip <- function(country, setnames(out, "povline", "poverty_line") - return(list(main_data = out, data_in_cache = data_present_in_master)) } - - - diff --git a/R/utils.R b/R/utils.R index 7c42de11..ebe6aab3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -81,7 +81,6 @@ lkup_filter <- function(lkup, return(lkup) } - #' select_country #' Helper function for subset_lkup() #' @inheritParams subset_lkup @@ -116,9 +115,6 @@ select_country <- function(lkup, keep, country, valid_regions) { return(keep) } - - - #' select_years #' Helper function for subset_lkup() #' @inheritParams subset_lkup @@ -212,7 +208,6 @@ select_years <- function(lkup, return(keep) } - #' Helper to filter metadata #' aggregate distribution need to be filtered out when popshare is not null #' This is a temporary function until a full fix is implemented, and popshare is @@ -236,7 +231,6 @@ filter_lkup <- function(metadata, } - #' helper function to correctly filter look up table according to requested #' reporting level #' @@ -322,7 +316,6 @@ get_svy_data <- function(svy_id, return(out) } - #' Add pre-computed distributional stats #' #' @param df data.table: Data frame of poverty statistics @@ -374,13 +367,9 @@ add_dist_stats <- function(df, lkup, fill_gaps) { allow.cartesian = TRUE ] } - - df } - - #' Add pre-computed distributional stats #' #' @param df data.table: Data frame of poverty statistics @@ -583,10 +572,6 @@ estimate_type_ctr_lnp <- function(out, lkup) { } - - - - #' Create query controls #' @param syv_lkup data.table: Survey lkup table #' @param ref_lkup data.table: Reference lkup table @@ -800,7 +785,6 @@ convert_empty <- function(string) { } } - #' Subset country-years table #' This is a table created at start time to facilitate imputations #' It part of the interpolated_list object @@ -898,10 +882,9 @@ clear_cache <- function(cd) { }) } - #' Test whether a vector is length zero and IS not NULL #' -#' @param x Value to be passed +#' @param x Vector to be passed #' #' @return logical. TRUE if x is empty but it is not NULL #' @export @@ -921,9 +904,6 @@ is_empty <- function(x) { } } - - - #' Populate list in parent frame #' #' Fill in maned objects of a list with the value of named objects in the @@ -1011,7 +991,6 @@ get_valid_aux_long_format_tables <- function() { c('cpi', 'ppp', 'gdp', 'pce', 'pop') } - #' load SPR table from aux data #' #' If there is no data available, return an empty data.frame @@ -1109,10 +1088,6 @@ get_pg_table <- function(data_dir, return(pg) } - - - - #' Add Prosperity Gap #' #' @param df data frame inside [fg_pip] or [rg_pip] @@ -1143,8 +1118,6 @@ add_pg <- function(df, fill_gaps, data_dir) { ] } - - #' Add Distribution type #' #' @param df data frame from [fg_pip] or [rg_pip] @@ -1192,8 +1165,6 @@ add_distribution_type <- function(df, lkup, fill_gaps) { verbose = 0 ) - - if (fill_gaps) { # line up years ---------- @@ -1319,11 +1290,6 @@ add_spl <- function(df, fill_gaps, data_dir) { return(invisible(out)) } - - - - - #' Add Aggregate medians #' #' @param df data frame from either [fg_pip] or [rg_pip] @@ -1334,8 +1300,6 @@ add_spl <- function(df, fill_gaps, data_dir) { #' @return data.table add_agg_medians <- function(df, fill_gaps, data_dir) { - - if (fill_gaps) { table = "spr_lnp" # set all lines up medians to NA. @@ -1366,8 +1330,6 @@ add_agg_medians <- function(df, fill_gaps, data_dir) { } - - #' Get functions names in call stack #' #' @return character vector of calls @@ -1434,10 +1396,6 @@ get_caller_names <- function() { invisible(caller_names) } - - - - #' Add all the variables that are estimated outside the pipelines #' #' This includes variables such as the SPL, SPR, PG, and distribution @@ -1501,11 +1459,6 @@ unnest_dt_longer <- function(tbl, cols) { tbl } - - - - - #' merge into fgt table the mean and median from dist stats table in lkup #' #' @param fgt data,table with fgt measures @@ -1542,5 +1495,3 @@ get_mean_median <- \(fgt, lkup, fill_gaps) { validate = "m:1", # multiple povlines verbose = 0L) } - - diff --git a/man/compute_fgt_dt.Rd b/man/compute_fgt_dt.Rd index ac27c0f1..14f217cb 100644 --- a/man/compute_fgt_dt.Rd +++ b/man/compute_fgt_dt.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/compute_fgt_new.R \name{compute_fgt_dt} \alias{compute_fgt_dt} -\title{Title} +\title{Efficient FGT calculation for a data.table and vector of poverty lines} \usage{ compute_fgt_dt(dt, welfare, weight, povlines, mean_and_med = FALSE) } @@ -19,6 +19,6 @@ compute_fgt_dt(dt, welfare, weight, povlines, mean_and_med = FALSE) data.table with estimates poverty estimates } \description{ -Title +Efficient FGT calculation for a data.table and vector of poverty lines } \keyword{internal} diff --git a/man/is_empty.Rd b/man/is_empty.Rd index 0058b4f0..573ac625 100644 --- a/man/is_empty.Rd +++ b/man/is_empty.Rd @@ -7,7 +7,7 @@ is_empty(x) } \arguments{ -\item{x}{Value to be passed} +\item{x}{Vector to be passed} } \value{ logical. TRUE if x is empty but it is not NULL diff --git a/man/load_inter_cache.Rd b/man/load_inter_cache.Rd index 74adcfa5..3bce13d0 100644 --- a/man/load_inter_cache.Rd +++ b/man/load_inter_cache.Rd @@ -13,7 +13,7 @@ load_inter_cache(lkup = NULL, cache_file_path = NULL, fill_gaps = FALSE) values for missing years} } \value{ -data frame +cached data frame } \description{ Load Intermediate cache data diff --git a/man/map_fgt.Rd b/man/map_fgt.Rd index e4d1c224..eb29239e 100644 --- a/man/map_fgt.Rd +++ b/man/map_fgt.Rd @@ -24,7 +24,7 @@ map_fgt(lt, l_rl_rows, povline) \item{lt}{list of data.tables with welfare and weight data} -\item{l_rl_rows}{list of indeces} +\item{l_rl_rows}{list of indices} } \value{ data.table with all measured diff --git a/man/pip_agg.Rd b/man/pip_agg.Rd index f47dbba8..51417e2e 100644 --- a/man/pip_agg.Rd +++ b/man/pip_agg.Rd @@ -40,7 +40,8 @@ pip_grp_new( reporting_level = c("all", "national"), lkup, censor = TRUE, - additional_ind = FALSE + additional_ind = FALSE, + lkup_hash = lkup$cache_data_id$hash_pip_grp ) } \arguments{ @@ -82,7 +83,4 @@ New way to estimate Aggregate data \dontrun{ # Create lkups } -\dontrun{ -# Create lkups -} } diff --git a/man/pipapi-package.Rd b/man/pipapi-package.Rd index 8d3ec80f..99b9458f 100644 --- a/man/pipapi-package.Rd +++ b/man/pipapi-package.Rd @@ -18,7 +18,7 @@ Useful links: } \author{ -\strong{Maintainer}: R.Andrés Castañeda \email{acastanedaa@worldbank.org} +\strong{Maintainer}: R.Andrés Castañeda \email{acastanedaa@worldbank.org} Authors: \itemize{ diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd index bc763065..a5e91d0f 100644 --- a/man/return_if_exists.Rd +++ b/man/return_if_exists.Rd @@ -21,7 +21,7 @@ return_if_exists( values for missing years} } \value{ -Dataframe +list with 3 elements data_present_in_master, modified \code{lkup} value and \code{povline} } \description{ Return the rows of the table if they exist in master file diff --git a/man/rg_pip.Rd b/man/rg_pip.Rd index 4a4f1881..4607fc53 100644 --- a/man/rg_pip.Rd +++ b/man/rg_pip.Rd @@ -34,7 +34,7 @@ poverty line} \item{lkup}{list: A list of lkup tables} } \value{ -data.frame +list of 2 data.frames, main_data and data_in_cache } \description{ Compute the main PIP poverty and inequality statistics for survey years. diff --git a/man/ui_hp_countries.Rd b/man/ui_hp_countries.Rd index 2ffcf443..50f19b60 100644 --- a/man/ui_hp_countries.Rd +++ b/man/ui_hp_countries.Rd @@ -8,7 +8,8 @@ ui_hp_countries( country = c("IDN", "CIV"), povline = 1.9, pop_units = 1e+06, - lkup + lkup, + lkup_hash = lkup$cache_data_id$hash_pip ) } \arguments{ @@ -20,6 +21,8 @@ ui_hp_countries( to million)} \item{lkup}{list: A list of lkup tables} + +\item{lkup_hash}{character: hash of pip} } \value{ data.table diff --git a/man/ui_pc_charts.Rd b/man/ui_pc_charts.Rd index 9801ecfe..190c3040 100644 --- a/man/ui_pc_charts.Rd +++ b/man/ui_pc_charts.Rd @@ -14,7 +14,8 @@ ui_pc_charts( reporting_level = c("all", "national", "rural", "urban"), pop_units = 1e+06, censor = TRUE, - lkup + lkup, + lkup_hash = lkup$cache_data_id$hash_pip_grp ) } \arguments{ @@ -40,6 +41,8 @@ to million)} \item{censor}{logical: Triggers censoring of country/year statistics} \item{lkup}{list: A list of lkup tables} + +\item{lkup_hash}{character: hash of pip} } \value{ data.table diff --git a/man/ui_pc_regional.Rd b/man/ui_pc_regional.Rd index 083e418b..8c414601 100644 --- a/man/ui_pc_regional.Rd +++ b/man/ui_pc_regional.Rd @@ -9,7 +9,8 @@ ui_pc_regional( year = "ALL", povline = 1.9, pop_units = 1e+06, - lkup + lkup, + lkup_hash = lkup$cache_data_id$hash_pip_grp ) } \arguments{ @@ -23,6 +24,8 @@ ui_pc_regional( to million)} \item{lkup}{list: A list of lkup tables} + +\item{lkup_hash}{character: hash of pip} } \value{ data.table diff --git a/man/update_master_file.Rd b/man/update_master_file.Rd index 968d3de4..b27eed63 100644 --- a/man/update_master_file.Rd +++ b/man/update_master_file.Rd @@ -21,7 +21,7 @@ update_master_file( values for missing years} } \value{ -number of rows updated +a number i.e no. of rows updated } \description{ Update master file with the contents of the dataframe