From 0beb12c8ebe0f875f55b1a514a84c7ae011ead3d Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 12 Dec 2024 06:46:21 +0530 Subject: [PATCH 01/49] first draft --- .gitignore | 1 + R/pip.R | 305 ++++++++++++++++++++++++++++------------------------- 2 files changed, 161 insertions(+), 145 deletions(-) diff --git a/.gitignore b/.gitignore index b1204fe3..52b9f47f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ tests/testdata/app_data/ docs logs/ /sessionInfoLog +demo.duckdb diff --git a/R/pip.R b/R/pip.R index c71fdb7f..2378f4da 100644 --- a/R/pip.R +++ b/R/pip.R @@ -103,175 +103,190 @@ pip <- function(country = "ALL", valid_years = lkup$valid_years, aux_files = lkup$aux_files ) - - # mains estimates --------------- - if (fill_gaps) { - ## lineup years----------------- - out <- fg_pip( - country = lcv$est_ctrs, - year = year, - povline = povline, - popshare = popshare, - welfare_type = welfare_type, - reporting_level = reporting_level, - ppp = ppp, - lkup = lkup - ) - } else { - ## survey years ------------------ - out <- rg_pip( - country = lcv$est_ctrs, - year = year, - povline = povline, - popshare = popshare, - welfare_type = welfare_type, - reporting_level = reporting_level, - ppp = ppp, - lkup = lkup - ) - } - - # Eary return for empty table--------------- - if (nrow(out) == 0) { - return(out) - } - - # aggregate distributions ------------------ - if (reporting_level %in% c("national", "all")) { - out <- add_agg_stats( - df = out, - return_cols = lkup$return_cols$ag_average_poverty_stats + # lcv$est_ctrs has all the country_code that we are interested in + # Integrate return_if_exists for following scenario + # 1) country = "AGO" year = 2000 pl = 1.9 should return from master file + # 2) country = "AGO" year = 2019 pl = 1.9 should return pip call + # 3) country = c("CHN", "IND"), year = 2019, 2017 should return half from master file and half from pip call + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") + result_from_cache <- 
pipfun::return_if_exists(country, year, povline, con) + # This initialization is necessary for rowbind at the end if all the data is present in cache + out <- NULL + # only run pip code if there is data that is not present in cache + if(nrow(result_from_cache$absent_args) > 0) { + # use result_from_cache$absent_args$country_code reporting_year and poverty_line and pass it further. + + # mains estimates --------------- + if (fill_gaps) { + ## lineup years----------------- + out <- fg_pip( + country = result_from_cache$absent_args$country_code, + year = result_from_cache$absent_args$reporting_year, + povline = result_from_cache$absent_args$povline, + popshare = popshare, + welfare_type = welfare_type, + reporting_level = reporting_level, + ppp = ppp, + lkup = lkup + ) + } else { + ## survey years ------------------ + out <- rg_pip( + country = result_from_cache$absent_args$country_code, + year = result_from_cache$absent_args$reporting_year, + povline = result_from_cache$absent_args$poverty_line, + popshare = popshare, + welfare_type = welfare_type, + reporting_level = reporting_level, + ppp = ppp, + lkup = lkup ) - if (reporting_level == "national") { - out <- out[reporting_level == "national"] } - } - - # Add extra variables -------------- - # ## Add SPL and SPR --------------- - # out <- add_spl(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## Add prosperity Gap ----------- - # - # out <- add_pg(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## add distribution type ------------- - # # based on info in framework data, rather than welfare data - # add_distribution_type(df = out, - # lkup = lkup, - # fill_gaps = fill_gaps) + # Eary return for empty table--------------- + if (nrow(out) == 0) { + return(out) + } - add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) + # aggregate distributions ------------------ + if (reporting_level %in% c("national", "all")) { + out <- add_agg_stats( + df = out, 
+ return_cols = lkup$return_cols$ag_average_poverty_stats + ) + if (reporting_level == "national") { + out <- out[reporting_level == "national"] + } + } + # Add extra variables -------------- + + # ## Add SPL and SPR --------------- + # out <- add_spl(df = out, + # fill_gaps = fill_gaps, + # data_dir = lkup$data_root) + # + # ## Add prosperity Gap ----------- + # + # out <- add_pg(df = out, + # fill_gaps = fill_gaps, + # data_dir = lkup$data_root) + # + # ## add distribution type ------------- + # # based on info in framework data, rather than welfare data + # add_distribution_type(df = out, + # lkup = lkup, + # fill_gaps = fill_gaps) + + add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) + + + + + # **** TO BE REMOVED **** REMOVAL STARTS HERE + # Once `pip-grp` has been integrated in ingestion pipeline + # Handles grouped aggregations + if (group_by != "none") { + # Handle potential (insignificant) difference in poverty_line values that + # may mess-up the grouping + out$poverty_line <- povline + + out <- pip_aggregate_by( + df = out, + group_lkup = lkup[["pop_region"]], + return_cols = lkup$return_cols$pip_grp + ) + # Censor regional values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "regions") + } + + out <- out[, c("region_name", + "region_code", + "reporting_year", + "reporting_pop", + "poverty_line", + "headcount", + "poverty_gap", + "poverty_severity", + "watts", + "mean", + "pop_in_poverty")] + + return(out) + } + # **** TO BE REMOVED **** REMOVAL ENDS HERE + # pre-computed distributional stats --------------- + crr_names <- names(out) # current variables + names2keep <- lkup$return_cols$pip$cols # all variables - # **** TO BE REMOVED **** REMOVAL STARTS HERE - # Once `pip-grp` has been integrated in ingestion pipeline - # Handles grouped aggregations - if (group_by != "none") { - # Handle potential (insignificant) difference in poverty_line values that - # may mess-up the grouping - out$poverty_line <- povline - - 
out <- pip_aggregate_by( - df = out, - group_lkup = lkup[["pop_region"]], - return_cols = lkup$return_cols$pip_grp + out <- add_dist_stats( + df = out, + dist_stats = lkup[["dist_stats"]] ) - # Censor regional values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "regions") - } - out <- out[, c("region_name", - "region_code", - "reporting_year", - "reporting_pop", - "poverty_line", - "headcount", - "poverty_gap", - "poverty_severity", - "watts", - "mean", - "pop_in_poverty")] - - return(out) - } - # **** TO BE REMOVED **** REMOVAL ENDS HERE + # Add aggregate medians ---------------- + out <- add_agg_medians( + df = out, + fill_gaps = fill_gaps, + data_dir = lkup$data_root + ) + # format ---------------- - # pre-computed distributional stats --------------- - crr_names <- names(out) # current variables - names2keep <- lkup$return_cols$pip$cols # all variables - out <- add_dist_stats( - df = out, - dist_stats = lkup[["dist_stats"]] - ) + if (fill_gaps) { - # Add aggregate medians ---------------- - out <- add_agg_medians( - df = out, - fill_gaps = fill_gaps, - data_dir = lkup$data_root - ) + ## Inequality indicators to NA for lineup years ---- + dist_vars <- names2keep[!(names2keep %in% crr_names)] + out[, + (dist_vars) := NA_real_] - # format ---------------- + ## estimate_var ----- + out <- estimate_type_ctr_lnp(out, lkup) + } else { + out[, estimate_type := NA_character_] + } + ## Handle survey coverage ------------ + if (reporting_level != "all") { + keep <- out$reporting_level == reporting_level + out <- out[keep, ] + } - if (fill_gaps) { + # Censor country values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "countries") + } - ## Inequality indicators to NA for lineup years ---- - dist_vars <- names2keep[!(names2keep %in% crr_names)] - out[, - (dist_vars) := NA_real_] - ## estimate_var ----- - out <- estimate_type_ctr_lnp(out, lkup) + # Select columns + if (additional_ind) { + get_additional_indicators(out) + 
added_names <- attr(out, "new_indicators_names") + names2keep <- c(names2keep, added_names) - } else { - out[, estimate_type := NA_character_] - } - ## Handle survey coverage ------------ - if (reporting_level != "all") { - keep <- out$reporting_level == reporting_level - out <- out[keep, ] - } + } + # Keep relevant variables + out <- out[, .SD, .SDcols = names2keep] - # Censor country values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "countries") - } + # make sure we always report the same precision in all numeric variables + doub_vars <- + names(out)[unlist(lapply(out, is.double))] |> + data.table::copy() - # Select columns - if (additional_ind) { - get_additional_indicators(out) - added_names <- attr(out, "new_indicators_names") - names2keep <- c(names2keep, added_names) + out[, (doub_vars) := lapply(.SD, round, digits = 12), + .SDcols = doub_vars] + # Order rows by country code and reporting year + data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) } - # Keep relevant variables - out <- out[, .SD, .SDcols = names2keep] - - - # make sure we always report the same precision in all numeric variables - doub_vars <- - names(out)[unlist(lapply(out, is.double))] |> - data.table::copy() - - out[, (doub_vars) := lapply(.SD, round, digits = 12), - .SDcols = doub_vars] - - # Order rows by country code and reporting year - data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - + final_result <- collapse::rowbind( + result_from_cache$present_data, out + ) # return ------------- - return(out) + return(final_result) } From 8f799dda3d53113214ce5dcabb62bd045657fb31 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 13 Dec 2024 19:20:50 +0530 Subject: [PATCH 02/49] bring all functions in pipapi --- R/duckdb_fun.R | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/pip.R | 3 ++- 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 R/duckdb_fun.R diff --git 
a/R/duckdb_fun.R b/R/duckdb_fun.R new file mode 100644 index 00000000..bd346b99 --- /dev/null +++ b/R/duckdb_fun.R @@ -0,0 +1,60 @@ +#' Return the rows of the table if they exist in master file +#' +#' @param country_code Country Code +#' @param year Year +#' @param poverty_line Poverty Lines +#' @param con Connection object +#' +#' @return Dataframe +#' @export +#' +return_if_exists <- function(country_code, year, poverty_line, con) { + all_args_data <- all_args(country_code, year, poverty_line) |> + duckplyr::as_duckplyr_tibble() + # This file will be read from shared drive which will be an argument of this function. + # Additionally there were will more arguments to join instead of only 3 + # In fact, it will be joined by all the arguments in `pip` call + # It is not possible to append to parquet file https://stackoverflow.com/questions/39234391/how-to-append-data-to-an-existing-parquet-file + # Writing entire data will be very costly as data keeps on growing, better is to save data in duckdb and append to it. 
+ master_file <- DBI::dbGetQuery(con, "select * from master_file") |> + duckplyr::as_duckplyr_tibble() + + args_not_present_in_master <- duckplyr::anti_join( + all_args_data, master_file, + by = c("country_code", "reporting_year", "poverty_line") + ) + args_present_in_master <- duckplyr::inner_join( + master_file, all_args_data, + by = c("country_code", "reporting_year", "poverty_line") + ) + + return(list(present_data = args_present_in_master, absent_args = args_not_present_in_master)) +} + +#' Create a dataframe with all possible combinations of `country_code`, `reporting_year` and `poverty_line` +#' +#' @param country_code Code of countries to be expanded +#' @param reporting_year Reported year(s) +#' @param poverty_line Poverty Line(s) +#' +#' @return A dataframe +#' +all_args <- function(country_code, reporting_year, poverty_line) { + expand.grid(country_code = country_code, reporting_year = reporting_year, poverty_line = poverty_line) +} + +#' Update master file with the contents of the dataframe +#' +#' @param dat Dataframe to be appended +#' @param con DuckDB connection object +#' +#' @return number of rows updated +#' @export +#' +update_master_file <- function(dat, con) { + duckdb::duckdb_register(con, "append_data", dat, overwrite = TRUE) + DBI::dbExecute(con, "INSERT INTO master_file SELECT * FROM append_data;") + message("Master File is updated.") + + return(nrow(dat)) +} diff --git a/R/pip.R b/R/pip.R index 2378f4da..aa5ca4d9 100644 --- a/R/pip.R +++ b/R/pip.R @@ -109,7 +109,7 @@ pip <- function(country = "ALL", # 2) country = "AGO" year = 2019 pl = 1.9 should return pip call # 3) country = c("CHN", "IND"), year = 2019, 2017 should return half from master file and half from pip call con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") - result_from_cache <- pipfun::return_if_exists(country, year, povline, con) + result_from_cache <- return_if_exists(country, year, povline, con) # This initialization is necessary for rowbind at the end if 
all the data is present in cache out <- NULL # only run pip code if there is data that is not present in cache @@ -283,6 +283,7 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) + update_master_file(out, con) } final_result <- collapse::rowbind( result_from_cache$present_data, out From 5adc8bff41400437adbb5fb499046a9733024c2a Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 13 Dec 2024 21:33:27 +0530 Subject: [PATCH 03/49] finish 3rd case --- R/rg_pip.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/rg_pip.R b/R/rg_pip.R index 1bd84225..2a19440b 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -53,7 +53,7 @@ rg_pip <- function(country, tmp_stats <- wbpip:::prod_compute_pip_stats( welfare = svy_data$df0$welfare, - povline = povline, + povline = povline[i], popshare = popshare, population = svy_data$df0$weight, requested_mean = tmp_metadata$survey_mean_ppp, @@ -64,7 +64,6 @@ rg_pip <- function(country, ppp = ppp, distribution_type = tmp_metadata$distribution_type ) - # Add stats columns to data frame for (j in seq_along(tmp_stats)) { tmp_metadata[[names(tmp_stats)[j]]] <- tmp_stats[[j]] From 8d0d5475b80cc07772653faa54813e8c2db4740e Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 16 Dec 2024 21:08:44 +0530 Subject: [PATCH 04/49] draft for case 4 --- R/pip.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/R/pip.R b/R/pip.R index aa5ca4d9..d959abf6 100644 --- a/R/pip.R +++ b/R/pip.R @@ -108,7 +108,24 @@ pip <- function(country = "ALL", # 1) country = "AGO" year = 2000 pl = 1.9 should return from master file # 2) country = "AGO" year = 2019 pl = 1.9 should return pip call # 3) country = c("CHN", "IND"), year = 2019, 2017 should return half from master file and half from pip call + # + # 4) country = "all" year = 2019 + # 5) country = "AGO" year = "all" + # 6) country = "all" year = "all" + #browser() 
con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") + + if((country == "all" || year == "all") && fill_gaps) { + + } else if((country == "all" || year == "all") && !fill_gaps) { + if(country == "all") { + country = unique(lcv$est_ctrs) + } + if(year == "all") { + year = unique(lkup$svy_lkup$reporting_year) + } + } + result_from_cache <- return_if_exists(country, year, povline, con) # This initialization is necessary for rowbind at the end if all the data is present in cache out <- NULL From 48669d74bcca1a2ef87a868caaebc7dc1230551b Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 16 Dec 2024 21:23:35 +0530 Subject: [PATCH 05/49] change all case --- R/pip.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pip.R b/R/pip.R index d959abf6..e306d6d4 100644 --- a/R/pip.R +++ b/R/pip.R @@ -112,16 +112,16 @@ pip <- function(country = "ALL", # 4) country = "all" year = 2019 # 5) country = "AGO" year = "all" # 6) country = "all" year = "all" - #browser() + # browser() con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") - if((country == "all" || year == "all") && fill_gaps) { + if((country == "ALL" || year == "ALL") && fill_gaps) { - } else if((country == "all" || year == "all") && !fill_gaps) { - if(country == "all") { + } else if((country == "ALL" || year == "ALL") && !fill_gaps) { + if(country == "ALL") { country = unique(lcv$est_ctrs) } - if(year == "all") { + if(year == "ALL") { year = unique(lkup$svy_lkup$reporting_year) } } From 6efb333f152074987b68d9726dcf1c64f105c508 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 16 Dec 2024 21:43:06 +0530 Subject: [PATCH 06/49] fix case 4 --- R/pip.R | 242 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 120 insertions(+), 122 deletions(-) diff --git a/R/pip.R b/R/pip.R index e306d6d4..003374f6 100644 --- a/R/pip.R +++ b/R/pip.R @@ -115,9 +115,9 @@ pip <- function(country = "ALL", # browser() con <- duckdb::dbConnect(duckdb::duckdb(), dbdir 
= "demo.duckdb") - if((country == "ALL" || year == "ALL") && fill_gaps) { + if(("ALL" %in% country || "ALL" %in% year) && fill_gaps) { - } else if((country == "ALL" || year == "ALL") && !fill_gaps) { + } else if(("ALL" %in% country || "ALL" %in% year) && !fill_gaps) { if(country == "ALL") { country = unique(lcv$est_ctrs) } @@ -160,147 +160,145 @@ pip <- function(country = "ALL", ) } - # Eary return for empty table--------------- - if (nrow(out) == 0) { - return(out) - } - - # aggregate distributions ------------------ - if (reporting_level %in% c("national", "all")) { - out <- add_agg_stats( - df = out, - return_cols = lkup$return_cols$ag_average_poverty_stats - ) - if (reporting_level == "national") { - out <- out[reporting_level == "national"] + # Early return for empty table--------------- + if (nrow(out) > 0) { + # aggregate distributions ------------------ + if (reporting_level %in% c("national", "all")) { + out <- add_agg_stats( + df = out, + return_cols = lkup$return_cols$ag_average_poverty_stats + ) + if (reporting_level == "national") { + out <- out[reporting_level == "national"] + } } - } - # Add extra variables -------------- - - # ## Add SPL and SPR --------------- - # out <- add_spl(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## Add prosperity Gap ----------- - # - # out <- add_pg(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## add distribution type ------------- - # # based on info in framework data, rather than welfare data - # add_distribution_type(df = out, - # lkup = lkup, - # fill_gaps = fill_gaps) - - add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) - - - - - # **** TO BE REMOVED **** REMOVAL STARTS HERE - # Once `pip-grp` has been integrated in ingestion pipeline - # Handles grouped aggregations - if (group_by != "none") { - # Handle potential (insignificant) difference in poverty_line values that - # may mess-up the grouping - out$poverty_line <- povline - - out <- 
pip_aggregate_by( - df = out, - group_lkup = lkup[["pop_region"]], - return_cols = lkup$return_cols$pip_grp - ) - # Censor regional values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "regions") + # Add extra variables -------------- + + # ## Add SPL and SPR --------------- + # out <- add_spl(df = out, + # fill_gaps = fill_gaps, + # data_dir = lkup$data_root) + # + # ## Add prosperity Gap ----------- + # + # out <- add_pg(df = out, + # fill_gaps = fill_gaps, + # data_dir = lkup$data_root) + # + # ## add distribution type ------------- + # # based on info in framework data, rather than welfare data + # add_distribution_type(df = out, + # lkup = lkup, + # fill_gaps = fill_gaps) + + add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) + + + + + # **** TO BE REMOVED **** REMOVAL STARTS HERE + # Once `pip-grp` has been integrated in ingestion pipeline + # Handles grouped aggregations + if (group_by != "none") { + # Handle potential (insignificant) difference in poverty_line values that + # may mess-up the grouping + out$poverty_line <- povline + + out <- pip_aggregate_by( + df = out, + group_lkup = lkup[["pop_region"]], + return_cols = lkup$return_cols$pip_grp + ) + # Censor regional values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "regions") + } + + out <- out[, c("region_name", + "region_code", + "reporting_year", + "reporting_pop", + "poverty_line", + "headcount", + "poverty_gap", + "poverty_severity", + "watts", + "mean", + "pop_in_poverty")] + + return(out) } - - out <- out[, c("region_name", - "region_code", - "reporting_year", - "reporting_pop", - "poverty_line", - "headcount", - "poverty_gap", - "poverty_severity", - "watts", - "mean", - "pop_in_poverty")] - - return(out) - } - # **** TO BE REMOVED **** REMOVAL ENDS HERE + # **** TO BE REMOVED **** REMOVAL ENDS HERE - # pre-computed distributional stats --------------- - crr_names <- names(out) # current variables - names2keep <- 
lkup$return_cols$pip$cols # all variables + # pre-computed distributional stats --------------- + crr_names <- names(out) # current variables + names2keep <- lkup$return_cols$pip$cols # all variables - out <- add_dist_stats( - df = out, - dist_stats = lkup[["dist_stats"]] - ) + out <- add_dist_stats( + df = out, + dist_stats = lkup[["dist_stats"]] + ) - # Add aggregate medians ---------------- - out <- add_agg_medians( - df = out, - fill_gaps = fill_gaps, - data_dir = lkup$data_root - ) + # Add aggregate medians ---------------- + out <- add_agg_medians( + df = out, + fill_gaps = fill_gaps, + data_dir = lkup$data_root + ) - # format ---------------- + # format ---------------- - if (fill_gaps) { + if (fill_gaps) { - ## Inequality indicators to NA for lineup years ---- - dist_vars <- names2keep[!(names2keep %in% crr_names)] - out[, - (dist_vars) := NA_real_] + ## Inequality indicators to NA for lineup years ---- + dist_vars <- names2keep[!(names2keep %in% crr_names)] + out[, + (dist_vars) := NA_real_] - ## estimate_var ----- - out <- estimate_type_ctr_lnp(out, lkup) + ## estimate_var ----- + out <- estimate_type_ctr_lnp(out, lkup) - } else { - out[, estimate_type := NA_character_] - } - ## Handle survey coverage ------------ - if (reporting_level != "all") { - keep <- out$reporting_level == reporting_level - out <- out[keep, ] - } + } else { + out[, estimate_type := NA_character_] + } + ## Handle survey coverage ------------ + if (reporting_level != "all") { + keep <- out$reporting_level == reporting_level + out <- out[keep, ] + } - # Censor country values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "countries") - } + # Censor country values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "countries") + } - # Select columns - if (additional_ind) { - get_additional_indicators(out) - added_names <- attr(out, "new_indicators_names") - names2keep <- c(names2keep, added_names) + # Select columns + if (additional_ind) { + 
get_additional_indicators(out) + added_names <- attr(out, "new_indicators_names") + names2keep <- c(names2keep, added_names) - } - # Keep relevant variables - out <- out[, .SD, .SDcols = names2keep] + } + # Keep relevant variables + out <- out[, .SD, .SDcols = names2keep] - # make sure we always report the same precision in all numeric variables - doub_vars <- - names(out)[unlist(lapply(out, is.double))] |> - data.table::copy() + # make sure we always report the same precision in all numeric variables + doub_vars <- + names(out)[unlist(lapply(out, is.double))] |> + data.table::copy() - out[, (doub_vars) := lapply(.SD, round, digits = 12), - .SDcols = doub_vars] + out[, (doub_vars) := lapply(.SD, round, digits = 12), + .SDcols = doub_vars] - # Order rows by country code and reporting year - data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - update_master_file(out, con) + # Order rows by country code and reporting year + data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) + update_master_file(out, con) + } } final_result <- collapse::rowbind( result_from_cache$present_data, out From af7bad12d4fc79c147a245edf1a7c31bd5957194 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 22 Dec 2024 18:02:21 +0530 Subject: [PATCH 07/49] draft push --- R/fg_pip.R | 9 +++------ R/pip.R | 13 +++++++++---- R/utils.R | 2 +- pipapi.Rproj | 1 + 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/R/fg_pip.R b/R/fg_pip.R index 979ecadc..1cd60d50 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -19,7 +19,6 @@ fg_pip <- function(country, data_dir <- lkup$data_root ref_lkup <- lkup$ref_lkup - # Handle interpolation metadata <- subset_lkup( country = country, @@ -54,7 +53,7 @@ fg_pip <- function(country, # Extract country-years for which stats will be computed from the same files # tmp_metadata <- interpolation_list[[unique_survey_files[svy_id]]]$tmp_metadata iteration <- 
interpolation_list[[unique_survey_files[svy_id]]] - + if(svy_id == 1428) browser() svy_data <- get_svy_data(svy_id = iteration$cache_ids, reporting_level = iteration$reporting_level, path = iteration$paths) @@ -69,7 +68,7 @@ fg_pip <- function(country, results_subset <- vector(mode = "list", length = nrow(ctry_years)) for (ctry_year_id in seq_along(ctry_years$interpolation_id)) { - + print(ctry_year_id) # Extract records to be used for a single country-year estimation interp_id <- ctry_years[["interpolation_id"]][ctry_year_id] tmp_metadata <- metadata[metadata$interpolation_id == interp_id, ] @@ -86,7 +85,7 @@ fg_pip <- function(country, default_ppp = tmp_metadata[["ppp"]], ppp = ppp, distribution_type = tmp_metadata[["distribution_type"]], - poverty_line = povline, + poverty_line = povline[svy_id], popshare = popshare ) @@ -96,7 +95,6 @@ fg_pip <- function(country, } # # tmp_metadata <- unique(tmp_metadata) - # Add stats columns to data frame for (stat in seq_along(tmp_stats)) { tmp_metadata[[names(tmp_stats)[stat]]] <- tmp_stats[[stat]] @@ -108,7 +106,6 @@ fg_pip <- function(country, out[[svy_id]] <- results_subset } - out <- unlist(out, recursive = FALSE) out <- data.table::rbindlist(out) diff --git a/R/pip.R b/R/pip.R index 003374f6..c75e77fd 100644 --- a/R/pip.R +++ b/R/pip.R @@ -69,7 +69,6 @@ pip <- function(country = "ALL", # set up ------------- - welfare_type <- match.arg(welfare_type) reporting_level <- match.arg(reporting_level) group_by <- match.arg(group_by) @@ -116,12 +115,18 @@ pip <- function(country = "ALL", con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") if(("ALL" %in% country || "ALL" %in% year) && fill_gaps) { + if("ALL" %in% country) { + country = lkup$aux_files$countries$country_code + } + if("ALL" %in% year) { + year = lkup$valid_years$valid_interpolated_years + } } else if(("ALL" %in% country || "ALL" %in% year) && !fill_gaps) { - if(country == "ALL") { + if("ALL" %in% country) { country = unique(lcv$est_ctrs) } - if(year 
== "ALL") { + if("ALL" %in% year) { year = unique(lkup$svy_lkup$reporting_year) } } @@ -139,7 +144,7 @@ pip <- function(country = "ALL", out <- fg_pip( country = result_from_cache$absent_args$country_code, year = result_from_cache$absent_args$reporting_year, - povline = result_from_cache$absent_args$povline, + povline = result_from_cache$absent_args$poverty_line, popshare = popshare, welfare_type = welfare_type, reporting_level = reporting_level, diff --git a/R/utils.R b/R/utils.R index 59f98453..7ba63622 100644 --- a/R/utils.R +++ b/R/utils.R @@ -713,7 +713,7 @@ subset_ctry_years <- function(country, } else { keep_regions <- rep(FALSE, length(lkup$region_code)) } - keep_countries <- lkup$country_code %chin% country + keep_countries <- lkup$country_code %chin% as.character(country) keep <- keep & (keep_countries | keep_regions) } diff --git a/pipapi.Rproj b/pipapi.Rproj index 4e3ca1bc..92f8cfee 100644 --- a/pipapi.Rproj +++ b/pipapi.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: bd9bbc88-fff8-48c9-bbe3-6ff0f016ecca RestoreWorkspace: No SaveWorkspace: No From dbbfa79d8745bfbbbe00264ef032b9310edb5082 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 30 Dec 2024 20:31:24 +0530 Subject: [PATCH 08/49] making sure everything works except country and year all --- R/duckdb_fun.R | 3 +++ R/fg_pip.R | 2 -- R/pip.R | 2 +- vignettes/duckdb-caching.Rmd | 29 +++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 vignettes/duckdb-caching.Rmd diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index bd346b99..779d16ad 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -28,6 +28,9 @@ return_if_exists <- function(country_code, year, poverty_line, con) { by = c("country_code", "reporting_year", "poverty_line") ) + if(nrow(args_present_in_master)) { + message("Returning data from cache.") + } return(list(present_data = args_present_in_master, absent_args = args_not_present_in_master)) } diff --git a/R/fg_pip.R b/R/fg_pip.R index 
1cd60d50..ff3c5fd7 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -53,7 +53,6 @@ fg_pip <- function(country, # Extract country-years for which stats will be computed from the same files # tmp_metadata <- interpolation_list[[unique_survey_files[svy_id]]]$tmp_metadata iteration <- interpolation_list[[unique_survey_files[svy_id]]] - if(svy_id == 1428) browser() svy_data <- get_svy_data(svy_id = iteration$cache_ids, reporting_level = iteration$reporting_level, path = iteration$paths) @@ -68,7 +67,6 @@ fg_pip <- function(country, results_subset <- vector(mode = "list", length = nrow(ctry_years)) for (ctry_year_id in seq_along(ctry_years$interpolation_id)) { - print(ctry_year_id) # Extract records to be used for a single country-year estimation interp_id <- ctry_years[["interpolation_id"]][ctry_year_id] tmp_metadata <- metadata[metadata$interpolation_id == interp_id, ] diff --git a/R/pip.R b/R/pip.R index c75e77fd..6f61a33e 100644 --- a/R/pip.R +++ b/R/pip.R @@ -127,7 +127,7 @@ pip <- function(country = "ALL", country = unique(lcv$est_ctrs) } if("ALL" %in% year) { - year = unique(lkup$svy_lkup$reporting_year) + year = unique(lkup$svy_lkup$reporting_year[lkup$svy_lkup$country_code %in% country]) } } diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd new file mode 100644 index 00000000..320ddf2c --- /dev/null +++ b/vignettes/duckdb-caching.Rmd @@ -0,0 +1,29 @@ +--- +title: "duckdb-caching" +output: html_document +date: "2024-12-26" +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. 
You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. From fa96de3e50988eb8181da4b9fcf36a508541b0cd Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Wed, 1 Jan 2025 19:12:15 +0530 Subject: [PATCH 09/49] new version --- R/duckdb_fun.R | 32 ++++++++++++++++++++------------ R/fg_pip.R | 6 ++++-- R/pip.R | 45 ++++++++++++++------------------------------- R/rg_pip.R | 10 ++++++---- R/utils.R | 11 +++++++---- 5 files changed, 51 insertions(+), 53 deletions(-) diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 779d16ad..884868d7 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -8,9 +8,9 @@ #' @return Dataframe #' @export #' -return_if_exists <- function(country_code, year, poverty_line, con) { - all_args_data <- all_args(country_code, year, poverty_line) |> - duckplyr::as_duckplyr_tibble() +return_if_exists <- function(lkup, con) { + # all_args_data <- all_args(country_code, year, poverty_line) |> + # duckplyr::as_duckplyr_tibble() # This file will be read from shared drive which will be an argument of this function. 
# Additionally there were will more arguments to join instead of only 3 # In fact, it will be joined by all the arguments in `pip` call @@ -19,19 +19,27 @@ return_if_exists <- function(country_code, year, poverty_line, con) { master_file <- DBI::dbGetQuery(con, "select * from master_file") |> duckplyr::as_duckplyr_tibble() - args_not_present_in_master <- duckplyr::anti_join( - all_args_data, master_file, - by = c("country_code", "reporting_year", "poverty_line") - ) - args_present_in_master <- duckplyr::inner_join( - master_file, all_args_data, - by = c("country_code", "reporting_year", "poverty_line") + # args_not_present_in_master <- duckplyr::anti_join( + # lkup, master_file, + # by = c("country_code", "reporting_year") + # ) + #browser() + data_present_in_master <- duckplyr::inner_join( + master_file, lkup |> collapse::fselect(country_code, reporting_year), + by = c("country_code", "reporting_year") ) - if(nrow(args_present_in_master)) { + keep <- TRUE + if(nrow(data_present_in_master) > 0) { + keep <- !paste(lkup$country_code, lkup$reporting_year) %in% + paste(data_present_in_master$country_code, data_present_in_master$reporting_year) + + lkup <- lkup[keep, ] + message("Returning data from cache.") } - return(list(present_data = args_present_in_master, absent_args = args_not_present_in_master)) + # nrow(data_present_in_master) should be equal to sum(keep) + return(list(data_present_in_master = data_present_in_master, lkup = lkup)) } #' Create a dataframe with all possible combinations of `country_code`, `reporting_year` and `poverty_line` diff --git a/R/fg_pip.R b/R/fg_pip.R index ff3c5fd7..181f8cef 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -29,6 +29,8 @@ fg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir ) + data_present_in_master <- metadata$data_present_in_master + metadata <- metadata$lkup # Remove aggregate distribution if popshare is specified # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions metadata 
<- filter_lkup(metadata = metadata, @@ -37,7 +39,7 @@ fg_pip <- function(country, # Return empty dataframe if no metadata is found if (nrow(metadata) == 0) { - return(pipapi::empty_response) + return(list(main_data = empty_response, data_in_cache = data_present_in_master)) } unique_survey_files <- unique(metadata$data_interpolation_id) @@ -119,7 +121,7 @@ fg_pip <- function(country, poverty_line := round(poverty_line, digits = 3) ] - return(out) + return(list(main_data = out, data_in_cache = data_present_in_master)) } #' Remove duplicated rows created during the interpolation process diff --git a/R/pip.R b/R/pip.R index 6f61a33e..69811e24 100644 --- a/R/pip.R +++ b/R/pip.R @@ -112,39 +112,20 @@ pip <- function(country = "ALL", # 5) country = "AGO" year = "all" # 6) country = "all" year = "all" # browser() - con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") - if(("ALL" %in% country || "ALL" %in% year) && fill_gaps) { - if("ALL" %in% country) { - country = lkup$aux_files$countries$country_code - } - if("ALL" %in% year) { - year = lkup$valid_years$valid_interpolated_years - } - - } else if(("ALL" %in% country || "ALL" %in% year) && !fill_gaps) { - if("ALL" %in% country) { - country = unique(lcv$est_ctrs) - } - if("ALL" %in% year) { - year = unique(lkup$svy_lkup$reporting_year[lkup$svy_lkup$country_code %in% country]) - } - } - - result_from_cache <- return_if_exists(country, year, povline, con) # This initialization is necessary for rowbind at the end if all the data is present in cache - out <- NULL + #out <- NULL # only run pip code if there is data that is not present in cache - if(nrow(result_from_cache$absent_args) > 0) { + #if(nrow(result_from_cache$absent_args) > 0) { # use result_from_cache$absent_args$country_code reporting_year and poverty_line and pass it further. 
# mains estimates --------------- if (fill_gaps) { ## lineup years----------------- out <- fg_pip( - country = result_from_cache$absent_args$country_code, - year = result_from_cache$absent_args$reporting_year, - povline = result_from_cache$absent_args$poverty_line, + country = lcv$est_ctrs, + year = year, + povline = povline, popshare = popshare, welfare_type = welfare_type, reporting_level = reporting_level, @@ -154,9 +135,9 @@ pip <- function(country = "ALL", } else { ## survey years ------------------ out <- rg_pip( - country = result_from_cache$absent_args$country_code, - year = result_from_cache$absent_args$reporting_year, - povline = result_from_cache$absent_args$poverty_line, + country = lcv$est_ctrs, + year = year, + povline = povline, popshare = popshare, welfare_type = welfare_type, reporting_level = reporting_level, @@ -165,6 +146,8 @@ pip <- function(country = "ALL", ) } + cached_data <- out$data_in_cache + out <- out$main_data # Early return for empty table--------------- if (nrow(out) > 0) { # aggregate distributions ------------------ @@ -304,10 +287,10 @@ pip <- function(country = "ALL", data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) update_master_file(out, con) } - } - final_result <- collapse::rowbind( - result_from_cache$present_data, out + #} + out <- collapse::rowbind( + cached_data, out ) # return ------------- - return(final_result) + return(out) } diff --git a/R/rg_pip.R b/R/rg_pip.R index 2a19440b..cca85f50 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -13,13 +13,12 @@ rg_pip <- function(country, reporting_level, ppp, lkup) { - # get values from lkup valid_regions <- lkup$query_controls$region$values svy_lkup <- lkup$svy_lkup data_dir <- lkup$data_root - + #browser() metadata <- subset_lkup( country = country, year = year, @@ -29,6 +28,8 @@ rg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir ) + data_present_in_master <- metadata$data_present_in_master + metadata <- 
metadata$lkup # Remove aggregate distribution if popshare is specified # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions @@ -37,7 +38,7 @@ rg_pip <- function(country, # return empty dataframe if no metadata is found if (nrow(metadata) == 0) { - return(empty_response) + return(list(main_data = empty_response, data_in_cache = data_present_in_master)) } out <- vector(mode = "list", length = nrow(metadata)) @@ -71,7 +72,8 @@ rg_pip <- function(country, out[[i]] <- tmp_metadata } + #browser() out <- data.table::rbindlist(out) - return(out) + return(list(main_data = out, data_in_cache = data_present_in_master)) } diff --git a/R/utils.R b/R/utils.R index 7ba63622..6f238a77 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,7 +11,8 @@ subset_lkup <- function(country, reporting_level, lkup, valid_regions, - data_dir = NULL) { + data_dir = NULL + ) { # STEP 1 - Keep every row by default keep <- rep(TRUE, nrow(lkup)) @@ -37,9 +38,13 @@ subset_lkup <- function(country, keep = keep, reporting_level = reporting_level[1]) + lkup <- lkup[keep, ] + #browser() + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") + cached_data <- return_if_exists(lkup,con) - return(lkup) + return(list(lkup = cached_data$lkup, data_present_in_master = cached_data$data_present_in_master)) } #' select_country @@ -90,8 +95,6 @@ select_years <- function(lkup, grepl("pip_grp", caller_names) |> any() - - dtmp <- lkup year <- toupper(year) From cd752f3089b990dde6f29a749022d74d897e447f Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 2 Jan 2025 19:27:35 +0530 Subject: [PATCH 10/49] fix for new implementation --- R/duckdb_fun.R | 1 - R/fg_pip.R | 2 +- R/pip.R | 2 ++ R/rg_pip.R | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 884868d7..425e2256 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -23,7 +23,6 @@ return_if_exists <- function(lkup, con) { # lkup, master_file, # by = c("country_code", "reporting_year") # 
) - #browser() data_present_in_master <- duckplyr::inner_join( master_file, lkup |> collapse::fselect(country_code, reporting_year), by = c("country_code", "reporting_year") diff --git a/R/fg_pip.R b/R/fg_pip.R index 181f8cef..9c594de7 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -85,7 +85,7 @@ fg_pip <- function(country, default_ppp = tmp_metadata[["ppp"]], ppp = ppp, distribution_type = tmp_metadata[["distribution_type"]], - poverty_line = povline[svy_id], + poverty_line = povline, popshare = popshare ) diff --git a/R/pip.R b/R/pip.R index 69811e24..59f6b45a 100644 --- a/R/pip.R +++ b/R/pip.R @@ -291,6 +291,8 @@ pip <- function(country = "ALL", out <- collapse::rowbind( cached_data, out ) + # Make sure no duplicate remains + out <- out |> collapse::funique() # return ------------- return(out) } diff --git a/R/rg_pip.R b/R/rg_pip.R index cca85f50..ef79a2c7 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -18,7 +18,6 @@ rg_pip <- function(country, svy_lkup <- lkup$svy_lkup data_dir <- lkup$data_root - #browser() metadata <- subset_lkup( country = country, year = year, @@ -54,7 +53,7 @@ rg_pip <- function(country, tmp_stats <- wbpip:::prod_compute_pip_stats( welfare = svy_data$df0$welfare, - povline = povline[i], + povline = povline, popshare = popshare, population = svy_data$df0$weight, requested_mean = tmp_metadata$survey_mean_ppp, From 3da0aff832c86c0a2f85020cfcff023cdd4f3339 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Thu, 2 Jan 2025 12:52:01 -0500 Subject: [PATCH 11/49] change for fill_gaps --- R/fg_pip.R | 7 ++++--- R/pip.R | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/R/fg_pip.R b/R/fg_pip.R index 9c594de7..be0e9cc5 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -29,6 +29,7 @@ fg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir ) + data_present_in_master <- metadata$data_present_in_master metadata <- metadata$lkup # Remove aggregate distribution if popshare is specified @@ -66,6 +67,9 @@ fg_pip <- 
function(country, valid_regions = valid_regions, data_dir = data_dir) + ctry_years <- collapse::join(ctry_years, metadata |> collapse::fselect(names(ctry_years)), + verbose = 0,how = "inner") + results_subset <- vector(mode = "list", length = nrow(ctry_years)) for (ctry_year_id in seq_along(ctry_years$interpolation_id)) { @@ -99,11 +103,8 @@ fg_pip <- function(country, for (stat in seq_along(tmp_stats)) { tmp_metadata[[names(tmp_stats)[stat]]] <- tmp_stats[[stat]] } - - results_subset[[ctry_year_id]] <- tmp_metadata } - out[[svy_id]] <- results_subset } out <- unlist(out, recursive = FALSE) diff --git a/R/pip.R b/R/pip.R index 59f6b45a..f443a431 100644 --- a/R/pip.R +++ b/R/pip.R @@ -145,7 +145,6 @@ pip <- function(country = "ALL", lkup = lkup ) } - cached_data <- out$data_in_cache out <- out$main_data # Early return for empty table--------------- @@ -285,6 +284,7 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") update_master_file(out, con) } #} From 55ccf31c1119e262f1538fdcf1cee3e4f8d5d398 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 6 Jan 2025 22:20:14 +0530 Subject: [PATCH 12/49] use more keys for joining --- NAMESPACE | 2 ++ R/duckdb_fun.R | 27 +++++++-------------------- R/fg_pip.R | 5 ++++- R/rg_pip.R | 4 +++- R/utils.R | 7 +++++-- man/return_if_exists.Rd | 23 +++++++++++++++++++++++ man/subset_lkup.Rd | 5 ++++- man/update_master_file.Rd | 19 +++++++++++++++++++ 8 files changed, 67 insertions(+), 25 deletions(-) create mode 100644 man/return_if_exists.Rd create mode 100644 man/update_master_file.Rd diff --git a/NAMESPACE b/NAMESPACE index 77762c0e..976cf64f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(pip_grp) export(pip_grp_logic) export(pipgd_lorenz_curve) export(return_correct_version) +export(return_if_exists) export(select_off_alt_agg) 
export(select_reporting_level) export(select_user_aggs) @@ -39,6 +40,7 @@ export(ui_hp_stacked) export(ui_pc_charts) export(ui_pc_regional) export(ui_svy_meta) +export(update_master_file) export(valid_years) export(validate_input_grouped_stats) export(version_dataframe) diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 425e2256..702813f4 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -1,14 +1,13 @@ #' Return the rows of the table if they exist in master file #' #' @param country_code Country Code -#' @param year Year -#' @param poverty_line Poverty Lines +#' @inheritParams subset_lkup #' @param con Connection object #' #' @return Dataframe #' @export #' -return_if_exists <- function(lkup, con) { +return_if_exists <- function(lkup, is_interpolated, povline, con) { # all_args_data <- all_args(country_code, year, poverty_line) |> # duckplyr::as_duckplyr_tibble() # This file will be read from shared drive which will be an argument of this function. @@ -24,14 +23,14 @@ return_if_exists <- function(lkup, con) { # by = c("country_code", "reporting_year") # ) data_present_in_master <- duckplyr::inner_join( - master_file, lkup |> collapse::fselect(country_code, reporting_year), - by = c("country_code", "reporting_year") - ) + master_file, lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), + by = c("country_code", "reporting_year", "is_interpolated") + ) |> duckplyr::filter(poverty_line == povline) keep <- TRUE if(nrow(data_present_in_master) > 0) { - keep <- !paste(lkup$country_code, lkup$reporting_year) %in% - paste(data_present_in_master$country_code, data_present_in_master$reporting_year) + keep <- !with(lkup, paste(country_code, reporting_year, is_interpolated)) %in% + with(data_present_in_master, paste(country_code, reporting_year, is_interpolated)) lkup <- lkup[keep, ] @@ -41,18 +40,6 @@ return_if_exists <- function(lkup, con) { return(list(data_present_in_master = data_present_in_master, lkup = lkup)) } -#' Create a dataframe with all 
possible combinations of `country_code`, `reporting_year` and `poverty_line` -#' -#' @param country_code Code of countries to be expanded -#' @param reporting_year Reported year(s) -#' @param poverty_line Poverty Line(s) -#' -#' @return A dataframe -#' -all_args <- function(country_code, reporting_year, poverty_line) { - expand.grid(country_code = country_code, reporting_year = reporting_year, poverty_line = poverty_line) -} - #' Update master file with the contents of the dataframe #' #' @param dat Dataframe to be appended diff --git a/R/fg_pip.R b/R/fg_pip.R index be0e9cc5..34b79340 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -27,7 +27,9 @@ fg_pip <- function(country, reporting_level = reporting_level, lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir + data_dir = data_dir, + is_interpolated = TRUE, + povline = povline ) data_present_in_master <- metadata$data_present_in_master @@ -67,6 +69,7 @@ fg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir) + # Join because some data might be coming from cache so it might be absent in metadata ctry_years <- collapse::join(ctry_years, metadata |> collapse::fselect(names(ctry_years)), verbose = 0,how = "inner") diff --git a/R/rg_pip.R b/R/rg_pip.R index ef79a2c7..dd1d9ba3 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -25,7 +25,9 @@ rg_pip <- function(country, reporting_level = reporting_level, lkup = svy_lkup, valid_regions = valid_regions, - data_dir = data_dir + data_dir = data_dir, + is_interpolated = FALSE, + povline = povline ) data_present_in_master <- metadata$data_present_in_master metadata <- metadata$lkup diff --git a/R/utils.R b/R/utils.R index 6f238a77..772ab629 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,6 +3,7 @@ #' @param valid_regions character: List of valid region codes that can be used #' for region selection #' @param data_dir character: directory path from lkup$data_root +#' @param is_interpolated logical : If `TRUE`, the call is from `fg_pip()`, for `FALSE` 
`rg_pip()` #' @return data.frame #' @keywords internal subset_lkup <- function(country, @@ -11,7 +12,9 @@ subset_lkup <- function(country, reporting_level, lkup, valid_regions, - data_dir = NULL + data_dir = NULL, + is_interpolated, + povline ) { # STEP 1 - Keep every row by default @@ -42,7 +45,7 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] #browser() con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") - cached_data <- return_if_exists(lkup,con) + cached_data <- return_if_exists(lkup,is_interpolated, povline, con) return(list(lkup = cached_data$lkup, data_present_in_master = cached_data$data_present_in_master)) } diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd new file mode 100644 index 00000000..d94283e6 --- /dev/null +++ b/man/return_if_exists.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/duckdb_fun.R +\name{return_if_exists} +\alias{return_if_exists} +\title{Return the rows of the table if they exist in master file} +\usage{ +return_if_exists(lkup, is_interpolated, con) +} +\arguments{ +\item{lkup}{list: A list of lkup tables} + +\item{is_interpolated}{logical : If \code{TRUE}, the call is from \code{fg_pip()}, for \code{FALSE} \code{rg_pip()}} + +\item{con}{Connection object} + +\item{country_code}{Country Code} +} +\value{ +Dataframe +} +\description{ +Return the rows of the table if they exist in master file +} diff --git a/man/subset_lkup.Rd b/man/subset_lkup.Rd index 9278bf4b..e39880be 100644 --- a/man/subset_lkup.Rd +++ b/man/subset_lkup.Rd @@ -11,7 +11,8 @@ subset_lkup( reporting_level, lkup, valid_regions, - data_dir = NULL + data_dir = NULL, + is_interpolated ) } \arguments{ @@ -29,6 +30,8 @@ subset_lkup( for region selection} \item{data_dir}{character: directory path from lkup$data_root} + +\item{is_interpolated}{logical : If \code{TRUE}, the call is from \code{fg_pip()}, for \code{FALSE} \code{rg_pip()}} } \value{ data.frame diff --git 
a/man/update_master_file.Rd b/man/update_master_file.Rd new file mode 100644 index 00000000..a281d50a --- /dev/null +++ b/man/update_master_file.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/duckdb_fun.R +\name{update_master_file} +\alias{update_master_file} +\title{Update master file with the contents of the dataframe} +\usage{ +update_master_file(dat, con) +} +\arguments{ +\item{dat}{Dataframe to be appended} + +\item{con}{DuckDB connection object} +} +\value{ +number of rows updated +} +\description{ +Update master file with the contents of the dataframe +} From 4d409522b95c54ac6bb4d7030425dfdc677266e8 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 9 Jan 2025 19:53:21 +0530 Subject: [PATCH 13/49] final touches --- R/duckdb_fun.R | 2 +- R/fg_pip.R | 1 - R/pip.R | 2 +- R/rg_pip.R | 1 - R/utils.R | 6 +-- R/zzz.R | 2 +- vignettes/duckdb-caching.Rmd | 91 +++++++++++++++++++++++++++++++----- 7 files changed, 85 insertions(+), 20 deletions(-) diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 702813f4..90bedb4e 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -7,7 +7,7 @@ #' @return Dataframe #' @export #' -return_if_exists <- function(lkup, is_interpolated, povline, con) { +return_if_exists <- function(lkup, povline, con) { # all_args_data <- all_args(country_code, year, poverty_line) |> # duckplyr::as_duckplyr_tibble() # This file will be read from shared drive which will be an argument of this function. 
diff --git a/R/fg_pip.R b/R/fg_pip.R index 34b79340..b735459d 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -28,7 +28,6 @@ fg_pip <- function(country, lkup = ref_lkup, valid_regions = valid_regions, data_dir = data_dir, - is_interpolated = TRUE, povline = povline ) diff --git a/R/pip.R b/R/pip.R index f443a431..7001ce78 100644 --- a/R/pip.R +++ b/R/pip.R @@ -284,7 +284,7 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) update_master_file(out, con) } #} diff --git a/R/rg_pip.R b/R/rg_pip.R index dd1d9ba3..ba815d89 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -26,7 +26,6 @@ rg_pip <- function(country, lkup = svy_lkup, valid_regions = valid_regions, data_dir = data_dir, - is_interpolated = FALSE, povline = povline ) data_present_in_master <- metadata$data_present_in_master diff --git a/R/utils.R b/R/utils.R index 772ab629..4177fb5f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,7 +3,6 @@ #' @param valid_regions character: List of valid region codes that can be used #' for region selection #' @param data_dir character: directory path from lkup$data_root -#' @param is_interpolated logical : If `TRUE`, the call is from `fg_pip()`, for `FALSE` `rg_pip()` #' @return data.frame #' @keywords internal subset_lkup <- function(country, @@ -13,7 +12,6 @@ subset_lkup <- function(country, lkup, valid_regions, data_dir = NULL, - is_interpolated, povline ) { @@ -44,8 +42,8 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] #browser() - con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = "demo.duckdb") - cached_data <- return_if_exists(lkup,is_interpolated, povline, con) + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) + cached_data <- return_if_exists(lkup, povline, con) 
return(list(lkup = cached_data$lkup, data_present_in_master = cached_data$data_present_in_master)) } diff --git a/R/zzz.R b/R/zzz.R index 86ebb03c..d0d7dd8c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -10,7 +10,7 @@ logfile = NULL, max_size = as.numeric(Sys.getenv("PIPAPI_CACHE_MAX_SIZE")), prune_rate = 50) - pip <<- memoise::memoise(pip, cache = cd, omit_args = "lkup") + #pip <<- memoise::memoise(pip, cache = cd, omit_args = "lkup") ui_hp_stacked <<- memoise::memoise(ui_hp_stacked, cache = cd, omit_args = "lkup") pip_grp_logic <<- memoise::memoise(pip_grp_logic, cache = cd, omit_args = "lkup") pip_grp <<- memoise::memoise(pip_grp, cache = cd, omit_args = "lkup") diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 320ddf2c..331dcadd 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -2,28 +2,97 @@ title: "duckdb-caching" output: html_document date: "2024-12-26" +author: "Ronak Shah" --- ```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) +knitr::opts_chunk$set(eval = FALSE, echo = TRUE) ``` -## R Markdown +## Introduction -This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . +Current caching mechanism for pip uses traditional caching where basically a hash is created based on the value of the arguments passed in the function and if someone calls the same function with the same arguments again the cached result is returned instead of doing the same calculation again. For `pip` we used the packages `cachem` and `memoise` to implement this system of caching. This traditional caching strategy works well in general however, `pip` is a special case and it would benefit much more if it had a custom strategy for caching. -When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. 
You can embed an R code chunk like this: +## How does caching currently work? -```{r cars} -summary(cars) +Consider these two `pip` calls: + +```{r, eval=FALSE} +# 1. +pip(country = "all", year = 2000, lkup = lkup) + +# 2. +pip(country = "AGO", year = 2000, lkup = lkup) ``` -## Including Plots +Since these are two different sets of arguments, two cache files are created and saved on disk. If `pip(country = "AGO", year = 2000, lkup = lkup)` is called again, which is the same as 2), the result is returned from the cached file stored on disk without doing any calculation. Needless to say, this is much faster. + +However, notice that the 2nd call is a subset of the 1st one: the result of 2) is already present in the result of 1). We have already done the calculations for all the countries for the year 2000 in 1); we just need the output for "AGO" from it to get the result for 2). + +## Custom caching for pipapi + +What if we could take a subset of an existing cache, as we need in the example above? However, this is not how traditional caching systems work. We would need to implement something custom if we want to make this work. -You can also embed plots, for example: +We came up with the idea of implementing this custom caching with a table in `duckdb`. Basically, all the queries that have been run so far are saved in this table. Whenever a new call is made, it checks whether the query has already been run: if so, it returns the result immediately; otherwise it does the calculation, saves the result to the table for future use, and returns the result. There are various scenarios that we need to consider, so let's use an example to understand each of them. -```{r pressure, echo=FALSE} -plot(pressure) +Consider that we are just starting out and there is nothing saved in the table.
+ +#### Scenario 1 - + +```{r} +pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ``` -Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. +Since nothing is saved in the table yet, this call goes through the whole calculation, saves the result in the table for future use, and returns the output. + +#### Scenario 2 - + +```{r} +pip(country = "USA", year = 2000, lkup = lkup) +``` + +This is something we have already calculated in our previous call. In traditional caching, this would be treated as a separate call and the calculation would be performed again. With our custom caching, however, it goes through the existing table and checks whether we already have the result for this call. Since we do have it saved in our table, we simply return the result as is from the table without doing any calculation. + +#### Scenario 3 - + +```{r} +pip(country = c("ARG", "USA"), year = 2000, lkup = lkup) +``` + +Notice that this time it is a combination of scenarios 1 and 2: we already have one part of the calculation ("USA") but not the other ("ARG"). In this case, we return the result for the country that we have in the table and send the rest of the arguments for calculation. We save the result of the calculation in the table and return the output by combining both results. + +#### Scenario 4 - + +```{r} +pip(country = "all", year = 2000, lkup = lkup) +``` + +In this scenario, before we check the table we need to decode the "all" argument into a list of actual country names, because in the table we save the data with actual country names. Once we have the list of country names, we check which of those values are already available in the table. If we consider the 3 scenarios above, we already have the results for `c("ARG", "AGO", "USA")` and we need to compute the results for the remaining countries.
After saving the data for the remaining countries in the table, we return the result by combining the two. + +#### Scenario 5 - + +```{r} +pip(country = "AGO", year = "all", lkup = lkup) +``` + +This is similar to scenario 4, but instead of `country = "all"` we have `year = "all"`, so in this case we need to decode the `year` parameter. However, the sequence of operations remains the same as above. + +#### Scenario 6 - + +```{r} +pip(country = "all", year = "all", lkup = lkup) +``` + +This is a combination of scenarios 4 and 5 where we need to decode both the `country` and `year` parameters, check the values that are present in the table, query the data that does not exist, save it into the table, combine the results and return the output. + +These are the 6 different scenarios that can occur. Note that I have not used all the arguments here in the `pip` call. We are using the default `povline`, i.e. 1.9, here but it can be something else. In that case, it becomes scenario 1, where nothing is found in the table, the output is calculated and the result is saved in the table. Similarly, we can also have situations where `fill_gaps` is set to `TRUE`, which would also follow the same process. + +## Code overview + +We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv(PIP_CACHE_FILE = "demo.duckdb")`). A table called `master_file` is created inside it where we save our cache. + +Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In the `subset_lkup` function we call the function `return_if_exists` which, as the name suggests, returns the data from the cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb.
+ +A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup` (Scenario 3 where we have a part of data in master file). The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. + +## Speed comparison From 050fa117a54959a4fe518fb34869e710a7c94dff Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Thu, 9 Jan 2025 10:13:03 -0500 Subject: [PATCH 14/49] add data' --- vignettes/duckdb-caching.Rmd | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 331dcadd..1ce9569d 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -96,3 +96,13 @@ Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup` (Scenario 3 where we have a part of data in master file). The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. 
## Speed comparison + +```{r} +microbenchmark::microbenchmark( + duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) +) + +#Unit: milliseconds +# expr min lq mean median uq max neval +# duckdb_caching 102.745 107.2221 112.5106 109.2035 114.8544 146.8841 100 +``` From 49d5d55374d13d3f5419861d2f61b93753a90ae8 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 9 Jan 2025 20:49:18 +0530 Subject: [PATCH 15/49] Speed comparison --- vignettes/duckdb-caching.Rmd | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 1ce9569d..6f4c7f2b 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -98,11 +98,21 @@ A named list is returned from `return_if_exists` function where it returns the f ## Speed comparison ```{r} -microbenchmark::microbenchmark( - duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) -) +country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", + "CHL", "COL", "CRI", "DEU", "DNK", "DOM", "ECU", "ESP", "EST", + "FIN", "FRA", "FSM", "GBR", "GEO", "GRC", "GTM", "HRV", "HUN", + "IDN", "IDN", "IDN", "IRL", "ITA", "KGZ", "LTU", "LUX", "MAR", + "MDA", "MEX", "MKD", "MRT", "NOR", "PAN", "PER", "PHL", "PHL", + "POL", "ROU", "RUS", "RWA", "SLV", "STP", "SWE", "SWZ", "THA", + "TON", "TUN", "TWN", "TZA", "URY", "USA", "UZB", "ZAF") -#Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 102.745 107.2221 112.5106 109.2035 114.8544 146.8841 100 +t1 <- Sys.time() + +for(i in seq_along(country_list)) { + out <- pip(country = country_list[seq_len(i)], year = 2000, lkup) +} + +t2 <- Sys.time() + +cat('\nTime elapsed : ', t2-t1) ``` From f4295b281f88178dc6f647572ab096d189635ca5 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Thu, 9 Jan 2025 11:46:34 -0500 Subject: [PATCH 16/49] time complete --- vignettes/duckdb-caching.Rmd | 22 +++++++++++++++++++++- 1 file 
changed, 21 insertions(+), 1 deletion(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 6f4c7f2b..0bdea11a 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -97,6 +97,25 @@ A named list is returned from `return_if_exists` function where it returns the f ## Speed comparison + +```{r} +microbenchmark::microbenchmark( + duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) +) + +#Unit: milliseconds +# expr min lq mean median uq max neval +#duckdb_caching 102.745 107.2221 112.5106 109.2035 114.8544 146.8841 100 + +microbenchmark::microbenchmark( + pip_DEV = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) +) + +#Unit: milliseconds +# expr min lq mean median uq max neval +#pip_DEV 51.01007 53.67546 62.35531 56.08937 60.12046 354.7717 100 +``` + ```{r} country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", "CHL", "COL", "CRI", "DEU", "DNK", "DOM", "ECU", "ESP", "EST", @@ -109,10 +128,11 @@ country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", t1 <- Sys.time() for(i in seq_along(country_list)) { - out <- pip(country = country_list[seq_len(i)], year = 2000, lkup) + out <- pip(country = country_list[seq_len(i)], year = 2000, lkup = lkup) } t2 <- Sys.time() cat('\nTime elapsed : ', t2-t1) +#Time elapsed : 6.338488 ``` From 7e377aa9a0ca0179a07dcdd9ea914bfda9d77b63 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Thu, 9 Jan 2025 11:53:53 -0500 Subject: [PATCH 17/49] add more stats --- vignettes/duckdb-caching.Rmd | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 0bdea11a..9562ee5b 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -134,5 +134,20 @@ for(i in seq_along(country_list)) { t2 <- Sys.time() cat('\nTime elapsed : ', t2-t1) + +## For Duckdb #Time elapsed : 6.338488 + +## For DEV version +#Time elapsed : 8.575941 
+``` + +```{r} +microbenchmark::microbenchmark( + duckdb_caching = pip(country = "all", year = "all", lkup = lkup) +) + +#Unit: milliseconds +# expr min lq mean median uq max neval +# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855 100 ``` From 82902af66186857b633bc1d17015a1152330d800 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Thu, 9 Jan 2025 12:23:38 -0500 Subject: [PATCH 18/49] more comparison --- vignettes/duckdb-caching.Rmd | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 9562ee5b..4a0e57bc 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -116,6 +116,24 @@ microbenchmark::microbenchmark( #pip_DEV 51.01007 53.67546 62.35531 56.08937 60.12046 354.7717 100 ``` +```{r} +microbenchmark::microbenchmark( + duckdb_caching = pip(country = "all", year = "all", lkup = lkup) +) + +#Unit: milliseconds +# expr min lq mean median uq max neval +# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855 100 + +microbenchmark::microbenchmark( + pip_DEV = pip(country = "all", year = "all", lkup = lkup) +) + +#Unit: seconds +# expr min lq mean median uq max neval + #pip_DEV 14.42378 14.78717 14.98249 14.96088 15.11043 17.44418 100 +``` + ```{r} country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", "CHL", "COL", "CRI", "DEU", "DNK", "DOM", "ECU", "ESP", "EST", @@ -141,13 +159,3 @@ cat('\nTime elapsed : ', t2-t1) ## For DEV version #Time elapsed : 8.575941 ``` - -```{r} -microbenchmark::microbenchmark( - duckdb_caching = pip(country = "all", year = "all", lkup = lkup) -) - -#Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855 100 -``` From 2726d0e9dfcded803c2ea92f0fb7e5245736650b Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Fri, 10 Jan 2025 00:10:25 -0500 Subject: [PATCH 19/49] update 
vignettee --- pipapi.Rproj | 1 - vignettes/duckdb-caching.Rmd | 23 ++++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/pipapi.Rproj b/pipapi.Rproj index 92f8cfee..4e3ca1bc 100644 --- a/pipapi.Rproj +++ b/pipapi.Rproj @@ -1,5 +1,4 @@ Version: 1.0 -ProjectId: bd9bbc88-fff8-48c9-bbe3-6ff0f016ecca RestoreWorkspace: No SaveWorkspace: No diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 4a0e57bc..0c88a5ce 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -143,19 +143,32 @@ country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", "POL", "ROU", "RUS", "RWA", "SLV", "STP", "SWE", "SWZ", "THA", "TON", "TUN", "TWN", "TZA", "URY", "USA", "UZB", "ZAF") -t1 <- Sys.time() +tictoc::tic() for(i in seq_along(country_list)) { out <- pip(country = country_list[seq_len(i)], year = 2000, lkup = lkup) } -t2 <- Sys.time() - -cat('\nTime elapsed : ', t2-t1) +tictoc::toc() ## For Duckdb -#Time elapsed : 6.338488 +#16.69 sec elapsed ## For DEV version #Time elapsed : 8.575941 ``` + +```{r} +tictoc::tic() + +for(i in seq_along(country_list)) { + out <- pip(country = country_list[seq_len(i)], year = "all", lkup = lkup) +} + +tictoc::toc() +## DEV +#178.97 sec elapsed + +## Duckdb caching +#6.96 sec elapsed +``` From 819dc36c32c873378874e2fbc93298c29c2a114a Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 10 Jan 2025 10:46:09 +0530 Subject: [PATCH 20/49] update numbers --- vignettes/duckdb-caching.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 0c88a5ce..84c1e513 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -155,7 +155,7 @@ tictoc::toc() #16.69 sec elapsed ## For DEV version -#Time elapsed : 8.575941 +#9.26 sec elapsed ``` ```{r} From 33bb2f808262bd073f07c6192652be4ba355084b Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sat, 11 Jan 2025 19:29:58 
+0530 Subject: [PATCH 21/49] call connection object only once --- R/fg_pip.R | 7 +++++-- R/pip.R | 9 +++++---- R/rg_pip.R | 7 +++++-- R/utils.R | 7 ++++--- man/fg_pip.Rd | 5 ++++- man/return_if_exists.Rd | 4 ++-- man/rg_pip.Rd | 5 ++++- man/subset_lkup.Rd | 7 +++++-- pipapi.Rproj | 1 + 9 files changed, 35 insertions(+), 17 deletions(-) diff --git a/R/fg_pip.R b/R/fg_pip.R index b735459d..941bf778 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -3,6 +3,7 @@ #' Compute the main PIP poverty and inequality statistics for imputed years. #' #' @inheritParams pip +#' @param con duckdb connection object #' @return data.frame #' @keywords internal fg_pip <- function(country, @@ -12,7 +13,8 @@ fg_pip <- function(country, welfare_type, reporting_level, ppp, - lkup) { + lkup, + con) { valid_regions <- lkup$query_controls$region$values interpolation_list <- lkup$interpolation_list @@ -28,7 +30,8 @@ fg_pip <- function(country, lkup = ref_lkup, valid_regions = valid_regions, data_dir = data_dir, - povline = povline + povline = povline, + con = con ) data_present_in_master <- metadata$data_present_in_master diff --git a/R/pip.R b/R/pip.R index 7001ce78..3b14ac51 100644 --- a/R/pip.R +++ b/R/pip.R @@ -118,7 +118,7 @@ pip <- function(country = "ALL", # only run pip code if there is data that is not present in cache #if(nrow(result_from_cache$absent_args) > 0) { # use result_from_cache$absent_args$country_code reporting_year and poverty_line and pass it further. 
- + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) # mains estimates --------------- if (fill_gaps) { ## lineup years----------------- @@ -130,7 +130,8 @@ pip <- function(country = "ALL", welfare_type = welfare_type, reporting_level = reporting_level, ppp = ppp, - lkup = lkup + lkup = lkup, + con = con, ) } else { ## survey years ------------------ @@ -142,7 +143,8 @@ pip <- function(country = "ALL", welfare_type = welfare_type, reporting_level = reporting_level, ppp = ppp, - lkup = lkup + lkup = lkup, + con = con ) } cached_data <- out$data_in_cache @@ -284,7 +286,6 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) update_master_file(out, con) } #} diff --git a/R/rg_pip.R b/R/rg_pip.R index ba815d89..de8368dc 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -3,6 +3,7 @@ #' Compute the main PIP poverty and inequality statistics for survey years. 
#' #' @inheritParams pip +#' @param con duckdb connection object #' @return data.frame #' @keywords internal rg_pip <- function(country, @@ -12,7 +13,8 @@ rg_pip <- function(country, welfare_type, reporting_level, ppp, - lkup) { + lkup, + con) { # get values from lkup valid_regions <- lkup$query_controls$region$values svy_lkup <- lkup$svy_lkup @@ -26,7 +28,8 @@ rg_pip <- function(country, lkup = svy_lkup, valid_regions = valid_regions, data_dir = data_dir, - povline = povline + povline = povline, + con = con ) data_present_in_master <- metadata$data_present_in_master metadata <- metadata$lkup diff --git a/R/utils.R b/R/utils.R index 4177fb5f..9772bd60 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,5 +1,6 @@ #' Subset look-up data #' @inheritParams pip +#' @inheritParams rg_pip #' @param valid_regions character: List of valid region codes that can be used #' for region selection #' @param data_dir character: directory path from lkup$data_root @@ -12,7 +13,8 @@ subset_lkup <- function(country, lkup, valid_regions, data_dir = NULL, - povline + povline, + con ) { # STEP 1 - Keep every row by default @@ -41,8 +43,7 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] - #browser() - con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) + cached_data <- return_if_exists(lkup, povline, con) return(list(lkup = cached_data$lkup, data_present_in_master = cached_data$data_present_in_master)) diff --git a/man/fg_pip.Rd b/man/fg_pip.Rd index 9012b01b..3f671bdf 100644 --- a/man/fg_pip.Rd +++ b/man/fg_pip.Rd @@ -12,7 +12,8 @@ fg_pip( welfare_type, reporting_level, ppp, - lkup + lkup, + con ) } \arguments{ @@ -32,6 +33,8 @@ poverty line} \item{ppp}{numeric: Custom Purchase Power Parity value} \item{lkup}{list: A list of lkup tables} + +\item{con}{duckdb connection object} } \value{ data.frame diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd index d94283e6..e18664b1 100644 --- a/man/return_if_exists.Rd +++ b/man/return_if_exists.Rd @@ -4,12 
+4,12 @@ \alias{return_if_exists} \title{Return the rows of the table if they exist in master file} \usage{ -return_if_exists(lkup, is_interpolated, con) +return_if_exists(lkup, povline, con) } \arguments{ \item{lkup}{list: A list of lkup tables} -\item{is_interpolated}{logical : If \code{TRUE}, the call is from \code{fg_pip()}, for \code{FALSE} \code{rg_pip()}} +\item{povline}{numeric: Poverty line} \item{con}{Connection object} diff --git a/man/rg_pip.Rd b/man/rg_pip.Rd index 4a4f1881..5d107e95 100644 --- a/man/rg_pip.Rd +++ b/man/rg_pip.Rd @@ -12,7 +12,8 @@ rg_pip( welfare_type, reporting_level, ppp, - lkup + lkup, + con ) } \arguments{ @@ -32,6 +33,8 @@ poverty line} \item{ppp}{numeric: Custom Purchase Power Parity value} \item{lkup}{list: A list of lkup tables} + +\item{con}{duckdb connection object} } \value{ data.frame diff --git a/man/subset_lkup.Rd b/man/subset_lkup.Rd index e39880be..a1657e43 100644 --- a/man/subset_lkup.Rd +++ b/man/subset_lkup.Rd @@ -12,7 +12,8 @@ subset_lkup( lkup, valid_regions, data_dir = NULL, - is_interpolated + povline, + con ) } \arguments{ @@ -31,7 +32,9 @@ for region selection} \item{data_dir}{character: directory path from lkup$data_root} -\item{is_interpolated}{logical : If \code{TRUE}, the call is from \code{fg_pip()}, for \code{FALSE} \code{rg_pip()}} +\item{povline}{numeric: Poverty line} + +\item{con}{duckdb connection object} } \value{ data.frame diff --git a/pipapi.Rproj b/pipapi.Rproj index 4e3ca1bc..20664e2d 100644 --- a/pipapi.Rproj +++ b/pipapi.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: cf4563bf-f15d-4f36-a587-51bf840c7f41 RestoreWorkspace: No SaveWorkspace: No From 21d4331e1d7c89919bc050d58d8adee6e521ea8d Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Tue, 14 Jan 2025 06:20:32 -0500 Subject: [PATCH 22/49] update speed comparison --- vignettes/duckdb-caching.Rmd | 44 +++++++++++------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd 
b/vignettes/duckdb-caching.Rmd index 84c1e513..44aec933 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -100,38 +100,20 @@ A named list is returned from `return_if_exists` function where it returns the f ```{r} microbenchmark::microbenchmark( - duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) -) - -#Unit: milliseconds -# expr min lq mean median uq max neval -#duckdb_caching 102.745 107.2221 112.5106 109.2035 114.8544 146.8841 100 - -microbenchmark::microbenchmark( - pip_DEV = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) + pip_DEV = pip(country = "all", year = "all", lkup = lkup) ) -#Unit: milliseconds -# expr min lq mean median uq max neval -#pip_DEV 51.01007 53.67546 62.35531 56.08937 60.12046 354.7717 100 -``` +#Unit: microseconds +# expr min lq mean median uq max neval +# pip_DEV 593.396 628.3455 1355.73 647.4085 664.028 61786.03 100 -```{r} microbenchmark::microbenchmark( - duckdb_caching = pip(country = "all", year = "all", lkup = lkup) + duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ) #Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855 100 - -microbenchmark::microbenchmark( - pip_DEV = pip(country = "all", year = "all", lkup = lkup) -) - -#Unit: seconds -# expr min lq mean median uq max neval - #pip_DEV 14.42378 14.78717 14.98249 14.96088 15.11043 17.44418 100 +# expr min lq mean median uq max neval +# duckdb_caching 97.96608 100.8852 115.458 103.0205 107.466 1135.287 100 ``` ```{r} @@ -151,11 +133,11 @@ for(i in seq_along(country_list)) { tictoc::toc() -## For Duckdb -#16.69 sec elapsed - ## For DEV version -#9.26 sec elapsed +# 9.71 sec elapsed + +## For Duckdb +# 9.75 sec elapsed ``` ```{r} @@ -167,8 +149,8 @@ for(i in seq_along(country_list)) { tictoc::toc() ## DEV -#178.97 sec elapsed +# 185.28 sec elapsed ## Duckdb caching -#6.96 sec elapsed +# 15.62 sec elapsed ``` From 
83cc628fd7517247a969bde5a00453f9c53028cc Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Tue, 14 Jan 2025 16:52:37 +0530 Subject: [PATCH 23/49] include dcos --- .gitignore | 2 - docs/404.html | 121 ++++++ docs/CONTRIBUTING.html | 127 ++++++ docs/LICENSE-text.html | 95 +++++ docs/LICENSE.html | 99 +++++ docs/PULL_REQUEST_TEMPLATE.html | 107 +++++ docs/articles/debug-caching.html | 237 +++++++++++ docs/articles/duckdb-caching.html | 356 ++++++++++++++++ docs/articles/images/clipboard-2235948010.png | Bin 0 -> 41108 bytes docs/articles/index.html | 96 +++++ docs/articles/new-endpoints.html | 163 ++++++++ docs/authors.html | 122 ++++++ docs/bootstrap-toc.css | 60 +++ docs/bootstrap-toc.js | 159 ++++++++ docs/docsearch.css | 148 +++++++ docs/docsearch.js | 85 ++++ docs/index.html | 233 +++++++++++ docs/link.svg | 12 + docs/news/index.html | 283 +++++++++++++ docs/pkgdown.css | 384 ++++++++++++++++++ docs/pkgdown.js | 108 +++++ docs/pkgdown.yml | 12 + docs/reference/Rplot001.png | Bin 0 -> 1011 bytes docs/reference/add_agg_medians.html | 121 ++++++ docs/reference/add_dist_stats.html | 115 ++++++ docs/reference/add_distribution_type.html | 120 ++++++ docs/reference/add_pg.html | 121 ++++++ docs/reference/add_spl.html | 121 ++++++ docs/reference/add_vars_out_of_pipeline.html | 118 ++++++ docs/reference/assign_serializer.html | 111 +++++ docs/reference/available_versions.html | 111 +++++ docs/reference/censor_stats.html | 109 +++++ .../change_grouped_stats_to_csv.html | 111 +++++ docs/reference/citation_from_version.html | 111 +++++ docs/reference/clear_cache.html | 116 ++++++ docs/reference/create_countries_vctr.html | 128 ++++++ docs/reference/create_etag_header.html | 119 ++++++ docs/reference/create_lkups.html | 115 ++++++ docs/reference/create_return_cols.html | 111 +++++ docs/reference/create_versioned_lkups.html | 116 ++++++ .../create_vintage_pattern_call.html | 131 ++++++ docs/reference/empty_response.html | 103 +++++ docs/reference/empty_response_cp_poverty.html 
| 103 +++++ docs/reference/empty_response_grp.html | 103 +++++ docs/reference/estimate_type_ctr_lnp.html | 115 ++++++ docs/reference/estimate_type_var.html | 109 +++++ docs/reference/extract_identity.html | 111 +++++ docs/reference/extract_ppp_date.html | 111 +++++ docs/reference/extract_release_date.html | 111 +++++ .../fg_assign_nas_values_to_dup_cols.html | 115 ++++++ docs/reference/fg_pip.html | 154 +++++++ docs/reference/fg_remove_duplicates.html | 122 ++++++ docs/reference/fg_standardize_cache_id.html | 119 ++++++ docs/reference/fillin_list.html | 141 +++++++ docs/reference/filter_lkup.html | 130 ++++++ docs/reference/filter_md.html | 119 ++++++ docs/reference/get_additional_indicators.html | 111 +++++ .../get_additional_indicators_grp.html | 111 +++++ docs/reference/get_aux_table.html | 119 ++++++ docs/reference/get_aux_table_ui.html | 120 ++++++ docs/reference/get_caller_names.html | 105 +++++ docs/reference/get_ctr_alt_agg.html | 158 +++++++ docs/reference/get_grp_to_compute.html | 123 ++++++ docs/reference/get_impl_ctrs.html | 128 ++++++ docs/reference/get_md_vars.html | 133 ++++++ docs/reference/get_metaregion_table.html | 111 +++++ docs/reference/get_param_values.html | 122 ++++++ docs/reference/get_pg_table.html | 115 ++++++ docs/reference/get_pip_version.html | 115 ++++++ docs/reference/get_spr_table.html | 115 ++++++ docs/reference/get_svy_data.html | 119 ++++++ docs/reference/get_user_alt_gt.html | 115 ++++++ docs/reference/get_user_x_code.html | 111 +++++ .../get_valid_aux_long_format_tables.html | 105 +++++ docs/reference/ifel_isnull.html | 115 ++++++ docs/reference/index.html | 378 +++++++++++++++++ docs/reference/is_empty.html | 124 ++++++ docs/reference/is_forked.html | 135 ++++++ docs/reference/lkup.html | 103 +++++ docs/reference/pip.html | 211 ++++++++++ docs/reference/pip_aggregate.html | 127 ++++++ docs/reference/pip_grp.html | 169 ++++++++ docs/reference/pip_grp_logic.html | 167 ++++++++ docs/reference/pipgd_lorenz_curve.html | 162 
++++++++ docs/reference/reporting_level_list.html | 103 +++++ docs/reference/return_correct_version.html | 133 ++++++ docs/reference/return_if_exists.html | 123 ++++++ docs/reference/rg_pip.html | 154 +++++++ docs/reference/select_country.html | 129 ++++++ docs/reference/select_off_alt_agg.html | 120 ++++++ docs/reference/select_reporting_level.html | 124 ++++++ docs/reference/select_user_aggs.html | 123 ++++++ docs/reference/select_years.html | 137 +++++++ docs/reference/start_api.html | 119 ++++++ docs/reference/subset_ctry_years.html | 137 +++++++ docs/reference/subset_lkup.html | 154 +++++++ docs/reference/ui_cp_charts.html | 134 ++++++ docs/reference/ui_cp_download.html | 133 ++++++ docs/reference/ui_cp_key_indicators.html | 128 ++++++ docs/reference/ui_cp_poverty_charts.html | 124 ++++++ docs/reference/ui_hp_countries.html | 129 ++++++ docs/reference/ui_hp_stacked.html | 119 ++++++ docs/reference/ui_pc_charts.html | 156 +++++++ docs/reference/ui_pc_regional.html | 136 +++++++ docs/reference/ui_svy_meta.html | 115 ++++++ docs/reference/update_master_file.html | 115 ++++++ docs/reference/valid_years.html | 111 +++++ .../validate_input_grouped_stats.html | 119 ++++++ docs/reference/version_dataframe.html | 111 +++++ docs/reference/wld_lineup_year.html | 111 +++++ docs/sitemap.xml | 300 ++++++++++++++ vignettes/duckdb-caching.Rmd | 42 +- 112 files changed, 14315 insertions(+), 31 deletions(-) create mode 100644 docs/404.html create mode 100644 docs/CONTRIBUTING.html create mode 100644 docs/LICENSE-text.html create mode 100644 docs/LICENSE.html create mode 100644 docs/PULL_REQUEST_TEMPLATE.html create mode 100644 docs/articles/debug-caching.html create mode 100644 docs/articles/duckdb-caching.html create mode 100644 docs/articles/images/clipboard-2235948010.png create mode 100644 docs/articles/index.html create mode 100644 docs/articles/new-endpoints.html create mode 100644 docs/authors.html create mode 100644 docs/bootstrap-toc.css create mode 100644 
docs/bootstrap-toc.js create mode 100644 docs/docsearch.css create mode 100644 docs/docsearch.js create mode 100644 docs/index.html create mode 100644 docs/link.svg create mode 100644 docs/news/index.html create mode 100644 docs/pkgdown.css create mode 100644 docs/pkgdown.js create mode 100644 docs/pkgdown.yml create mode 100644 docs/reference/Rplot001.png create mode 100644 docs/reference/add_agg_medians.html create mode 100644 docs/reference/add_dist_stats.html create mode 100644 docs/reference/add_distribution_type.html create mode 100644 docs/reference/add_pg.html create mode 100644 docs/reference/add_spl.html create mode 100644 docs/reference/add_vars_out_of_pipeline.html create mode 100644 docs/reference/assign_serializer.html create mode 100644 docs/reference/available_versions.html create mode 100644 docs/reference/censor_stats.html create mode 100644 docs/reference/change_grouped_stats_to_csv.html create mode 100644 docs/reference/citation_from_version.html create mode 100644 docs/reference/clear_cache.html create mode 100644 docs/reference/create_countries_vctr.html create mode 100644 docs/reference/create_etag_header.html create mode 100644 docs/reference/create_lkups.html create mode 100644 docs/reference/create_return_cols.html create mode 100644 docs/reference/create_versioned_lkups.html create mode 100644 docs/reference/create_vintage_pattern_call.html create mode 100644 docs/reference/empty_response.html create mode 100644 docs/reference/empty_response_cp_poverty.html create mode 100644 docs/reference/empty_response_grp.html create mode 100644 docs/reference/estimate_type_ctr_lnp.html create mode 100644 docs/reference/estimate_type_var.html create mode 100644 docs/reference/extract_identity.html create mode 100644 docs/reference/extract_ppp_date.html create mode 100644 docs/reference/extract_release_date.html create mode 100644 docs/reference/fg_assign_nas_values_to_dup_cols.html create mode 100644 docs/reference/fg_pip.html create mode 100644 
docs/reference/fg_remove_duplicates.html create mode 100644 docs/reference/fg_standardize_cache_id.html create mode 100644 docs/reference/fillin_list.html create mode 100644 docs/reference/filter_lkup.html create mode 100644 docs/reference/filter_md.html create mode 100644 docs/reference/get_additional_indicators.html create mode 100644 docs/reference/get_additional_indicators_grp.html create mode 100644 docs/reference/get_aux_table.html create mode 100644 docs/reference/get_aux_table_ui.html create mode 100644 docs/reference/get_caller_names.html create mode 100644 docs/reference/get_ctr_alt_agg.html create mode 100644 docs/reference/get_grp_to_compute.html create mode 100644 docs/reference/get_impl_ctrs.html create mode 100644 docs/reference/get_md_vars.html create mode 100644 docs/reference/get_metaregion_table.html create mode 100644 docs/reference/get_param_values.html create mode 100644 docs/reference/get_pg_table.html create mode 100644 docs/reference/get_pip_version.html create mode 100644 docs/reference/get_spr_table.html create mode 100644 docs/reference/get_svy_data.html create mode 100644 docs/reference/get_user_alt_gt.html create mode 100644 docs/reference/get_user_x_code.html create mode 100644 docs/reference/get_valid_aux_long_format_tables.html create mode 100644 docs/reference/ifel_isnull.html create mode 100644 docs/reference/index.html create mode 100644 docs/reference/is_empty.html create mode 100644 docs/reference/is_forked.html create mode 100644 docs/reference/lkup.html create mode 100644 docs/reference/pip.html create mode 100644 docs/reference/pip_aggregate.html create mode 100644 docs/reference/pip_grp.html create mode 100644 docs/reference/pip_grp_logic.html create mode 100644 docs/reference/pipgd_lorenz_curve.html create mode 100644 docs/reference/reporting_level_list.html create mode 100644 docs/reference/return_correct_version.html create mode 100644 docs/reference/return_if_exists.html create mode 100644 docs/reference/rg_pip.html 
create mode 100644 docs/reference/select_country.html create mode 100644 docs/reference/select_off_alt_agg.html create mode 100644 docs/reference/select_reporting_level.html create mode 100644 docs/reference/select_user_aggs.html create mode 100644 docs/reference/select_years.html create mode 100644 docs/reference/start_api.html create mode 100644 docs/reference/subset_ctry_years.html create mode 100644 docs/reference/subset_lkup.html create mode 100644 docs/reference/ui_cp_charts.html create mode 100644 docs/reference/ui_cp_download.html create mode 100644 docs/reference/ui_cp_key_indicators.html create mode 100644 docs/reference/ui_cp_poverty_charts.html create mode 100644 docs/reference/ui_hp_countries.html create mode 100644 docs/reference/ui_hp_stacked.html create mode 100644 docs/reference/ui_pc_charts.html create mode 100644 docs/reference/ui_pc_regional.html create mode 100644 docs/reference/ui_svy_meta.html create mode 100644 docs/reference/update_master_file.html create mode 100644 docs/reference/valid_years.html create mode 100644 docs/reference/validate_input_grouped_stats.html create mode 100644 docs/reference/version_dataframe.html create mode 100644 docs/reference/wld_lineup_year.html create mode 100644 docs/sitemap.xml diff --git a/.gitignore b/.gitignore index 52b9f47f..3ac36758 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,8 @@ TEMP/ TEMP.R renv/ .Rprofile -inst/doc tests/testdata/app_data/ .Renviron -docs logs/ /sessionInfoLog demo.duckdb diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 00000000..00f29df8 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,121 @@ + + + + + + + +Page not found (404) • pipapi + + + + + + + + + + + +
+
+ + + + +
+
+ + +Content not found. Please use links in the navbar. + +
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.7.

+
+ +
+
+ + + + + + + + diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html new file mode 100644 index 00000000..d890b0ca --- /dev/null +++ b/docs/CONTRIBUTING.html @@ -0,0 +1,127 @@ + +Contributing • pipapi + + +
+
+ + + +
+
+ + +
+ +

Development is a community effort, and we welcome participation.

+
+

Issues

+
  • Before posting a new issue or discussion topic, please take a moment to search for existing similar threads in order to avoid duplication.
  • +
  • For bug reports: if you can, please install the latest GitHub version of pipapi (i.e. remotes::install_github("PIP-Technical-Team/pipapi")) and verify that the issue still persists.
  • +
  • Describe your issue in prose as clearly and concisely as possible.
  • +
  • For any problem you identify, post a minimal reproducible example so the maintainer can troubleshoot. A reproducible example is: +
    • +Runnable: post enough R code and data so any onlooker can create the error on their own computer.
    • +
    • +Minimal: reduce runtime wherever possible and remove complicated details that are irrelevant to the issue at hand.
    • +
    • +Readable: format your code according to the tidyverse style guide.
    • +
  • +
+
+

Development

+

External code contributions are extremely helpful in the right circumstances. Here are the recommended steps.

+
  1. Prior to contribution, please propose your idea in a discussion topic or issue thread so you and the maintainer can define the intent and scope of your work.
  2. +
  3. +Clone or fork the repository.
  4. +
  5. Follow the GitHub flow to create a new branch, add commits, and open a pull request.
  6. +
  7. Discuss your code with the maintainer in the pull request thread.
  8. +
  9. If everything looks good, the maintainer will merge your code into the project.
  10. +

Please also follow these additional guidelines.

+
  • Respect the architecture and reasoning of the package.
  • +
  • If possible, keep contributions small enough to easily review manually. It is okay to split up your work into multiple pull requests.
  • +
  • For new features or functionality, add tests in tests. Tests that can be automated should go in tests/testthat/.
  • +
  • Format your code according to the tidyverse style guide and check your formatting with the lint_package() function from the lintr package.
  • +
  • Check code coverage with covr::package_coverage(). Automated tests should cover all the new or changed functionality in your pull request.
  • +
  • Run overall package checks with devtools::check().
  • +
+
+ +
+ + + +
+ + + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html new file mode 100644 index 00000000..1fdea57f --- /dev/null +++ b/docs/LICENSE-text.html @@ -0,0 +1,95 @@ + +License • pipapi + + +
+
+ + + +
+
+ + +
YEAR: 2020
+COPYRIGHT HOLDER: World Bank
+
+ +
+ + + +
+ + + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/LICENSE.html b/docs/LICENSE.html new file mode 100644 index 00000000..ec5770de --- /dev/null +++ b/docs/LICENSE.html @@ -0,0 +1,99 @@ + +MIT License • pipapi + + +
+
+ + + +
+
+ + +
+ +

Copyright (c) 2020 World Bank

+

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+
+ +
+ + + +
+ + + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/PULL_REQUEST_TEMPLATE.html b/docs/PULL_REQUEST_TEMPLATE.html new file mode 100644 index 00000000..91f84d87 --- /dev/null +++ b/docs/PULL_REQUEST_TEMPLATE.html @@ -0,0 +1,107 @@ + +Prework • pipapi + + +
+
+ + + +
+
+ + + +
+ +
+
+

Related GitHub issues and pull requests

+
  • Ref: #
  • +
+
+

Summary

+

Please explain the purpose and scope of your contribution.

+
+ + +
+ + + +
+ + + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/articles/debug-caching.html b/docs/articles/debug-caching.html new file mode 100644 index 00000000..25bec325 --- /dev/null +++ b/docs/articles/debug-caching.html @@ -0,0 +1,237 @@ + + + + + + + +Debug caching and API endpoints • pipapi + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+

How does caching work? +

+

In the pipapi package, the file zzz.R contains the +.onLoad function, which looks like the following:

+
+.onLoad <- function(libname, pkgname) {
+  if (Sys.getenv("PIPAPI_APPLY_CACHING") == "TRUE") {
+    d <- rappdirs::user_cache_dir("pipapi")
+    # log <- sprintf("%s/cache.log", d)
+    cd <- cachem::cache_disk(d,
+                             read_fn = qs::qread,
+                             write_fn = qs::qsave,
+                             extension = ".qs",
+                             evict = "lru",
+                             logfile = NULL,
+                             max_size = as.numeric(Sys.getenv("PIPAPI_CACHE_MAX_SIZE")),
+                             prune_rate = 50)
+    pip <<- memoise::memoise(pip, cache = cd, omit_args = "lkup")
+    ui_hp_stacked <<- memoise::memoise(ui_hp_stacked, cache = cd, omit_args = "lkup")
+    pip_grp_logic <<- memoise::memoise(pip_grp_logic, cache = cd, omit_args = "lkup")
+    ui_cp_charts <<- memoise::memoise(ui_cp_charts, cache = cd, omit_args = "lkup")
+    ui_cp_download <<- memoise::memoise(ui_cp_download, cache = cd, omit_args = "lkup")
+    ui_cp_key_indicators <<- memoise::memoise(ui_cp_key_indicators, cache = cd, omit_args = "lkup")
+    assign("cd", cd, envir = .GlobalEnv)
+    packageStartupMessage("Info: Disk based caching is enabled.")
+  }
+}
+

What this means is that if the environment variable +PIPAPI_APPLY_CACHING is set to "TRUE" (i.e. Sys.getenv("PIPAPI_APPLY_CACHING") == "TRUE"), we create a disk cache +which saves the results on disk. This disk is your local computer if +you are working locally, or the server if the package is deployed there. The +cache is created at location d, i.e. +rappdirs::user_cache_dir("pipapi") in this case; however, +you can change it to any local folder while debugging. The rest of the lines +wrap the functions that we are caching, such as pip, +ui_hp_stacked, pip_grp_logic and so on.

+

To see how this works, let’s say we call any function which is +cached, for example pip.

+
+pip(country = "CHN", year = 2017, lkup = lkup)
+

Now a file is created at the cache location which holds its output and +looks like this.

+

+

The name of this file is a hash based on the arguments passed +to the cached function (pip), and the log file has all the +logs of the caching operations.

+

Now if a new call is made like :

+
+pip(country = "all", year = "all", lkup = lkup)
+

then a 2nd file is generated whereas the log file is updated.

+

If you call the 1st pip call again :

+
+pip(country = "CHN", year = 2017, lkup = lkup)
+

then this time it will use the cached result and give the output +instantly. No processing is done at all in this. No new file is +generated this time around but the log file is updated.

+
+
+

Debugging caching +

+

In the pip-precaching-script repository, I have created a +branch called debug-ronak; if you look at +the file main.R in this branch you will see the exact script that I used +to debug the API. Note that there are two levels at which we need to check +caching or any general PIP issue. One is when you are using +pip directly as a function, as shown above with +pip(country = "all", year = "all", lkup = lkup), and the other +is via the API, as shown in the main.R file. +Also note that I am using the pip function as a general example +here. The same is true for all the functions in the pipapi package. +The API calls the functions from the pipapi package, so the +basic code is the same across both levels. However, the API has some +additional layers on top of these functions which might make the two +behave differently.

+

The recent case of caching not working only for country = “all” and +year = “all” was visible via the API; however, when we used the +pip call directly, caching was working perfectly fine. So in +this case it was something in the API that was causing the trouble. This +is very rare but it does happen every now and then. To conclude +the topic: the issue was that for intensive calculations like country = +“all” and year = “all” we were using the +promises::future_promise function for asynchronous calls, +and we forgot to include the promises package in the +DESCRIPTION file of the pipapi package, so the +promises package was not available and it did not work when +using country = “all” and year = “all”.

+

Moreover, it is very important to kill the API you +are launching if you are using the same port for debugging +(apis$kill()). We are using callr to launch +the API in a new session, so we can’t actually “see” that a session has +been launched; it is therefore important to be aware of this. For example, +if we launch the API on port 8080 with callr, that session +is busy in the background. The current session that we have access to +works normally and we can execute our code. If we make some changes and +run the same code to launch the API, it will not reflect the changes +because the background session is still busy with the previous code and +has not been killed yet. In such a scenario we have a couple of options +:

+
    +
  1. Use a new port number to run the code in a different session
+
  2. Kill the previous session and launch the API again to see the +changes
+
+

Step 2 is what apis$kill() is doing.

+

All the best!

+
+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.7.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/duckdb-caching.html b/docs/articles/duckdb-caching.html new file mode 100644 index 00000000..a0263d04 --- /dev/null +++ b/docs/articles/duckdb-caching.html @@ -0,0 +1,356 @@ + + + + + + + +duckdb-caching • pipapi + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+

Introduction +

+

The current caching mechanism for pip uses traditional caching, where +a hash is created based on the values of the arguments passed +to the function; if someone calls the same function with the same +arguments again, the cached result is returned instead of doing the same +calculation again. For pip we used the packages +cachem and memoise to implement this system of +caching. This traditional caching strategy works well in general; +however, pip is a special case and it would benefit much +more from a custom caching strategy.

+
+
+

How does caching currently work? +

+

Consider these pip functions

+
+# 1.
+pip(country = "all", year = 2000, lkup = lkup)
+
+# 2.
+pip(country = "AGO", year = 2000, lkup = lkup)
+

Since these are two separate sets of arguments, 2 cache files are +created and saved on the disk. If a call to pip is made +again with pip(country = "AGO", year = 2000, lkup = lkup), which +is the same as 2), then it returns the result from the cached file +stored on the disk without doing any calculation. Needless to say, this +is much faster.

+

However, notice that the 2nd call is a subset of the 1st one. What I +mean by that is that the result of 2) is already present in the result of 1). We +have done the calculations for all the countries for the year 2000 in 1); +we just need the output for “AGO” from it to get the result for 2).

+
+
+

Custom caching for pipapi. +

+

What if we could take a subset of an existing cache, as we need +above? This is not how traditional caching systems work, so we +would need to implement something custom if we want to make this +work.

+

We came up with an idea to implement this custom caching using +duckdb in a table. Basically, all the queries that are +called till now are saved in this table and whenever a new call is made +it checks if the query is already called, if yes then it returns the +result immediately or else it will do the calculation and then save the +result to the table for next use and return the result. There are +various scenarios that we need to consider and let’s take help of an +example to understand each one of them.

+

Consider that we are just starting out and there is nothing saved in +the table.

+
+

Scenario 1 - +

+
+pip(country = c("AGO", "USA"), year = 2000, lkup = lkup)
+

Now since nothing is saved in the table this will go through the +whole round of calculation and save the result in the table for future +use and return the output.

+
+
+

Scenario 2 - +

+
+pip(country = "USA", year = 2000, lkup = lkup)
+

Now this is something which we have already calculated in our +previous call. In traditional caching this would be treated as a +separate call and the calculation would have been performed again. +However, in our custom caching it goes through the existing table and +checks if we already have the result for this call. Since we do have it +saved in our table we will just return the result in this case as it is +from the table without doing any calculation.

+
+
+

Scenario 3 - +

+
+pip(country = c("ARG", "USA"), year = 2000, lkup = lkup)
+

Notice that this time it is a combination of scenarios 1 and 2, where one part +of the calculation we already have (“USA”) and another part we don’t +(“ARG”). In this case, we return the result for the country that we have +in the table and send the rest of the arguments for calculation. We save +the result of the calculation in the table and return the output by +combining both results.

+
+
+

Scenario 4 - +

+
+pip(country = "all", year = 2000, lkup = lkup)
+

In this scenario before we check in the table we need to decode this +“all” argument to list of actual country names because in the table we +save the data with actual country names. Once we have the list of +country names we check which of those values are already available in +the table. If we consider the 3 scenarios above then we already have +result for c("ARG", "AGO", "USA") and we need to find +result for the remaining countries. After saving the data for the +remaining countries in the table, we return the result by combining the +two.

+
+
+

Scenario 5 - +

+
+pip(country = "AGO", year = "all", lkup = lkup)
+

This is similar to scenario 4 but instead of having +country = "all" we have here year = "all" so +in this case we need to decode the year parameter. However, +the sequence of operation remains the same as above.

+
+
+

Scenario 6 - +

+
+pip(country = "all", year = "all", lkup = lkup)
+

This is combination of scenario 4 and 5 where we need to decode both +country and year parameter, check the values +that are present in the table, query the data that does not exist, save +it into the table, combine the result and return the output.

+

These are 6 different scenarios that can occur. Note that I have not +used all the arguments here in the pip call. We are using +the default povline i.e 1.9 here but it can be something +else. In which case, it will become scenario 1 where nothing is found in +the table and the output is calculated and result is saved in the table. +Similarly, we can also have situations where fill_gaps is +set to TRUE which would also follow the same process.

+
+
+
+

Code overview +

+

We are creating a duckdb file to save our table. The location of this +file is saved in an environment variable PIP_CACHE_FILE +(Example Sys.setenv(PIP_CACHE_FILE = "demo.duckdb")). A +table called master_file is created inside it where we save +our cache.

+

Based on the fill_gaps parameter we call either the function +fg_pip or rg_pip. Both functions call the +subset_lkup function to filter the data from +lkup that is relevant to our call. In the +subset_lkup function we call the function +return_if_exists which, as the name suggests, returns the +data from the cache if it exists. A new file called +duckdb_fun.R has been added to manage all the functions +related to duckdb.

+

A named list is returned from return_if_exists function +where it returns the final output (if it exists) from the master file +and subsetted lkup (Scenario 3 where we have a part of data +in master file). The partial (or full) final output is again returned as +a named list from subset_lkup function which is used at the +end to combine the two outputs. If lkup is non-empty then +after all the calculation is done we use the function +update_master_file to append the master file with new +data.

+
+
+

Speed comparison +

+
+microbenchmark::microbenchmark(
+  duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup)
+)
+
+#Unit: milliseconds
+#          expr     min       lq     mean   median       uq      max neval
+#duckdb_caching 102.745 107.2221 112.5106 109.2035 114.8544 146.8841   100
+
+microbenchmark::microbenchmark(
+     pip_DEV = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup)
+)
+
+#Unit: milliseconds
+#    expr      min       lq     mean   median       uq      max neval
+#pip_DEV 51.01007 53.67546 62.35531 56.08937 60.12046 354.7717   100
+
+microbenchmark::microbenchmark(
+  duckdb_caching = pip(country = "all", year = "all", lkup = lkup)
+)
+
+#Unit: milliseconds
+#           expr      min       lq     mean   median       uq      max neval
+# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855   100
+
+microbenchmark::microbenchmark(
+  pip_DEV = pip(country = "all", year = "all", lkup = lkup)
+)
+
+#Unit: seconds
+#    expr      min       lq     mean   median       uq      max neval
+ #pip_DEV 14.42378 14.78717 14.98249 14.96088 15.11043 17.44418   100
+
+country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", 
+  "CHL", "COL", "CRI", "DEU", "DNK", "DOM", "ECU", "ESP", "EST", 
+  "FIN", "FRA", "FSM", "GBR", "GEO", "GRC", "GTM", "HRV", "HUN", 
+  "IDN", "IDN", "IDN", "IRL", "ITA", "KGZ", "LTU", "LUX", "MAR", 
+  "MDA", "MEX", "MKD", "MRT", "NOR", "PAN", "PER", "PHL", "PHL", 
+  "POL", "ROU", "RUS", "RWA", "SLV", "STP", "SWE", "SWZ", "THA", 
+  "TON", "TUN", "TWN", "TZA", "URY", "USA", "UZB", "ZAF")
+
+tictoc::tic()
+
+for(i in seq_along(country_list)) {
+  out <- pip(country = country_list[seq_len(i)], year = 2000, lkup = lkup)
+}
+
+tictoc::toc()
+
+## For Duckdb
+#16.69 sec elapsed
+
+## For DEV version
+#9.26 sec elapsed
+
+tictoc::tic()
+
+for(i in seq_along(country_list)) {
+  out <- pip(country = country_list[seq_len(i)], year = "all", lkup = lkup)
+}
+
+tictoc::toc()
+## DEV
+#178.97 sec elapsed
+
+## Duckdb caching
+#6.96 sec elapsed
+
+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.7.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/images/clipboard-2235948010.png b/docs/articles/images/clipboard-2235948010.png new file mode 100644 index 0000000000000000000000000000000000000000..47f4e915a0d467db111a5d6962c6940bda3b0b2c GIT binary patch literal 41108 zcmd43cQ{;K_XeybkqFU3g6KljNc2uHI->_eLiFA{L5M!lMK^i`(MBf`y@t_8N%S^) z9ql{vyzlcq<@ewB{qbFME_1Hgb7rr-*4k^|`(A4cQ&R!q-=VmJfq{W9FDI>zfq~tQ zfq``w_a^YkG>jk@1A`VrURqMq19P*<(}4!$wsN~h^r#+X*I;{G%hX`rwD7V4HHmMp zwAry~Eadh%EGj+fHt68IQnaC5n4sG&XOshMt_UMor)83yrwfF!#1P8_GRlErZ@Lrj zepGkwwYAhA871AurpF}2ki=qS!en$@KsA2uK{xgF)ccu_-kUROyXR`!?6V#BGWcBE zY_u$R2NU@2A6Gycp@8$Ul8Q(Gr689LFPHhz$wa<8+8S<%0yDOgXhRm?Gk<&4yn zYr}6Xw0i4wREk4ht;A=-h5*ZPV^ATfP;~!FdsxS{P zx9BbJEm5nhvo9=S?v5#Zm;SFM`&vD^qwiT*nT_H1kO#?QaVoqCIRb?$azQFsj&Mwc z6L`=KBeTO&9CKdLN}^&)gTB4hb9m1elcL31Tl$P&BAq!Ge4Wj{4dl~(P$2cUog``{ zzx(6n|NSB}rnLB#wfjHS{!hojau_fVx-lW+n}2@x-y1a?+VCO5KZo__4H-LT===@+ zLtLu=4EWC$2@Kc++yAQJf1kekM*Nn=@QW7otK`2Lfi7DRS0vw5g(;h>o7?bmf9l$#a;n8b<>2u1L7QBBRdMAFew(ZT4*k`gUkvo_`=wT; zjcvF7rI;*f+DexEA#zK<15*ld5ArDA#!yb`+AvO<`h>^OSxJPb`%qt)w`HmOlm1Mv zT1|y;UNRf^8P_D@jwi9gabIY~L`9UGot@W(h513iA5i6adTP$@uH#g>$*seqqhYu$ z#AS8obwNpOdUZWFp^h0-=cjM**u5C?f6B6_kaZV)jx|)L6DsB#^w>+=Bd7hAeRkOQ znxTGe ziHT(5b_NCpg$MSN^$7zRqN&~EdNU8YmZi}nqpwn%d2(_7>rQ{nYL+AkMMXT*(?oq0 z>OIFa8o!g*Ft)T1#Mrt<2#a5ag-ZcfoEyiCjk}mqoirjt%2#4FkJI55B zgbwWxmRWh8in!HHKG=R4Vq$hjyx4Sq(s;r_i^zJJCheA%VVYF3R> zKxxWi)%|UT(9Yph;LfmH?o+s?twsrJ_j^=C%*U*R%DNGz&q%p!X`#=}$A?auDOdNt zrCRQF+4A_FtE7mQzgtFAcP*Oh{US4uH~PfO!}CTdbu6b%oz!Uk^h3wog|#vGd}5to zeULr;S{6>YLUf*KFGHJ1N-q|CiY9d4iX>R{zhM}#Uw|3o2bonf`BZal&fSeEo#F>Q zgzfC14WS|YBvH*unZ5_U{&x|~rF25RdhJ)d3mgxQcs`c{Gk-1FA+*(=woAbuSi@FqW5z!P-Xq>FmF$xJXy4*_& zS{vmgvQ=#{OVuOYo~l;7Z7&uuNq~%_`8e)skl!x!SIgn@PzJC|@heU>I>*xMmX3el zekQ%^W*_h2!7$Xti9YylJv#UtGVRZxOx2u_AMG`GfADZNsr!njm7|;XYIFE!7IS!0 z%x$@6r!waRTnd2*<4mTb7rXFZvViwpVu{spVtMl}D;H+0iqbDEz) zQVw}+Ue-<_@*(i?G9wz?mmDr-&FFR7ClPv9^l{C+4x2rgmk~MO+>*5Z3(LsUTeKIr=?NI 
zwMV-CqUOiQ5YjS#g)>Fi(?0*fFm4TF(7A9Mc2JPZ?o85}(;#R)3K2jnc0}ikPSP1R z_jhH2h@dec8OfS3kJFw6)6n^;=?5Qn`};{`d`;_lHY_I+*(3xr$>5GT{dy9WJEJsY zeRC%r{3D8u0$gRiF(&End%ylYw2@)`p>g^zY|$)Y2>G_0wt{hz^fegXc1_Wf5p>;r zQBKWJ9}rfy7KvfyO+}LhM0WBWd?FV-_;>FP*R?cCMcv%*dfKA9L5w$FT$Vd{HLp;= zh-b_ZP6KHUL{~(Uj<0q|sQWBYtA6K;U`fj2*cePd@eSG5Tag`KwW6Lb$OYN~q`7=B>ZR!2q+z z|Cvx`oJ5=dnNXBrA!8)?rA$IiF$1X6Xs=R_&4rD%;eb?&y{>x}=o90^o9kW?^95sN z1sd4c>&!_WE+?)2ms}coS7kO?pHd`R5KG{ z$u)rg5w&|U>WIuPAg2Fw~9{6^5Ygx7N7MyNyBb6A!1=Fn=laDJuER}H>c%~Yq<$=NlIU=kC=X)&| zBj7OUayBl!wTA8HhR5PW?MuI~|7TXfzMlV)tVUomlLyAHoT6aT%NhPYV^22h*4Xgr zJ^QR7_bY`+(mA;}kX_!|&9HgqG0`GFXOC7I{IQ%?r*30ie^2y5&^=M5rlyAls@E>2 zX{t>hI%%c{?z21$<;;AAW|JxN$w)rl$kd(pL4R-6T^Y4c9dlcd=3YtioDA{dPJ%<3 z{vLhUFlV4D4eZZuMaRh44}meh1+l0I|T_kMK8F;|km45GxIOqWY~;k?8o z_UDBD^M(m)DPlf0D!4&=i4q%+oRR)-bL)D&>L7f}Vt3a){?6b0=K2RdU}F)s3l`$k z{v}fU=ZAkh0GgtHMXUc)H5fQf0J0Uhm4HG1PxWKqP`to^)pAdRS^wq?*O)?r09S%^ zC5!kc*1wxffF}MBVTBuijS9c-yD$NCO{Ixt@Sk1KU%p^*7n3QxtajhGmA@~GTPyJY z5OiGz5Og_2qKOrA>F>DaS_YmF3~6p@M-x5je{~!{1*j~9WkB-zJG?&?{a3{Rs3+`=_F4$8k6f8|4-j7I^$TV}<;ELYte; zwa`J^wlFR~?-KN$(Ds4}fCHVV&#=LF!8SFwZG!$9j;{4tdOI>IYH0Oz|KRAGhRbLy z;jDLZs))V6xB?b&G3xF-$G0#91vzJuXu05G6L6z0Z1$MeA&D~S^V8**M* z`^bNU7%!ca6Zc1L95{VKE?XF{W|UQo1IF{^1ZVI$@txWbJQ$MoT{Hn8ZL=d=dGX1d*cqaw>)D2<%}0C z9bJsH!rXU>&{27vmNO0Hs|yGcm(@N-Umvg7=Hs3(#67J2s533@<&!w@iMiU{cWS#=?tOwYail_aWZ7^T)m#{@Cp`I}?pf>{2_;O_9wH9W3{b-tPR> zV3`%5b)Ht+$gK<&rR~{Ft6Lc=Ny~65*gX5a-r*BF^Ej)g)286djbe z@}8Uq+?oW6XsKO%a4~h!xyofEqWpePW$3DTjr3xECWB>JzX0v z`!<7OsPyse^>x*nF(w^&B(cY@3--5xV1#vjA&>Wj>Y1D68!de5iVu6Bm;K+`{MPGb zkj)qG`tj!H6PQYy zfm7@KvgoqR`ou8Go3fkQVso#!Jv?0PBCdwvlZNyr#?ry(~AY`&3S9u4; z-$J2g=3BC+QX&&UslL_;rj!K{^n4A?u~F$#i>V&&d}hNw?!~}&(JFL0Ue)mdBZsHS zJ3rLrLPzMct2ZRbADKcC#*oi7cUue;qe**RMXe0xhKvzNry8H^{LEC|L@&~DQV~P> zMpZn?GmmMwZ)AG`XTkIXt0dK#yB`9L?wfYLvRCw9LZ9i2Hh^(LpyZ zs1l<+jq@7bTd$Dw-JYx0Uie3K@LyMZiG9K!ttWX&hIeEmA!ik`RLtT2OS2{;N8{fQ zDzoM5o4ntZdKA9bWw|AeXv3;6hfOe9<(*WI+lrK)k7n|f%=y?R8P38vURB+%%N(!9 
zh!*0!5!Khbw!~MqQH1b%vTt>Es^>MY`~1W1#4gbdDWOn}PV*InC-1GMk+uDHT7_vZ z!=8!xbzQtShR;-0*pfUW9$x-6-t#=i6lFc%>*YfH`u-k>ig6kI(I&+FzA66UmKPbS zYi0MMhs2F8!_XfezaS*HMpuz`Rg*O9VKgK<-jBIVCVl=aX_{-?m4d^}11UybV@rcL z+G~D8RFHWOZbFFcu=vz3n~T@#iozpUwhO)?uIf0}8yobDpw3Vsd)&k7u%c!z-}I8l zidU*{FlWPZREX{$o_q^S?}7bXbNY#X7_FImj$u{%{ISlKsKV9CktF))h;ANi@`7-m z#c{~6%2Y(dpwm38TB20tER^`^vfp8(#Z%X@L#1-68J_(q*rdm7m5`+7To1DM^NPNy zl2dvF88VUONH&U>GDgM20GE2oi1)(tqZgTOvwq*JuhJ)ZHemXs$#Cufjg3f(ehfTS3e&z+dnWcJh3E?c0X>E zYjHqrj5wY7S-tcTAZ$60ExC7-c_e9E5q9!L;rC+oO8>lQAp%bE{DvMBomlCjg>@A0 zkLC0?9Eey4I3#rE3+w}=e|b#v&P>bkoHXk8!klj{sOe3FQC0g!28-C|VIX>b2VgJR zDq%Y}qJmPR|KCVJf;RkXo??>4!L+B>a@55^wKd=HpwXGoZq;fc^4>5x8BIRdoWGK< zXZrCjx5{@*!F$)i=z6jLjKuo8djQx>4HwJA#DvG|Ql!auhZIu%hWuhZ(>~v*7M|Md zu+T8uwF|8Fe`1CKHgUps!uAT$SC9S)Q0_?noNte?qBuBNl3zdHr9&U>>8#yrt_f>C zK~=B!ZZtxnjuaWs`aL)A0obT~=J6N&e}cwf+OLB*HptD|1^X-yp0~LPT?2sz^O{EJ*@deoa6Ef>Q{oO>aWCF_iTf7I<@QOqYcH%L} z{Eq8E7XJVJVz1)Mk}fZaFaMX^z>nbpT=cF~p=lfLKL=$Gv9|J3QTZX;s};fCwUaTub9Y-clMVLNsE-f z>HOApz~1eVdKz%_h0|WVDuy6{7AWLtdrt|#(_JY&o|7*r)TyOjON#JB+jBh7jNm|i z`-9|o^_sBIRUAfjwzT;2L3J>TWY#I}8^Jlc@YJ%)Vlr@Ih-S4E+fz-BU)p%!moBNJ z#&}=@NoG7QntDEX|#w%otD@!XrE)pNVRU5u5WC zjoNOA;e1pZ{5X1AHm;Klalm~1qp$nSvef=z|Gm=aDo6zOD6yRk`IqXQ7CjrfaOh{m^EmgUND@Xb%kenM^-)zCMGPN;`;cHPMnuUifa{x-%$FU-qhI@#NG61(QX zlMKDW%*@SuyEBBKDF5}y`;Xac3>+MT${r_a6>Hx>B_W-72a0=>xDZ-85E&o8pV(L_ z5^uxtzPTXsiJhdCEEL6a=Zs_`7x+)$xdz}lLn*M2|0WqbI2m*W}tVkZ0j z%g>qYE8fb|yXWU3_yh!IhYhdSn3ydx6#D1(e`;V7uU%I-`K;CM)zP{dyGqMsr z)^+f6KHmtRQ?GW4J~`MX&sZB-Iub4HwaLK>SQ3W_HP>P^^S}UZ^O82Q?utd6t8oYG zk%))d*0ZsNP0HKtvST8HqQ_JmdQ~hZdu|M3?6?^|d-6gqt7^;hSHEa3>2@+r>W*i< zd$%apTiJF&$gjjWHdxc88D8s8^cp=JS9{MY?<@fN8I|ES`f(EYExjoq08@Bu+P0E* z3x&$7G#^|ZsQFAV8Ca|SnriZcYz70xq@;@*9+3K2+s}l67S8OldT-h1cQe!s9RM8V z5s>=W@TRrKy`KjqvE$6vS@iWJZWod23lxZY%306i#;ZowCL)y|T)`G)O#>yYqz!q> zxAr!o`-Qz$kYb&!))`^VJC^92N(ZRS5kqMFgzwLQTO+(5?0@c8tNEeN&PK&loPU1B z^kmV6R?;w5Xb&$B!3N2`iC?U#K6mfqAn%zPxqL&^E(GNWoRlRK=6rvh6{xi_nQLYG 
z%FxOlO5=L~MlZ&xT3A^raCoL3@9(l_R;>slKYwCL4D?tTNPjc8+e<7hiinPkBsF}X zDEaxSZb$q<(?D+oIhR>0#pH*0+AVsDStTCyGvA}3Gt{DC?;p zcGIy4g{$q_V@VUVOPcsIF62*JPBu;`Dk7ppDNg$VD9kv+s`ruG<+^}vb`fILYaHq5 zd34S2lO2+pJ|76-1SMH>dA{j`TNi{)m1?e9%vGFZK2{a2mErz%0XUh)ZF13KI!i|W zqOHm2ld~s2rvFI?q~B}I92A8!i$5#ds8xzMoF@rg2Sq}mi0}8rKVNpX><#S4~1j+lFz(^c#W`k(Xgs;iF^BT_fmmN z8%}GsTYMvTBRN{o@*&`HaPOS9@04M(ZbJn$hQ)pvYx9?smh(F$Jgy_&r5>fn&AIgS zQ0psKEN}hoB*i2LMBs<^ClLY}Vtp`Oyce^b8|d2EWrFnCm7e|8ltTs0j!C78*Gau_ z10xmYU=h_|Gvu=nMt=iDLEN64k%7&Uqv3YLQ<-};;;^Y%Yx~Q#hl*(|%gMFh`)Fpq znPC=)>-BY`az!@A%kBv+f%)QWCYILs4r46{$)EqX767U!et21>*o=qnVxX?cJYQtC ztSoXdu+zRmEEe6W$(8ulu3Tuq$!>tMM3fT8u4fE_|C{6ZnU)}xgiL9C-R8OErk82p zXKdt)>$CIiRyo3^4*Ja!CI-F;rMXxIt^2nF*4NUezCBy-T8xOy@ICoXbNSRoq{ojX zdm@JKUS|ixXL*KfC#$_j?$eU$()&W&2t-+)#3(HG)KSohM`voWaWhEeYPXYyExY#& zwI`kYX?FO8W+ex%6DI!5KCPvmot>Ny(@#38o$IPBj;8rZ-`S~0qUzLsx~;YhDLI7D zy0)6QgWc!l$ZjwMnxSas(>SdfznVb*j8$3vV+?*+Cet!_9-&H-J|j;qc2b%mDI-`yFot6fQ03VeIen zc-lx>L=5ooZ93VeHuiH}p~?MEqQ2^+cbFQ{Eaah}O&o~z!&)9rxDX9jF}J!6p5m5` z|HX#UJ!uY_ao9&BhCTCAIr5Hst7(Bq6n=w&C4vlhPZw|o*~q=sm&nD0`n5qTWDy01{2LA!2Y-?37_=)_c3?szn45 zc}8>uI^qy0*&rE({xruv^%qu!DxnV!4~weUSx~C+zQXC#1F;}HA;WsHMx#3rvNHAQ zJ-HgPZoQH9wF$3Od+9YEgPX{BzLSaFg-&cRdoU9+c1e-O)(F^FQ?HohT{7;*_yMRq z`7QO6t+h4ow>w?j5yly^=}U1-K4rS3ijD>CiixI)Uwho2I`omVe#b90MXDr|EknqQ zvY)CYZdrwsj^au{8Jx`nzdCYbc@YW3$D|u+96Yv;R`v~hJ7VzO92Ybj$Qd!=koUS_ zuNNyrru@RphBI})x61KS`u1$C$go=)iwaxlq~Ww&`KFTd#ZYqIg9zFqPJ^BcB15 z&g{f|{5czCdUu-7t)4qhf3n5>2}-B4TCG4<+qD zw-YDaF+PGCk#^AQ+%cXl+mP(|O3tE^dSr`?xe^HB_snT~p-PPJK74wtOX{yGs$B>cd&!=6!*GT@< zfZnTIk?Q%x#QU|i@>)GIcQoB^=hYPTI!k9fZ=`l*kjo@j|Jn0wVNdon=pu^w>&4Bp z8O9^A*=V%%A;SS>0qRLCr*K-x)z!Xp`+vfJQr!Ff%C5yHz0>Y-NH41@0|js&Ooti~ zs1afCkx!+oez=mwjIXgu+!4N61s3a=l-qm1-&4CJMDzU?fI>c-YL^+7v~l6RI@1rW zQB{21H~4axFQsgD0EwE&IOOKMJz~3&RYDG$i^vR0U83E0J zEtA-S^9^eXyf!X|{V4oxz-f=6oDoT!aI^=nUqNzk3db{)N)^X)?4HAW1J#)k+xMWa zZ{CY{-&|4DD1~R5zKohnFG;+LbuFpQGsWh#Q%`Ujf+Zl8MwpAX^17;REzZC&0nRd8 
zJme|;tB6-mVi$0@K}4g_ABX}^Li)AumbI$iJvaXd44ll+0WKMfv{#DUQQ}8YZ+Sd(X*l6~9#gLH1+)Vw z-DLQYm8f1fy|%x>JP#*y=JtxM-|Y_^;R`knlNAQY{G$?ZFU+BspT@zFWPBw-0PGlh!&Ho5H$g3$XM^e=aq!c@fURBe}uCI!G$=Ff{+#?}tI;J6IB%6sL+ z{_}e`30~d62_3P~5tZsw^ggm?`T#4HWxX{B$KkSF_^eR~cEHUe_+1BQ|c&!OIqEE8b+AL}xH* z77HvPz@jr^-!ite=F>Q08FwDT=hB=jGR4QWK^K1N$-VV?L!b_uL%h*8Vlh`F%@ApV zM6VdvJ|Ccge|+;Iu~;S${v=8Lu3U?CUB>8D6}L{RKF7mao1ln@@ETR7n$<(B-<`$p($&sZfs1BNCXgQ1V6p#NjD^moYo2R-VnRa*^G^X zqbq1S*=(uFT#tlxbw_%VUX}lhN3u&dG&GDVmfL{4kms?V1;BlNNIt<#pCK|lr;GC5 zA7_W^5gw5I*+D*o*4d*Ngw8xPU1x8o`BmMQ+Y=0JrTdMNEiHdB8S%wojOO$+C!WTIvx1PyEVE4M0JH6Sjj}Vfux~7jnYU2p0XTe7 zfTs8tG&0!tgHM$&JNaYGOVqAj6rnW?7`KX%E4WYg<%4_o?pgagwJ7bw-cR3pEvTyj z_WAW*xcq_edJ?(y4;8eF{}YA#BR-s02SWSGf)pl;6y6OQikO4j=T-hc_BIb76 z5ySac59!2A^5vmkfRG%1aZgnG9NBa}?KoWMLUG2|;q+b#8o~xx~5-Fd|NEhLtNtWJnfpT5$-D`5!5+Pf}T9SXI}&P|=;c z4}hx5LgC-!j^j*mfxrzjOF7TbPkOB-lQ--HceNroq#daTNKby}n-A&KHvFUok9xZ` zX!KZa09p4boQkwdeSj#s`57yG|dWR?Ps)eMy7H;3{L+YAtRUFkR1Ww6roMBQ=v!_E!tD$GA$ z0gdp0&ljH>J1|=uqO4yMl|ewNd+ie`=D(@rTqX>Ffg>IM zcMH!WAfKIItd4{8F9z~MB2Xx+r!IHvUoAge07^8qRDp=_pET-Q2|XZ7Zz)gr=-({} zA|UHu&nhGFSE2v7x4ixTVo)3`SR^DQc&vsvc-N9vKYq%?qq#x<${&+hqCxL%g5moQ zL#68&f-i^;3-5)_-@PB0i}xPjU0aK+rGW|rH5#nL)sYeA*!85nlLcreg|ZNJYF+m| z7=+jJ20x2PRq@RLC<~nW5ED#=Nz(xATSDi9Ay=W)fT__q>qA9F&EtpVCje$2I&MA| zHGkch30+ut{MK!0Ij|{#NTT6Wq*1l=c#artcrnEK%Q)$^?qC!|I6%rg8h0c|BQD3` zdBZ$$2Vq?TW7_*~+7*kptg{_aLw3??Ru179l zP~WAH|Mc>vuyfh6UyU=r=;g zeCGj<1W=#}*r%Hx(?bUjP-%76om2~*Q!DJ>lK6}YHg?|*@Cuz5rQCvvH+bQrC|#5r7VxZ&)2nY?Q$!>xQ|je|GqC``U@)^EDQlj~>Ca6c|&nxQW(4!s+AVZ9e!4 zKcgW6&(Fzb2pt);a(fz6bnVO{4*NgkLoA{3azrqh?apf!t{Y|b&W)pvjpuFo%BVx; z06Yq{i4(iQ(4Z`QOS)Vc2T3a-eMsES--u3J-<-^$mD$eCBDxL^Z+1&>P6p`npJW)BUlKc&4& zfqApK?~iiQV__0n-~{lbr~If^6|yp7i`qyP+~`P!vy^@`c6r+CS|&6;wqrs3-t?q) zkz>k4ly4ycrA*21%Q+cm$;0~G_M|>TvlOloZwd%$0xpk$zp{50Y9G9!DL$o8D!FQr zq(0{wO-?obI9=NKko{BHeWG$Z@%1nDeqq+Jc3dz~3e@MDl~VN6>}^|%=t7aZq4PZt zTct>d3jrvY&T!UhVY5v-Nc|q9G$QWih>6en`J4E*1NAA?NGp5hmrbjej2vLND25OD 
z8abtk^Hb&?)=_-CHHeAa#t&1IVp8H&Ax#ii{e`h!57aG}Y4xekR zvn-Gg=Tfe_xsPR&YpEl2_kPPODmw;4h-G%puu(sM`r&ibuX-ip(%y~=U7X=6HmzgX zv!^CigQ}h*cKk#x$wMiv{E>pAL=3ELBhr7!at)$GBWu5JqM2czxa%LVuo=UWOc zg;H0m-Hx{qUEezumpqsI2i`&$;n(#US;SbDLcF`AD~O5<`%BPL6| zN9*8Ifn$%}L$nKDq_c!W+?HHtT&HQuqtE;q2v;0=l^cfrkdZHJTaBqy$~M$%7|1oc z4U!^Fhj4T4bUyWaFnrBRWj3tJcGi8N=IKH?pKbx7P#$^a>MQ+FuABxYaD$QYr3cPB zXL!jI{nF^{se%IJj=IZHE4g9k4Q;mvo{%Zx4K%@NYq6{JT_9d7UQ1ePZS+&4kik+M z%h-_oMb2AOgX5kCcuG!P#?^d^&26qnnUA~+Z9rlS%IZGR=LT>{=}`eoFAqwgv$b*4 z$nC>jTe7`m_-ou`#^pREUA6}S>?nocd}Ltlw?#YgS!I$J5*v*$uKY8L)i_UlW@q2{ zp;_AS-nSv_{i<0+u!%F!hBD&W6}PiFYjY)e^G-NHaw%u?suf)c=hxEcRF%lzd1B2Z zi4>J6PgG@3()d$VS}CA9$?Kf1GDA>&vy^A6;J-#iscH5&1Xg0WrU_3by7I+7pOsW_rA8m6E^b1p_o z=F-B$jPyySqo7>gV<5}PPreEHi9^85MZkx)`nT=Q07+12P z9zLH%HHrBk8$!8r=0!n%E$S!}FufK?+tDcnWnr7z;Ty1V%!F})iw^TTuFHCzYU>T+ z(~8G)_k81G7r^PJjx)z)Lmi}w_bU%(Dj5(gMCQ1{&&6CtvZvR_yig^1mO4{-_gid0 zmzY|EievGY+u;`?QM|Pe7Y5b3S`Ct-Ov`Z@;cQ|wCGgx*qxo|U6sw1PDs!?aGDcbx zN$MHa=d6zJ!W_4R1)Acw7 zXR1ec`cK{T>_3(I*?q7N{3Mc?*}*Fq%wl$LVL_2oXP=|z888dWS@;6WXKCuY=V^A< z=g#ZC{|s^cAQ)CB7dqNXC}*dW(S<+o+O_Ffnbscqb!L*J*bp^}r{mP-m3~@P1POrh(5$j=0c*KFz6Jy7FQgm=Y1KR%oEq^bpm;!~=KX0}a#njC ze5XeFh>~m2!Ed=Bsha@c!_r6^r|ZJPx5`=6=@bo0HvHlN3R7B7AzDhW6WVCQnWBQL z4dom3baU59Y=5I8G1oBlJF)=seRcn)I}pj7RDmZ`reC}b(qew=rsbOZR8IPO(ZBhZ zMIV44X_`3GKq45ZhXn@5gU^l=n*zWOD^0oN0O0+muB#kSnE}Z2LIxH(4+zF-nEuTK zP=*glA*kT-9L`S&$2A%XWVL}omWuI5a>VX1g&T=E1^AuDK=-v2ck-VA7>eJtnw|xC z>LxMpRDQs(?A4n9AhaSpdCWzIVl}#N07ZADvown+R$ZM_HJx(4;g z2WxHYzOGA#=;82-(@#tui1=&dSfNKm3ZOFVYgC5#x})Y21jxaQ>k~&F0id!w0ESys zAq8oDQ}&-z_5Wg;?B_e0UC0=A<}IXGl$R&tgDe#0PWqVD5Vc>>J^tcR3CBocHAba? 
z4R0s_7K6U#=w5@k-v|@Y`r=g#&W}?1p`Q{rfOponIzgWg3!w&K^F@NN*>XXzvjZNK zz%!NJxkq`K1O~Y^Ug8C1B5ui-C_9*uzUP;_BN-C5mZ&W182yOFDO{aUSaMMaSnx<(tmugkV``@b8l-$&IxsnY7ePQ=Bo zZeJiep~E`kRC#lo0#YsI=%>C*&Y|9uAFyAI9nr(T?TzHH>WJG@#i{d* zj9foleow7Dpz)JFXlatS>fyOgVqV7Sgk7H4hh8F+!u+0)`k%t%60Akg95mEh3EF2ssfNzRIFn26lm_=Lc&w|l=;TH!5WrMPJ z=F4nPL>TET6E0znMbD(0fYHwyzQj^#+sGImhf|)eVOu#DkQ~Lv zyp~MZ)Z@_26<+_EuzGII!g=c*i`{^ij3y8${&&41=?Z3nr%yGyzw>kNr$g(MDTT;} zw(LpMT&z8(=~POBf6gOqpN(IQgb9qnnZ@+=Pou_-NynX3?WQ!cu)$P1D)Zc@aMW;p z+I&Bvxjl;qs$a}+{{C3eKGhp2RjAt_ANt!a`0e8#4qy>(MVFmcswN-f*hK%GhZl<@ zOjE)>G0i7koph3Y8u^~@1dz;jj0qgw3)RH!M;0ZI0#;to1}5km=L&#NWJ9-VeTHxF zKHEGgDk{8qW*=Aj0tjjC^z47DCHjKPV?e5ZS=fK|dZzQBlgmJJknqZI-27k=4lOnh zEt{4(h`4vrm%!v^G<{&Y{}S?^1^$i+dB?CSPue8@Cx^1eQBSBs0W}Ms;d1-8LJ=>t zi-F)0H6GDE2wsvDx&~gLT~=#ep2h@EryYhD{y~QP=kM3YKh(gVL19LMg7R!)Cy^l? z<`##l?^iQRh2{Ov=W%>Jm*~)lsIygm>Zun-GeRmxz5%P`)P0=chZbMBiHm~^cHx%( zQw~p)CTPChM`YrI`xg3a$8fE8Vv7NqYc>A_{^=VT+^RuFL<2BEhlvPpeg9R{@)jSr zt=dCQa{*d5Ndz&KcGvHKW^Zqc28jhI-!*7Ga+| zD;jz36ddIBFC6Umid9tR%^u;{tkvmlGskZd8F0MwEicBgvh=Fp2(NQMl){;%^S85A z$Sv%C)iI~|&Uw5`1{EI4l_$(C)xgtz`l#19cX4Hds#%Wx=~*YZ&TWshLg&rOUbp?7 zl^rQ zhAs@1D3#t_&P>mq#UqKqL_MO&NxGL2B@&HItHuG)Bct@UuZw~HcYUzGjsd5#H@3-$ zuh|7nWawWj`CriiYpP*aHFSdEHjfnPbzvq@ha!%p;|C1XSgBs*|Kufo+m8GtK z2o$QCDW;5g{+vft^o0fjG3#!X^0=V!u)Lg`DI6p#8~=2+!j)}T=%2?%madmrD5Vpx zV1R6>amKR}>D?;e3}eV6n8#92LfafKR9pocJ4o4(68GNULcG;A>E&kgy6!6u1rV zUXqGM|`eX+T#jOS70-CUphp7LcB|#TY@?pwbe;Vm)oSl+f;h} zQz0pzza2&Ww-(?i^>GvncW1G=h5H%3URBqX+g8Mps}VDY)ts`qz>TsEkq9#InO*gG zS3{Y`#OP@x>7{Jsj0}OlF-p&`ZtoUnVHIl8K~!$OL^pEYct|iE&LJE<;o|1Jwi3ZI zzCc9YDYm{?uK>w->BXdP?p8yIdNTx5ajJfrqe9|?p9iT5b+MQ&SAW=uvPw;8d)G)y zDF4-UR)3t)-Mgw3pJ+uWsd(VY%+G2DzlT$9Cyl);o*3ZIY=nx0guhM2fe2y`zj%Te z3iJqn{8$9s2%I-{S=Zc?bDScm+?+@?hP@Y>%KeJUhm+}Bcn5yl`hoZPiFS1%Ss>_k zG~YUbleekjRs6|aoLc)AjB+IYjZr+iw+L>s0Vk>4^?lL;e*IhyTGR}IvUgzFF>RDq zdx){$L7=xPN?AYNC8CM>?6M%!u_Z_KFts5{ko`deY!DyD$QMG_DNEeIgW^}F01XKu 
zm)zR|2@;z)W)7p*a?gYmTZ6htAm|4x>gg3(CP`yk52t3f$^LGZ4nIiRfwJb~Kn`BpsbnEh`6OR(qwy%Kkn6f8{h;O} zGhV&+*2nBZkQ>QTU&(Juh1TyeqV}cVP*zvB;ZZZG1y)d99=WI$e~F>;ufRvT3;Uf3 z*n98BFE+9%Pm4VtQ17WqoA*6vd;PZj&1`vMx@!D3!Ge{Jp7xqCG@1kA*V{Yc%pgqJ z>t%s3OPcm;idpZmlM9|MydoI=7&|{k{;G6HL`UyeIkA}}h=V=0DLVDV+s6}H=?xOP zre8MB%$B(XH(e=&ixBHy6V*Mevj_6hzm}t_P~nK`!PlyI!f}pIvV{g<<5~cF0x<`F z_Q78-S!64FA~vPd{&k46kG&&t`SoVxL{}_dD~?-`1S{6S{%nj!I_0yM&`Iyo!<*5G@OR}N}Tmi3I`1yX~=`z+;T?Y%mseT?A= zv_CYm7(lp>(tUJnA2K)&(My~0_S6)hSm}yoJS$iTa3nscCcf#&k_iSkM~VmITS7`G zcfCOg$wWR4!g~Efs#+=sJoXKDQ8ne_I|(L{E4leTy#jf3@2`9&Ugd7Euppjhnc+!? zStV`f+alau*?W>X`F0rSjzT9`=vvk?t@Y|AOXKwelu?XaB`&PWr-!94DK1|graX>1 zQ1i4Dbt1GX>jT%Unf$oDGu=?NOhPM$t*+f6-+qePK6GnRu$)4T016u zvwIa}GIDx2@Ri_ZLa3aOBtkGrkGFhyr{adN!N zv;0F6p>hbHc7;@ML3)3uQuqd`SxX|w?zl{aP33S#-DOS=)CmWNRb%?>$o?;d4(!; z9=VSSFk+7s2EAoFGFwBUzJ(fzYp@1~laU5Gs)ABIw5RsDUp?S_tM?{ty*839Anr&$ zo-5)9y~5VMbW%=q#U8jEPgu|9{Ym`rgFq-!t!w+ByulcOC>5=I?G{t6C;^{XGI=tg zski;9gek4Xa=1%P6RqZpyM<00AiSQ7_-alXz z2svD>@F3%>o>3Q>CgN$&+I;sozU-t5vE@5^(Dos8xHG7;$i$|=46+9)dC0AJmgiDm zSuE5#hL5q3e7ogM)|B;tZogEOQKL@8NEHHPYKUMJU$;|nt*Lntxe2$XY~#aaB>u2s z+REE*XOE^Jc8zk3FS)^0W4k}4t?(Iv!G2t>HGi&&t86%97-l`evx>WS@?D8pJwFb&S8;ihf;1DN4 z*?0>go2g^$gx!KKL_deH1_?iNX8Lm5IVXikEqSG-Ifh7=HYnN7E|wjm74Lh?fAI2d zsz~1oIbWp8kQ`YzHT81tNP%}% z=KPSRGx(b`{Azsnlvr}N^I4)GN1V>sD80{~>4KBx>W?@!PI*kK_#A?S{Vy$2ncAk; znkw%8GQn;B>hNVgjEOrIrL_HJLmzjkM#inJu+~9Bw3r!`0^xJ@s*JO8kJhLd>p0KT z7OVt z-5?8)V36L`*cdP7m#37{y*MAv^(rUBdwtR5;wcq2|DMK^r<#Q`j2wc!6&)m!r)lvXlbmm)^}VJd3aLD&N&zK5Xb9rKuy)m z4;hNfcRkSPnRXG86ze7PGww#L%pR&I%jdJocZeE%ok8TrmHRjBZO)WG%94XqWT`j7 zY>$Pv&rP$bUxLhzSZ%`yQr887gKJnIiw(V$%;(80=_ZSx zQ!D{g5nEF(_^-+Cvp^{crBZQ4^O$8g?hPIht&+DQimF1}}wY!+! 
zGl{1>ly5iSOZ;%oT52YAQ8lkop)XbaQ(90wd#S!$sCc1g@cVdY!!p?gon-lv&*3T% zVfSDst~k!_o-rQ3%(k^_>54H+Cb@6JM+j0=@WU4G+9=^i*2%mZd*80bQV!icJiLeH z-1G4;^j(;NS3|q8Pkd6z)e&u3Kl{%nt6vXorBk3Z%uKjHNd})M%F;h9qqmHFl)+w} zy=+4mEd1tPlMjKlX|ZO02-!&cmI2u}2CPpEk7k@XM?U6fxjIY>a_EH3-YK>oz}b2p zLz|PI`NQPC>J>(~v9Hj4ekFhRtZPmh%ALPE#ZJu#H4X~zqi*MCq~u96->^|m#XUXMNl)4 z2{jFAyFq`1M@&%kt*D#4O1K--ySZg{qY`8sXc3^urZvT2s<&d4{q=R;YWqy`HHSZ( zE~~v}FF<#h)4U`zy{rBMQxku`!`oWz(A_qAw)10_H(8249eh5Vv{|N*#?$*mV>7l* zZ(b?9*oO>fPAmcJh(fLAM5Sb59g`S-l!*_cR5Ki-ae?{xZ_Y_Xq2 z1Rk)^p<#1Uh|?wa3$K^ifxZcoe2{=9VP93 z+2^gcLvpM%s2}f)$i;K)v)D8&Mm2v*as^dASWo8c+4){>vNS1Whx(>qx7(S;H|40w;>v;A}Lfi zCQVXRRaM>lC&mMFt*-H|j*J6csE|3y7H<_9y}+VjD$fo~F3|y}uE>Jt0fANvbDh@v zKkc4PL@Y~a`w0IsMTQiH(s^)KQ?~^>!$R|z)07w+n3>Pk<%>}*7nUo?$2yE{-0|;!E{CnfER-x{yl=8?C73oy#AWlm79y1p z-;Kg5XO@)99E0pCST#|pYkm7{$-brEBKMU_-_m7Icei6qAxKSujRfiiX zvV9yH-#4u~{#aHk>9x!7E7@_Yv!05xdu|uUKbY?xM7+u&%SB88zZUO?1y~r7q7N#W zrC{bV{PqQKWAgj!#=OKb4SJ4K%kyp0jH4b_kNsFD3RXs4l~Zby>gDRETMTr4wH0$5 z{BPA3U8kUtBl7$d^aQ2lQDpCm@ni=%Ch%+Vr4%G&OGBT9Sy4ys# zPoMU_@<5J8iOAe|Tr}-n?s9j;oO`-B!3A z_xLSA?Du*h&gOJS&g`9aey*Tf@cZ^g#cCO%h(a=jw6&B8=#^B+Vi(TD9UNlzb3rvr zZmH*b9%{U0>n<`hW^2yo-0~I?%GnHiuU<|UQa~Rta1?4TS#Eb*eO)L^i1_=1T~=$l ziyG?Dn#sd_+|Oi;)(2Dq6}RHPE&Uj->0lixpjo*od?o8UM{>4zS!nA4p8JbdQK~0p z#KFaOy<$ezwz4B>-EA>pRGD59&KJYlUAGJGfDrMb#+hT$Jk7K1_VVrk$F@`_^xmPv z^+ynKC7!Y#l6vbeT}U}ht5pA5@eiL396MVX9eb9V_D!m+HH_fWzAAP{83SY{onu?A zH)FVK*FE=zvKB`}Wl8z(L2{fAU<=EOYMh-&ez~;W@oGD=%`Juy2)}U<9Sh)H%m^PH zUtqthr?4L}Y=aG#wX`|X9Ln;s{t9))$g54L^>A4}|KTE_lcYYpOmbrEYE(?hAMtCi zRx|)EHelcR$|gHl<_^4XT(+hvc?ODkY_FFKUmS0I$(6;CZy0~$E`%akTIgD2AMLl~ zIyS#ee$%=Vf+Q#V267h*d(12s3*IdCLU?2jD99yTcaSf45NYc1nOOY(KO`sMf!wc& zNK{r<_DNi%06Y&)+{=B>r@9{o>8bnHGE^NhrfHV`bC0?Al>|x>)o3e8FYMQDbwg;I z#r-IM(Zp>6*KYSm8;>)n-X`Y3*p{Gv&({6g^BIz{8^6-FRNOGBz>Tt$CpTWP`=#6t zcBu!lm0bB)19D-`VZBqJ-Sd1p#1KmGFT-=?F3`dIOE8kIUq(q?QSv0Seg9pd9Vnv_ePeYo~hl7v;3P}T9AGH@|2+AVY}g9>n=uu zNQ9`z>pejsdYxZ!`bJmZbiBy(d^z>Rl-+weETnqE 
z|6Zx8v`AkncQcGILiLIgb4*r2@7Ue(7Fz{qrf#eL$lDRGUy+<_Yi|lj3soI7DLJzU zOz+e(nOr6=72(yq^3*k6COBLSAMx<@i$BZu#hmDwOI)rMlx|975GoJnaQmfWrg5N( z+@2+_meNx{P_@WH!1X5`xpb^Ku6wKb>oY&M&rRV0y!FF-9gyX!dXhvOP`1-fr)y2Dv0o!=tG{-hqDs_ z#E9zs0}=Pn0doVzM2G!&Nt|QKUf`wx@8e9(pu5RI?9I;;tSEReAAq z{EqlKx#4q*ocM?JmD6y2;*ac*)<==a_5SQNBsB{k^{wNVoj4JBuXEYq$<5IH!cL=Q zN{?ODf)9#zCBLu>XlGL5WGgoj)eosCBw^q*@y~m`Zx|klKwD3E1=J6r7#ek%WIX6W zx$tb?+-)4>gmc1N^^nvCCMSXW^3fg1TNCRVyEn#faH1 zHoMC{Pp14-=uj?`U*~e*;u4{en@c=|`*gc^? z7-}XyFQh;6V;ZQo_KwfF8AZ(+I9XK8Mv6_5F$^&(*`>=ZLIP;U`YT)jKbOg}{06cCObfNjaf&dN^FXsZPAS&N_M~%It8a()u%V&#}e4 zR-N|_#kKno&bj`C8{i}es4mUd*VM{N(cO4<<^lN2B(mDpmPbrXQP^SW!}z%Fa8XtO z9*tnw4VIh#RbJ>THbe{gd2nX9V1IN+a{y_n1ns5&kn3$p zC$%x)2WHR;n`v3(YD@!7nwQsi2*C~PKc9aYAOVxgWhGy_;Kf2<0^~6E>wA!E@b0|~ zeAn<0f-&{G_cgye9sy>n$ts=y@lczKjWr?*N`v$ra?{lyjC0cT?N zq^&WtoKQ!-w67WKWXuOaq~!sWM|H!62R75!v>%y{H2rjWTc?#via;dU3R!A7Q0C4r z&2)E=K(ia@IfC7Lo9rXXMDjN>wWJUI_>zX@sN4kd%g0P&zl{3v#qjcd!t@#@_|?At zNm;VcS~%2mtY_fafZ&IazV_p#$Q$r{?)$-)Bu-eui94bKJd(->2VajjrTuD?>xhWL z>yn)M3^L|>KTn08lo;qVA)v#8B)sG-`iDQhz$0w2FC_f=W};}<_4haE`j~MfwKdBD zbDzdprqY$5GZY_;65wk3Txr+s8y1w#8)X^mRY_$i_x;n>mV;TyFO`5! 
zCYorn*_A*NT3^M#b`|dnzXAn)UH?BN7l&w=b*pR) zhmycKcZ8; z!Vc!GQL3t&%jxGJiWj0u80-Zw^MVG&@}GPg!$T*dZM(dd#%7 z@0a%7*KKohqCpvcA5%fNa1g%Wm*H|PRW*v#Wq&#-ChVtxJA$ z4=NC4E3`iLiJIIEc^vcDDu>;N#+ErH{g_-5b{L&phk*}P;`pzPo&|;Ot^8W(h=}`M z*EDN-bzrEz5`HoC;pOc|GUfp{oFZtgK<A1jyNx{0b^#r8At*xUd@SVBbm^ z?>X-;nEkGs5cKDS{6^m5n@Y!><)Roc0JKyxUwTQVa`7$>>PH8 zN5i0ETiH~(Cbxd%mfXCZv6}tsP27veUtz639yRN=6yEl9HLLUx)4{pVDJ{Bo9p3cg z=ho;gEbn(^iih=|yUFR7ba&S--(zZv9xKoi75-r&t}2?bgHT}T=%?elB$6be5khJE zBRNrb@QLBz{ImnUi0Ycc%aQQO`pD@5tBGt|ImSNI4Y1|Lau2@bN>POY9sR}f@`Cft zPAq7JNA2ZJ22+pPf};g~g^U#OdjK;XOiyroOm$87PD~{Fb;gN}DOj>+^%A7(h`TCb z78aR&TvAMaeMb^TXMGbG3X`uy$^8|3WL9vrvY!)GiIm7=L&tCNfJH1AgpLdKL?E&J z35;EQbtWX4s@e0VnJhjJcE=D37`bu83cVV7Sa?ZUwtx72_4C*;jr9D*hN+a?W(Y=G zL%=9fw-;gyCN+TqaX6SBf95?uvBC7yZOzC zl1iXcP^;{dBVzGFk4l$tyWYmHm5EZQ+7OW#9Y_X&9=nGjFV+nUtf|N3yfs&$+8Exv zU`xV6^h-!zcJxcTONp`-PX5~qUz9oOV!N6KKC}Nc@Dq$rEqJ0&M@qH6omSBDxo7%X zgXWrqxQwNv2vt%YYiF6@kG%L+pza z&p6IZD>BZ$IG39%>oJNiGg4_wQxvf)U4D;FD77iRn6Dj#W&%=P)!@B7?R^!R)k}gQg`MG zyHzabQS>;=8S=AKSL=y|vkU~`eOfJmJi1GnKqcssk+}X2qh8%{$jJ3Z|J@sAh{cVv zOJQsi__|&STlqH=>}7`8K5l7QL6^Qb_K;AO2P&sLxUr*e5V0G->@P=9!+R$vLb06Y z*R}(xeX;}gh>#LXqMd%uA(=w6Ra$pKyN}^{tigc?#VvX68S5=Dst8jIRtI5hJRYk{ zNhPo)s7>%c)0?~UaMSIpC>>=Xh~?jXsZ*<}0(;W>m|oOu%=_`Y7mJ^i#uyE z&+ICE=lOjR0imNvb-z3cLa4M?lf&dXMZG-AfadWC-g$j_Lwln&Mjghzi{)P{$W_6_yI5Rp+&h3kZ#yqH=gbWO_Q85@KkQka`> zjT6K_&uG-N@R7{536ZIuHpxA3H_fp$nZO-i`fy7~M@$9>V}Dn7fX6l8E$2bY#1APg z-Qn(cPgX_V=Dt~w4!&nT%TAE-{?@ODl+1iEOZYWKOdd~%UiE7EFwKyE^$h!b)6f2Jr$+tQ zLRqW$z#U=rI~O1dvjbu-aZ)1oNyN(&&V@kkBQoT@>3NfP=Cec{L;-fXv*U?JtU?yD zy`@Vmq4NnGZ21ktxcXenjR&_#h+bzTKC-`kq1+qF0JY#Ia*)NlWX>W@LLX7H^utDz zolf)uWt6G-=tDgCmw7n6_WVS8t<(BZFiGEP5mtWb66i3t8T56zkOkpNUanX~&~u7$_r7YF&i6M>G% z3nCpcJ62Fp{8g&!N)jlc-L1X)I7WQ)1@%siFD1cykZl6?AD>0cUNilrpz4>28#9SI z$YS|*xxyVXklxZp54f3C*B|E>mX{T-D|FI|g|@GBrCE1Mc3iP8*!cm<AW4gisWSrD*r19lK&SSSwvhDUmPXZgxKR$)WY{lK%g!A3(S4{I7+3 z?$GBa=Jx8-9%LFC^;|bjA1LTEI%Ig{94Muaxb4G~UD8zZXE`&7_X&rhm=qc> 
zXGFh8jHoLO9)C9`wTbs^Yd4W@7Wu{DTXxSS-q`!Gb0-{zQ zCgZPxzi8d<2dbkQR)qdp*b9E|gHw{zlweXQ6?iwGUL(YghwOQ(Rxg*i{W6LXv7u0) z0o1D!+sD!>%sT^KT9ACQWA+MMw?z&~*v!J!JA>ZpS0>Yq18=}DxT*i~5eBbRhDQ?6 zR_~Gk6DRJjxt5I~nThSRM-!4nG9D6(Wz@<2GUF-zTaNRdsA{FkDsh!t(gFHN8iy3( z#c0_GF{kmoH=3_aR67jdmUPy28Y?Ujv;HeZR=Y$3S&)P0nJiUrmP|)s(-|*8+NR1L zqo-{@W70vdsitsxd2*j$ZGe`zn%UmFD`po6q~#qC^Pgx;EFMD|9JcXPhNo7;V)2XSe|5i;sKGbbqZsVsI+I~3^K<5%P^PQ0V#m`03;Tn>11YacW)dgJXGy;Bxx!+Qi@^Jkmkn6T7Twr-mUF&G~o^ZbmtI@&= z{??lFov)44pSvEP8(9>wiQDjEQlxu*ax)G6r<3wz`}|KmYOzC}uAz9lpRZQGF!z!G z0`sGmNn5|lhIOK-f!n~5&UKKdFyEIHA>+PTW`ZRvxp!}f@Ex1kh>%b^FMe6K52Jce z=u=Zxp2uwK((gDOFf)7}IgUKi*S=<~C3exlrmDs|Dwwd|^dgrpCNyqc*2fhs#YukJ ztrK>^SjgY$-Oh$f61R@s&qF@mH=5#Vqq&CH-EAkyCd*UFlhonb4NuH+#6ERyyv|PJ zbQTXVgpJ~--^tR;&lAytq_Iro6UFFfNjbm%p)y~{)Zg*AxZu&(3|ZWqej|0#5wM)! z{r?axKNZHAP@UnDwefaGqQr;^<&Nv?Ek)Fe+ET|e(cJaLlZ7mS5IKU_$L}sS@^UaESlDH5z09ryU6^POPYui zyR8T{`LB8g)|z8?J)K4tHzwHKqmE|NI!GrxcfXZybXXs0m6dWIX*^D&eX_ZT-TJhk zmmGl`^HOwgA>f&ZvkL7?jld%NM=}|d?rlu)o_E&!NeNt5?EX_&6!jbPw6}FpZdZ}F zgQv~!vez~ydaR$9LZJDTXy2{k#+9z5C zwh7`&Aw1E?nVjyay|hd&0Y16P*U! 
zV`j=a&A3GJ>QMwavk}y7{#%fpdxPfQr0-Vuami8O-ci?m*Lj~d6n$OUl?bMOaU}>_ za@y6A`4p2rz6hs?8fEb`kSi-YhaVT2UMR(7EXOh-&@R@<{3B_Un95wM*x5h=$Mm>_ zRgB7*I`UPmai47+Ti~)%uM+3b{05(7E`!&f1txj;Pl&qj_L3LO{VofcfP*vHB&St- zaN-vPICe}sod%)}-)k!>6N~(BEG_8lu9SH*GeQsQ7^(t`^NGD+XhkYFGjnXsHpSl}`8E~q;m`-Ki) zzUriGc81sw+&{RYUbqC86j#st-Aq;arz97#l1PKjgccB8h%b8~;m9{oalgE~AorBw zA~oWVg7y;ep!)$b0XXYrkOHgUOH}bkA!jdH(5xnlzZ-jUsa08()%)ab%IP03eDDxH z;nKPDHQWf3i+|YMR|z07=Uce+`ys5Sp9Cm4KmGAB9j_#i1j=F7_I))5{3n}Pm-NpZ z(b1FJtqLmNLPbE1YP+)KH~%-YZY6iw6`k`7dUf^?k_R8 z7)?sM?9CgSXsM9|1t+k&8Qyr=jCC*VJMuO9i0I) z`LqeKu{c%vwzhUF-B&@e)-I6Q3_c1}+goag6BHa=ud1me!&+ES@VY9CN;G%(#aGgc z3CAM?raH61Pijb$N=WUmaQ-aNq)l!D)OJy=YlC3nYkb$10F$M zkj>oKh(#r2cT!=HvPI4xS!Bhkz+cGxaa4MXfb|)QzkDP;rSMe@iuhazVLomD#3up+ za7CKmSn|(NnPLG?6(SDyi!yom~kU|>u$(YJvc85x_}%gC@Ln?U6y*+{ioyO5oB=DsFHJx&ju zHcBG`HHJ>^!z}$Lf4Z2l{*ZZ$B}Xz>rn=7nCEf#Jg-4LG``=S?lZBE~E8wNf z&ws$<#T1TbvLdJv#4rE8G2^%9Jz4a2K)(d?im=APrPLB3KSq`Mw&UmSH~xE0W(AOM zTS6g}jNrI19P6jRAD)m06S!=BkI45j{^0Xdb`Fd>J3q=7*n18hvwQfIY_JCr0&;>d z02z@%9gtP14wZl02q>U#(&cF$_D`7FQY@+&pQYh#^TT+q^>)U-3ZBEov#%K8K~X+r zM|*TaH{mJ2_>&tKiK}&lWJ%Kq>L^JhVY=od9)H|wk+46BX^HbUk)TZ4qJO(x*PHE; z-VP36_>WtX8|bRp;88j1WOAIUkQS&ifIb~x@l!)h1yUZQiRNAW!tbtGwbmRe1J_qc zNSLwUNMYiWEDL;CYFKR|7s4Uq@4D=E6bS+sI4Np21+{(uN5Kb3?8)))F9z^=pSg!g zKNW?Jbt$qYzIv%c*4JzI$n-se!BqiHB_};%LQa<}dq zt{D8WOk3+LsMdSJ?NuC%G%e{-<|&(ae?L3?zy$DaL4wH(_OAILJF5=IxzB0+86>8a zdkYzLdc<|bOL1v3<+FWjW!cU#3DF<7WT=C(vP{6|snh}^hv_qizfdj*$tHJSL1nIM z(h03UfLS^kB*cZbM2rEx(7fgu4tJyQTv`h)ve0c_q^E@QbaN$`DpI+*`9=o2>f%^Npw zJbOqhL7OO8{KeW)#opdgrR>WhM^0S0a{L#qZI9|4>%MENqgs6hnu$xdh%GjsGN0F4?m9)hW?toD?hK`M7OdM4h5; z9;^GdVpT+0pLkM<3iAj7t;EZ7=P@o~;Wrjb93?q5O!wv2jTkkIH&m0kQt#PlyUfQm zlbEI$!^7oLC)`^bkTfB}3}TLZYLo9;jKDoT%-O>i-F;txugnoA-`|uh{Hb-kQNO%c zRcH3R$BZ;B5k<8>GrKSg@7c%HPv2r&fAFJ!)!IA1rBE-Bm%M)7BV2Fsod-whLr~?k zT!e7Z|E^8E{T>YlJ%hpi7stlt>Y~+Am0e00VSSKin01QPYzUK;OiK|E_+QD_Sl?DN zjf*krk01y}E*s3|ajGQkgbn4hLgPF9GMdE`$ 
zKpWmjj+3eWkLE5x@DZ1^pD4#vy<|$L`Y3w(F8sz<0HV~gkWB{S_#w#{9VO5Yi4x!= zM4vf!W`dT?oVO^n{mccvWLgPKu7t>gDkK(n6?{(>`|Tn|k`o50W0aNzB+B)$E#Y^| z2N8^<5)pIDR7)&BB;QvHS@cjFFp>ls0*vRzi0~0(`Gv-42{Z`t5gMAf22G&9_SVOw zSm=>vU-3g`uMIovUINZlt-S=To&v_V5mrzRotRmf}Xs3{u5%GO9tuB_L zn#wof$g1M7b$jwxzwLVuw^r{pKnmJ*cDN-yoH)Qw+4Ei_L~ehY89)yvpGp%kxL`g(8KxQb-eLNv`X z@}|qaIg69iSMj~Qh1!zocsyK5z2m37JoYV0h0kU$h2_^h#82}~6Ea|f3s+y>I%DVo z7`p+;v1oME0>>nty5A5X#mnH2YyKOueShx&PM_-SpXZUC$n@QRhKX_In8)v zJVm%bH2K2^{L?epIm3;NXE!cn7g2$Pc#0&s@HsDqEqZ}mA;C~-JwT-s>v~!NWPE^Jt$nf)-Yd889Y^lvd$|2KzlHy z(S5RtDT4!U-Z9AMI!5^nG9cwJO^O?(AEM2oGnVbbAWUXEwjL9d_I&%+vr#7>0qkkj zV?U&7iiHe%F-@G6s94CWlV zQ@7zc!GkcVx~(CC4m~PPv4e;%?r(ZKD{7O+39kBA$33(%-Hr;KEE=>>8!gb?ST<#u z`PH5|$?$s3>F-*HU)6FNwo@vUd#+ zX1o?iSnYY>;j7` zX-SP07az!Jrzdr@#7|RFsukE(UHJddGgXvY2cdN=l`GA$(US9AOS17xahv8g6spGx zKFOhpKQNxpvZ7bXt78Fk2p+KMk{Nu*b-DM58eR=a$Lm2XTW2}Hb*JDuMu?|G3@QJb%>Pg48Q_&^hb7{kFuhfAhhb^^#>H}!sx_cc+p{fuB9Xb%y zsFI9*_@L$H>RkAN#8blaP!a54-!m@t!O24TK?4H(rZ)!rtIy9BBp}9?Qi13QC*gQ; zjSS(B_!w1Aub*>~?jFRI3>{9v0kubR3FV?eq$r48qTY_`=vn1vqlFujj{aN&a9}-G zQTpNO*q)Yws9ENnp!WdO=x8+LWkx?Vc~KH2KhT#K+GTOEwo0zYY03vq=7@<^cIBEPJPlVr!Vy40O6DaIc)C% z!l}nVdn4L+6wuScN4oMXO#u!ujU1LdZIR`mh07gb+&-KEk-ap)^|S#C0+dE#DbyP7LJ?q|21rL=gj-P0YiGDAxm`Qw;!Ut{aIoq zNnoorpw`tY?tGLf+m9RD|zydd(7;^oEA%U zFsOpRA61c(E_AOf^AQ0R5w$$UN580y1H34KBG=WbE?ZA(Ne4eCTSN>`BwG8PyA2qT zX@p$*gvX34s=ePPfV=i}?Rt_+rJ}kXNKa{wk78VSRu8~Gq5#;?1CXSur5lm|4L8lx z7jr*3$Md$&;?_<0qQI5CYf$aZH@xgVxHqlC4pCT{mJA4SJuXXE_v7ZYZKr@dvXt4%7wZ!sZ6YcCaY>P*Ft*Zp(= z+&9q60=lCueeE*Vgql%KLwx+HfokWTh_zYUq_~+2P}02V&!6AtaR%aWu=uV4{`Tbh0xM&dEx+H4f9LYhTn~ zrGE&aQQOsqjYV0C+8R+>ZMPIjbAC*i>z==;3{g4?W2sLD?AM|R@tH9bHa>&sQRPm; z{g(6T6RL%D-F8?bI<8ljE$ld10t{R4qm0Z_NK>owDqCjCzG&@v6;Hu#-mZojp5_9* z{u2h(eyzfTiJlQ<9AGx#9fXX={>2^swI{n-BWMMeKazV`TsiKhwGFnt=kc6j?fC!-TK?hh}(f7g9B{>6V#o8k5sRAJ)El>(&JIf!PSB@pmP76=t%e#cuDBa0} z+w8$kD@lH{9-0?DZ5z{p1DvZNB}V@e-pK%oUS2S#FVgLNcBP#DHUllZf$iYT5E?Od z1Ch_A@G7;Z?751m!LEp!^JOg>ZN9(Zbs&wpdR|ao5O`6+fw9a}m)OjB#X_s^ 
zQ&v1X+K-3@U^;E4$u2wY9fW-~&Af6|U3-&}#yx*Lifr zc@Oq{BJ{0zVf^4%im!9)q9;uT3?ZboE&mGe)Hqe&Qqkz?siXwNl^NfJ4v@(oN&3f&T3AAW!at$rq8W3*%t8Bf%B#2e@O;m5u4=I7rpfpIuvj{xbBI|QE z^g}`*cnD9La9#piyGvS+dd|3MFR&9mS3fpQjE|Vs^~`7dJyxa=u&eFRj5VbI!2_2q zmhd!^H|By_Lm|mrwn2Z}7R%9S2_nHyB2{bE?|}Ze^C6ef5$8h&Mg-G%3Cv)p<(v7_ z$+=iyU5|IX(2st+B$<1R|BHr>H^*yZj03KWU5u6=4vfj%yD|E)aZ68eIg{e8QM5Qr zMK76%5dh&;T<-o`TQ)YGF+84GaRP*+{{k{e>JVXN?RD}8e`_mY@QY)1NEL7|4?sSt zVMm*&KF)JK_(0YK{vw}MusubB4TEw?$Dldw-+vL4GoI)|)HrpNjyV6y{^5NL4bqMW zq-oZU&^XzWTfenU`^dtpRT>q?njP4|;lFdx?Z-L(mEp3{1aXwM?R_?#B5hinsq9NO zdk!Rs&#$%6wuK;Ww1as5tOCEUhdb%1L5a$Br5A&3!gsb4M+$TIb{Z@pe_eYx&k z1N`47407N>6eRVT$$QAb42B~SOM9AzdKJt1xg7(V>-ogN;_WpWGQ5GghX%VM8%_iC zTo3=Eo9G~<9B1{NU=4DV^|ng#PEP>6Sm|vf{kG)6;iI~VQ7Tk%ln7y>$l{*BeiEyG zQ{J?4f@r3s*{#>+7t9uz#KKBs6T&H1(=~8p8dQQNhHqz6m*?c%@Xir(=zbvHjdIdm zF5^EI{JTPp^&FhttW0_R#EWmz`6HFs;X7$y$OW{>bYc3VU;TdK?@3U;>vX~5)56Ip zLFmDz5WFY9+muO-ZZ};c_iGz>CsA9+VcY#;s;G_S|6yovhnT8%x?5NhnZ;ZlKCxnj zR@_F?_|P3KZTX3p@ghU419rkk=#Dmu6eVFU{d}dpWm^_2JKR+iIYLDN*z zH4aP@8JUnBR5XtL`|uKM5gn5T69SxK_NcI(Z&~{pp!~vS-h6dcK6&G6DJ!zlQL18f zWnKp6^3e8HI?GRC;!Jzk$gjcY=T)~WLLX2c9(-k5aX!bdRsQ!56uqanTcTua$Tnw> z504S9Rd&)9+rBtR_u{3HZAQI3UhyPPk2=g5nS(LwG#Z5cgxJ^%z7jF329Eq}SHKg@qbzS5~;~0e5z&_cCC*V1Lwp&@4EdeIz)Z^ z=y#3teG?NkcA=~QW0x>&u)o3X{tw>Qj?3DIqfh&$#3{=R-&KuC!B~DqqJ2>AB^v_7 z$*T5M%?X*#;H#t0;IFH>wcWt4C`>Q5k2v-_Iu@fB18K?ehgGOXqokeBL9zOWscpF)liaA@)HH+1dWGaf!OEFE zy?XZIF{*1>PInU2kL@zm`6=&T6QR78txi+z{v)*cR=;@JFJ#v;Bm8Ugbn7Ht!Awg| z_lI~k_@amJi||=cgL6+xE9kq{;pK&@;g&DqvjbFM0UU{T?KvQi{xVgaJxsc#W-7N{ z58zq*+t+F}9O{Tyq;R~oj&U^qG3Nyba93L>!o0DHWKcWLkBeXh=x>yjc53|q;lleo`kJ=E# zDzyd|ARX=t;!aHk=Fu)$V69QE%m2mAhz22tN#)wfiwPT9`7DAP`mf>`7>AUv{ncMb z`y=ZmbW5hHcE;9xCy%B>f)nmmS?|(w9gicp>^f?)FG80|4@4Zc3gn6ZA|P!zRGZ=% zt7@j-rELzl>x$0J*YQ*KL(U9)wIG!H+i=Yya0DWV)x$cF(w0$5%AQF;>2u$=OAjQ@* zWat38#spA9iCO)qJ%BX!`k1G|oc1j?z$^ZSxBW7)0ll2%heAV|xnMvq2WM2yXoExs zxAVA(=#>%xK!FrfY=j)3m(45NPS?%8U!H7 z=M3-ai~L%`fi3P)p}rMWqV)KW#KiXU%aQdFu?F*z@wqPhE13f1GT#9R`%c_zhu%ZW 
z(}Jj`yLk|R`mr^Jn#)$x?>XYvA8Wy!?DpkrXfU3jI2Lg-9?jc}E7o%F|IhfXe|vNYbLY?@nQmCiW%_S53PA2+P6(iM}1$6Ckqft4ff`a4W8qh zc9ga)djfm)DyVrq5z53eeN(p$W5)(e&ld&B#_)#nl-#+Hai8l<+RQ;2Ip+%~9!px`N)x z1J8RY2i;W&!ptwothWwlWf znsO&5F}EXjloxkS&~}0ylydzQ>*NJJVS{eN3~AI0Nt?)GYg#kTNGoDQ#Qt8GT=pVo zIko6yoEYD2NW}g^wmJ2eI>%+5poa7bPrcgdk$H`u3m#u46;|caG!7jnwYz1;6oHdc zn&_lPo9WbDAgtR)$Sk6IOf3j`0)%I!2#)Qn>o9%k_N-mm#bDFPu=-I$*kP7qLxh}K zgN%op)%LgzYMiDZ*;7f zRGJD8H%L9sybQB^A9B&E?Cz7}F~I7tlg;fhWw(BG2n=r@eI?-e_ zj+MVbq_mrdsU#udIFAWAxh;J#>Frm&Z}|^e!(693>cHKfEWybRc2=?qI$Avv8Mp23 zKyp^C9quG^Cg86}9(X+79cBtX;Wc)LD+f^b!;VmewdpRG?kX!wj;HP#ilWvl~{ zk-YU4~4U#^%8 zd-~`I7Uh|yRkEB3++k)~CE5=+ev#-%-bw6;rFeMN(*d^0ji9MyMCPmpIt^G7csjV& zB~IvyY8?Q8>vo7NMI{hyW*9&$wn8gFt@Z=AL;S|XNEM1z1-B~E;D4V8v zWDcK=%nJ3Gj7_~GC^>B$==x9)D!>t)atLTs(u~Tg&}T4i{c+R_i$>9rBmDxoouB8R8p6 zDXC$&jounpkXdLq1;NnthcgR$4@rr=Ap$4L*!L`EYp?p%z4MDXe^Qi$BVc>%I*Q*> zrs8o6LS6(1iB>s{w1{$@awRh>g#uN>D!?V6VuEFl4*IUkdTj$(};B^Z)Q z`q{CkY&j};#9S7Z=BiCJrnS<=c_65tmk1bJ6ST@rTR{FMqK|Xz%W>-5^R&JfRQvL% zx#^AwdesYsYl|^vinFQ-+I02sYyG#WBbT1-dO$JFSw|>1$)?csi^t^<3n1pwXA^2+9~FVt)2J4x|GgMuGoM9+vHRaozC?;832Tg zv)T%90lvB%sesnNlsW)UPN=JuuAo2HqOCLMCvre1av9IqqCtPr8dol$Ijc?;@ER3e z;f@SIL@hRx@uPLU1=hZ+ZV?B_yx?fM7t8RwMPeQhYV&#+=VE}=Ml37zXyQc#%wz>G zZ!#WOqN&Wzd@L!~qiE*f+F-mi7ZeGTPt$obw6>4+Y;I ze%J*h=~C@=Lq`xEp#L_VQ7s$L;Tp-yd9-@Kmx`i{J}VSZfwu-nTAopl8c>hpa$v;? 
z-VP{nP&l1c+WSB~_XD0W{XR@OqaFqD8U>yG_ADUWayOX(TDT%0l$JIhM*vVyi)<|8 z?-q$6pq?SU^D`4qPHM5kP8-g7)c}Rnuz$w3zXz=MUY8hpJkLP@H}(t){0d-Q3FaIg z`W)@Uzdwm~2i9}Qd*|L6>j5Sg|EM73_iNGN!UTZu*u0YbCGfVToOA1^Ae`x6X^pi& z3p)qJsowA#{~e(dKMax{TKY!c7!%+n|M8 z26N|7V4(#*!>%2Z!Uz4`;uQugI;{Gfd^JGCRx8Le+^!VZhDd2a*XR9!ufR%2RhsAm$vZTDLV=P~|G8PcQ%>PaZ%Q~urJ^?Si90dAV{CG5<4{Ou&{fQdWScj=z7 z9vxsk?r#Ef{`E9xGG6L0NumP7g`ZLbA*ofHNYU#-{$I>^1#D+=O43u$Y%hQ3S|tsd zpDvP@5`y3rg8f5mBj6C$(tGHP1{8r#6iM2@mjCN$(XKZSV88r4!un8Vpn=cRw@H7G z^E`$Qm~EZp!pYw*9j$8wk32bTkp-iXt2|&3 z4+S3fLH!|m65zK$u3lk+QO^xv_BS4squtwE5Uk=*m+n-IgVj1-B76o_MFS#%n|;Np z6gZhub%s}Hhke@;Xy5XlFjErne8?{`M87@XIWH7YTZO+X)@!CMy3<8P`v?zcG1#83 zg%6A-_9@?~H==#34EpbR!@aZu)V8uWER23tCXgsI^EK>uT=zjQxB)Qq0h48c4xrhN z4C5vANHxKN&e9snlLWIN8DRP4v?T?MCPmjHJQ2_cN;fAx&_;j&ISmL+rTlr2K(K=U zY3z*C0lA3nI~ZiT)}TM@QL0nw;pI2^6Q@J$SfLGs)Mg96dupNh@4*R50^dg}2oxA3 zOr_2LRP1j~KyBl7q8CAS1p&GjCSRb%2BBEPSRk{)?O60OX#5QNpRXZva0F^|bYDJ0 zil_pKR8?yP?tq!)E~LDSHun$a;5Ft24spgnv$K8N4(O3)0ipY|EHuF4(N03}sr+v7 zdJGyKb5=Jg0SCqy1`|h5f<6dX`dR;H%WswdgRPw7QXW!~102UnuW962Zw|E2{9PYX zk+Te#LwvfejD-z-8)u2Llij)zye+_|B)P%V%J^{ zge_B?ff+t|?%7SCYFKT?S%gtffR*d>l7~FIcLVL;z3&@RhSLTn3s3`aCNO4aM)5Tx zlKTZypfRRV*5@_@L-zA4Nu=a<4H(9+!?x)FGmqZIq}lL{;II-Xv2yzTC+a}cgir23 z8n@dB^xwv7mvgLu!Dh{pX%0_pEJnaQYP`>(?YcD3KIw9I#MG3*Wnd0jc2rhh9Oy{# z(`sqcDsFGk8cnZO)!`BC3= zV2f~0cdC&GLephn{=KZWrCAABPAIKgWr=9&NC0PbkI&xG?$D8>&xFtbYtiQfV~!m` zdI9f%y{LEV*6&5ia9zM;amq`=eHk!jmrXmIfJlfX(Ei3};CiRed#a8hlG_4ctNzZe zsk+|4RN%ek#xi(D2!M8LegaGCk1j^<5T#y#4loC<`D!wo4;XBGj<_C90h?%RWSVO9E7ubfMNaE z_l~oI%SLfVgoYwuu~oEYbL2Kn;5sOl&6Wt6Il%Cpqc1XRl4>FVdQ&MBb@0J|X1 +Articles • pipapi + + +
+
+ + + +
+
+ + + +
+
+ + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/articles/new-endpoints.html b/docs/articles/new-endpoints.html new file mode 100644 index 00000000..0a4bbf83 --- /dev/null +++ b/docs/articles/new-endpoints.html @@ -0,0 +1,163 @@ + + + + + + + +How to include new endpoints in pipapi • pipapi + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

In the recent update to pipapi we added three new +endpoints grouped-stats, regression-params and +lorenz-curve. The goal of this post is to share the process +of adding new endpoints in pipapi as there are a lot of +checks in place in the code and it can be quite challenging to do +so.

+

Arguments : Every argument and its value passed to +the API is validated. This enhances the security of the API by making +sure only expected arguments and their values are passed through the API.

+

The arguments are validated in function +validate_query_parameters which has a list of all valid +arguments for all the endpoints. If your endpoint is using an existing +argument then you don’t need to do anything since both the argument and +its value are already validated. For example, if your endpoint has an +argument ppp which is an existing argument in the API then +you don’t need to make any changes since ppp is already +validated.

+

Values : Validate your input values in the +create_query_controls function by adding a range or list of +accepted values. If the argument is a character then you need to give all +possible values that it can take. If the argument is numeric, then you +need to supply min and max values to ensure +that the numeric value stays in range. Based on the type of argument, +check_param_chr, check_param_num or +check_param_lgl is called. This also ensures that the +argument name means the same everywhere. So it is not possible +that the same argument has two different meanings. For example, it +is not possible that the argument requested_mean accepts +values from 0 to 1 in one endpoint and c("yes"/"no") in another +endpoint, again ensuring consistency.

+

Another thing to note is that the arguments and values are available +in both req$args and req$argsQuery; +however, all the validation is performed only on argsQuery +and only argsQuery is used throughout the API. So we suggest +continuing to use argsQuery for consistency.

+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.7.

+
+ +
+
+ + + + + + + + diff --git a/docs/authors.html b/docs/authors.html new file mode 100644 index 00000000..99139eb2 --- /dev/null +++ b/docs/authors.html @@ -0,0 +1,122 @@ + +Authors and Citation • pipapi + + +
+
+ + + +
+
+
+ + + +
  • +

    Tony Fujs. Author, maintainer. +

    +
  • +
  • +

    Aleksander Eilertsen. Author. +

    +
  • +
  • +

    World Bank. Copyright holder. +

    +
  • +
+
+
+

Citation

+ Source: DESCRIPTION +
+
+ + +

Fujs T, Eilertsen A (2025). +pipapi: API for the Poverty and Inequality Platform. +R package version 1.3.11.9000, https://github.com/PIP-Technical-Team/pipapi, https://pip-technical-team.github.io/pipapi. +

+
@Manual{,
+  title = {pipapi: API for the Poverty and Inequality Platform},
+  author = {Tony Fujs and Aleksander Eilertsen},
+  year = {2025},
+  note = {R package version 1.3.11.9000, https://github.com/PIP-Technical-Team/pipapi},
+  url = {https://pip-technical-team.github.io/pipapi},
+}
+ +
+ +
+ + + +
+ +
+

Site built with pkgdown 2.0.7.

+
+ +
+ + + + + + + + diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css new file mode 100644 index 00000000..5a859415 --- /dev/null +++ b/docs/bootstrap-toc.css @@ -0,0 +1,60 @@ +/*! + * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ + +/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ + +/* All levels of nav */ +nav[data-toggle='toc'] .nav > li > a { + display: block; + padding: 4px 20px; + font-size: 13px; + font-weight: 500; + color: #767676; +} +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 19px; + color: #563d7c; + text-decoration: none; + background-color: transparent; + border-left: 1px solid #563d7c; +} +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 18px; + font-weight: bold; + color: #563d7c; + background-color: transparent; + border-left: 2px solid #563d7c; +} + +/* Nav: second level (shown on .active) */ +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} +nav[data-toggle='toc'] .nav .nav > li > a { + padding-top: 1px; + padding-bottom: 1px; + padding-left: 30px; + font-size: 12px; + font-weight: normal; +} +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 29px; +} +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 28px; + font-weight: 500; +} + +/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 
+nav[data-toggle='toc'] .nav > .active > ul { + display: block; +} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js new file mode 100644 index 00000000..1cdd573b --- /dev/null +++ b/docs/bootstrap-toc.js @@ -0,0 +1,159 @@ +/*! + * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ +(function() { + 'use strict'; + + window.Toc = { + helpers: { + // return all matching elements in the set, or their descendants + findOrFilter: function($el, selector) { + // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ + // http://stackoverflow.com/a/12731439/358804 + var $descendants = $el.find(selector); + return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); + }, + + generateUniqueIdBase: function(el) { + var text = $(el).text(); + var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); + return anchor || el.tagName.toLowerCase(); + }, + + generateUniqueId: function(el) { + var anchorBase = this.generateUniqueIdBase(el); + for (var i = 0; ; i++) { + var anchor = anchorBase; + if (i > 0) { + // add suffix + anchor += '-' + i; + } + // check if ID already exists + if (!document.getElementById(anchor)) { + return anchor; + } + } + }, + + generateAnchor: function(el) { + if (el.id) { + return el.id; + } else { + var anchor = this.generateUniqueId(el); + el.id = anchor; + return anchor; + } + }, + + createNavList: function() { + return $(''); + }, + + createChildNavList: function($parent) { + var $childList = this.createNavList(); + $parent.append($childList); + return $childList; + }, + + generateNavEl: function(anchor, text) { + var $a = $(''); + $a.attr('href', '#' + anchor); + $a.text(text); + var $li = $('
  • '); + $li.append($a); + return $li; + }, + + generateNavItem: function(headingEl) { + var anchor = this.generateAnchor(headingEl); + var $heading = $(headingEl); + var text = $heading.data('toc-text') || $heading.text(); + return this.generateNavEl(anchor, text); + }, + + // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). + getTopLevel: function($scope) { + for (var i = 1; i <= 6; i++) { + var $headings = this.findOrFilter($scope, 'h' + i); + if ($headings.length > 1) { + return i; + } + } + + return 1; + }, + + // returns the elements for the top level, and the next below it + getHeadings: function($scope, topLevel) { + var topSelector = 'h' + topLevel; + + var secondaryLevel = topLevel + 1; + var secondarySelector = 'h' + secondaryLevel; + + return this.findOrFilter($scope, topSelector + ',' + secondarySelector); + }, + + getNavLevel: function(el) { + return parseInt(el.tagName.charAt(1), 10); + }, + + populateNav: function($topContext, topLevel, $headings) { + var $context = $topContext; + var $prevNav; + + var helpers = this; + $headings.each(function(i, el) { + var $newNav = helpers.generateNavItem(el); + var navLevel = helpers.getNavLevel(el); + + // determine the proper $context + if (navLevel === topLevel) { + // use top level + $context = $topContext; + } else if ($prevNav && $context === $topContext) { + // create a new level of the tree and switch to it + $context = helpers.createChildNavList($prevNav); + } // else use the current $context + + $context.append($newNav); + + $prevNav = $newNav; + }); + }, + + parseOps: function(arg) { + var opts; + if (arg.jquery) { + opts = { + $nav: arg + }; + } else { + opts = arg; + } + opts.$scope = opts.$scope || $(document.body); + return opts; + } + }, + + // accepts a jQuery object, or an options object + init: function(opts) { + opts = this.helpers.parseOps(opts); + + // ensure that the data attribute is in place for styling + opts.$nav.attr('data-toggle', 'toc'); + + var $topContext = this.helpers.createChildNavList(opts.$nav); + var topLevel = this.helpers.getTopLevel(opts.$scope); + var $headings = this.helpers.getHeadings(opts.$scope, topLevel); + this.helpers.populateNav($topContext, topLevel, $headings); + } + }; + + $(function() { + $('nav[data-toggle="toc"]').each(function(i, el) { + var $nav = $(el); + 
Toc.init($nav); + }); + }); +})(); diff --git a/docs/docsearch.css b/docs/docsearch.css new file mode 100644 index 00000000..e5f1fe1d --- /dev/null +++ b/docs/docsearch.css @@ -0,0 +1,148 @@ +/* Docsearch -------------------------------------------------------------- */ +/* + Source: https://github.com/algolia/docsearch/ + License: MIT +*/ + +.algolia-autocomplete { + display: block; + -webkit-box-flex: 1; + -ms-flex: 1; + flex: 1 +} + +.algolia-autocomplete .ds-dropdown-menu { + width: 100%; + min-width: none; + max-width: none; + padding: .75rem 0; + background-color: #fff; + background-clip: padding-box; + border: 1px solid rgba(0, 0, 0, .1); + box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); +} + +@media (min-width:768px) { + .algolia-autocomplete .ds-dropdown-menu { + width: 175% + } +} + +.algolia-autocomplete .ds-dropdown-menu::before { + display: none +} + +.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { + padding: 0; + background-color: rgb(255,255,255); + border: 0; + max-height: 80vh; +} + +.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { + margin-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion { + padding: 0; + overflow: visible +} + +.algolia-autocomplete .algolia-docsearch-suggestion--category-header { + padding: .125rem 1rem; + margin-top: 0; + font-size: 1.3em; + font-weight: 500; + color: #00008B; + border-bottom: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { + float: none; + padding-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { + float: none; + width: auto; + padding: 0; + text-align: left +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content { + float: none; + width: auto; + padding: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content::before { + display: none +} + +.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { + padding-top: .75rem; + margin-top: .75rem; + 
border-top: 1px solid rgba(0, 0, 0, .1) +} + +.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { + display: block; + padding: .1rem 1rem; + margin-bottom: 0.1; + font-size: 1.0em; + font-weight: 400 + /* display: none */ +} + +.algolia-autocomplete .algolia-docsearch-suggestion--title { + display: block; + padding: .25rem 1rem; + margin-bottom: 0; + font-size: 0.9em; + font-weight: 400 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--text { + padding: 0 1rem .5rem; + margin-top: -.25rem; + font-size: 0.8em; + font-weight: 400; + line-height: 1.25 +} + +.algolia-autocomplete .algolia-docsearch-footer { + width: 110px; + height: 20px; + z-index: 3; + margin-top: 10.66667px; + float: right; + font-size: 0; + line-height: 0; +} + +.algolia-autocomplete .algolia-docsearch-footer--logo { + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: 50%; + background-size: 100%; + overflow: hidden; + text-indent: -9000px; + width: 100%; + height: 100%; + display: block; + transform: translate(-8px); +} + +.algolia-autocomplete .algolia-docsearch-suggestion--highlight { + color: #FF8C00; + background: rgba(232, 189, 54, 0.1) +} + + +.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { + box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) +} + +.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { + background-color: rgba(192, 192, 192, .15) +} diff --git a/docs/docsearch.js b/docs/docsearch.js new file mode 100644 index 00000000..b35504cd --- /dev/null +++ b/docs/docsearch.js @@ -0,0 +1,85 @@ +$(function() { + + // register a handler to move the focus to the search bar + // upon pressing shift + "/" (i.e. 
"?") + $(document).on('keydown', function(e) { + if (e.shiftKey && e.keyCode == 191) { + e.preventDefault(); + $("#search-input").focus(); + } + }); + + $(document).ready(function() { + // do keyword highlighting + /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ + var mark = function() { + + var referrer = document.URL ; + var paramKey = "q" ; + + if (referrer.indexOf("?") !== -1) { + var qs = referrer.substr(referrer.indexOf('?') + 1); + var qs_noanchor = qs.split('#')[0]; + var qsa = qs_noanchor.split('&'); + var keyword = ""; + + for (var i = 0; i < qsa.length; i++) { + var currentParam = qsa[i].split('='); + + if (currentParam.length !== 2) { + continue; + } + + if (currentParam[0] == paramKey) { + keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); + } + } + + if (keyword !== "") { + $(".contents").unmark({ + done: function() { + $(".contents").mark(keyword); + } + }); + } + } + }; + + mark(); + }); +}); + +/* Search term highlighting ------------------------------*/ + +function matchedWords(hit) { + var words = []; + + var hierarchy = hit._highlightResult.hierarchy; + // loop to fetch from lvl0, lvl1, etc. + for (var idx in hierarchy) { + words = words.concat(hierarchy[idx].matchedWords); + } + + var content = hit._highlightResult.content; + if (content) { + words = words.concat(content.matchedWords); + } + + // return unique words + var words_uniq = [...new Set(words)]; + return words_uniq; +} + +function updateHitURL(hit) { + + var words = matchedWords(hit); + var url = ""; + + if (hit.anchor) { + url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; + } else { + url = hit.url + '?q=' + escape(words.join(" ")); + } + + return url; +} diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..9ef5d646 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,233 @@ + + + + + + + +API for the Poverty and Inequality Platform • pipapi + + + + + + + + + + + + +
    +
    + + + + +
    +
    +
    + + + + +

    The goal of pipapi is to provide a high level API to the computations and methods that power the Poverty and Inequality Platform (PIP).

    +

    World Bank staff who have read access to the PIP data can use the functions from this package directly, without hitting the PIP API.

    +
    +

    Installation +

    +

    You can install the development version from GitHub with:

    +
    +# install.packages("devtools")
    +devtools::install_github("PIP-Technical-Team/pipapi")
    +
    +
    +

    Getting started +

    +

    The main function from the pipapi package is the pip() function. See ?pip for more information.

    +
    +

    Data access +

    +

    In order to use pip() you’ll need to have access to a PIP data_folder. The folder structure looks like this:

    +
    data-folder-root/
    +├─ _aux/
    +  ├─ pop_regions.fst
    +  ├─ pop.fst
    +  ├─ ...
    +├─ estimations/
    +  ├─ prod_svy_estimation.fst
    +  ├─ prod_ref_estimation.fst
    +├─ survey_data/
    +  ├─ survey_1.fst
    +  ├─ ...
    +  ├─ survey_n.fst
    +
    +# Create a list of look-up tables from the root data folder
    +lkups <- create_lkups("<data-folder>")
    +
    +
    +

    Usage +

    +

    Pass the lkups list to the main pip() function to compute poverty and inequality statistics in your R session.

    +
    +library(pipapi)
    +
    +pip(country = "AGO",
    +    year = 2000,
    +    povline = 1.9,
    +    lkup = lkups)
    +#>    region_code country_code reporting_year survey_acronym survey_coverage
    +#> 1:         SSA          AGO           2000            HBS        national
    +#>    survey_year welfare_type survey_comparability comparable_spell poverty_line
    +#> 1:     2000.21  consumption                    0             2000          1.9
    +#>    headcount poverty_gap poverty_severity     mean   median       mld      gini
    +#> 1: 0.3637448   0.1636806       0.09982393 4.100014 2.593394 0.5125765 0.5195689
    +#>    polarization     watts    decile1    decile2    decile3    decile4
    +#> 1:    0.4643401 0.2811239 0.00983246 0.02195307 0.03342455 0.04495307
    +#>       decile5    decile6    decile7   decile8  decile9  decile10
    +#> 1: 0.05662774 0.07048758 0.08808485 0.1134946 0.158687 0.4024552
    +#>    survey_mean_lcu survey_mean_ppp predicted_mean_ppp        cpi cpi_data_level
    +#> 1:        11.23264        4.100014                 NA 0.03385145       national
    +#>        ppp ppp_data_level reporting_pop pop_data_level reporting_gdp
    +#> 1: 80.9318       national      16395473       national      2195.631
    +#>    gdp_data_level reporting_pce pce_data_level is_interpolated
    +#> 1:       national            NA       national           FALSE
    +#>    is_used_for_aggregation distribution_type estimation_type
    +#> 1:                   FALSE             micro          survey
    +
    +
    +
    +
    + + +
    + + +
    + +
    +

    +

    Site built with pkgdown 2.0.7.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/link.svg b/docs/link.svg new file mode 100644 index 00000000..88ad8276 --- /dev/null +++ b/docs/link.svg @@ -0,0 +1,12 @@ + + + + + + diff --git a/docs/news/index.html b/docs/news/index.html new file mode 100644 index 00000000..8707338a --- /dev/null +++ b/docs/news/index.html @@ -0,0 +1,283 @@ + +Changelog • pipapi + + +
    +
    + + + +
    +
    + + +
    + +
    • Hot fix with future dependency.
    • +
    +
    + +
    • fix bug for not including key_values into Lorenz quadratic function.
    • +
    +
    + +
    • update estimate_type filter for fillgaps at cl
    • +
    +
    + +
    • remove censoring and apply filter at the UI level.
    • +
    +
    + +
    • Add Prosperity Gap to both svy and lnp years.

    • +
    • Implement nowcast up to 2024

    • +
    • Add estimate_type variable to pip() and pip_grp*() calls

    • +
    • Add new aux file, metaregion

    • +
    • Change algorithms for MRV calls to return up to lineup year.

    • +
    • Modify unit tests to account for the changes above.

    • +
    • Fix distribution_type variables for both svy and lnp years.

    • +
    • improve algorithm of add_*() functions

    • +
    • Fix aggregation of medians

    • +
    • Update empty responses.

    • +
    • add end point wld-lineup-year to return nowcast year and tooltip text

    • +
    +
    + +

    remove censoring of official regions to account for cases like GNQ in which they are not counted as part of the total population of AFE and AFW

    +
    +
    + +
    +
    + +
    +
    + +
    +

    New features

    +
    • Add new SPR and SPL indicators
    • +
    • Add new endpoint grouped-stats to return grouped data
    • +
    +
    +

    Enhancements

    +
    • Address some linting issues
    • +
    • Increase test coverage
    • +
    • Improve caching for ui_cp_ki_headcount and ui_cp_poverty_charts +
    • +
    +
    +
    + +
    • Fix bug with ag_average_poverty_stats
    • +
    • Better control of returned columns
    • +
    • Fix bug that was causing wrong aggregates to be returned in some instances
    • +
    +
    + +
    +
    + +
    +
    + +
    +
    + +
    +

    New features

    +
    • New indicators available. Optional with additional_ind = TRUE +
    • +
    • /citation endpoint now returns additional information: version_id and accessed_date +
    • +
    • Limit accepted poverty lines to 3 decimals
    • +
    • Add new /version parameter that returns information about a specific data version
    • +
    • Povline is now limited to a maximum value of $2700 PPP (daily value)
    • +
    • +/aux endpoints now return tables in long_format by default
    • +
    • A new /ui_aux endpoint has been created with the opposite behavior: tables are returned in wide format by default.
    • +
    • +/pip-info returns additional information
    • +
    • New “etag” and “max-age” headers returned by the API to facilitate caching of API responses
    • +
    +
    +

    Enhancements

    +
    +
    +

    Bug fixes

    +
    • Hot fix to handle failing of create_vector_countries() when country="ALL" +
    • +
    • Fix cp-download
    • +
    • Fix creation of duplicated responses for some regional aggregates
    • +
    • Fix year selection for /pip-grp
    • +
    +
    + +
    + + +
    +

    New features

    +
    +
    +
    + +
    +

    New features

    +
    • Add /pip-grp as a new endpoint for aggregated statistics
    • +
    • Soft deprecate group_by argument in /pip
    • +
    • Change parameter version_length for vintage_pattern in extract_data_dirs(). The algorithm for extracting valid versions has been modified.
    • +
    • Add option to enable disk based caching of pip() and pip_grp() +
    • +
    • Add asynchronous processing of slow API requests
    • +
    +
    +

    Enhancements

    +
    • Increase maximum limit for povline parameter from 100 to 10 000
    • +
    • Remove unnecessary columns from API response
    • +
    • Add estimation_type and distribution_type to the API response when fill_gaps=TRUE +
    • +
    • Use pip_grp() to calculate aggregated statistics in UI functions; ui_hp_stacked() and ui_pc_regional() +
    • +
    • Add unit tests for pip_grp() +
    • +
    • Improve filtering of data version directories in create_versioned_lkups() +
    • +
    • Add country and region name to /pip response
    • +
    • Make sure latest data version is available as a specific version (not just as “latest_release”)
    • +
    • Use fs functions rather than base R’s.
    • +
    +
    +

    Bug fixes

    +
    • Duplicates are no longer created when fill_gaps=TRUE +
    • +
    • Add povline=NULL option for /cp-key-indicators endpoint. This fixes an issue with the UI ingestion.
    • +
    • Fix bug where distributional stats were incorrectly returned as missing for extrapolated surveys when fill_gaps=TRUE +
    • +
    • Add reporting_level to the output of ui_cp_poverty_charts() +
    • +
    • Make sure ui_cp_poverty_charts() only returns non-national observations when a country has no surveys with national coverage
    • +
    • Fix a bug in the application of censoring within pip_grp() +
    • +
    • Add a specific empty response for pip_grp() to ensure that the response is consistent when no data is available
    • +
    • Fix a bug in the selection of most recent value (year="MRV") when country="ALL" +
    • +
    +
    +
    + +

    Initial release of the API that powered the PIP soft-launch on February 9, 2022

    +
    +
    + +
    • Added a NEWS.md file to track changes to the package.
    • +
    +
    + + + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css new file mode 100644 index 00000000..80ea5b83 --- /dev/null +++ b/docs/pkgdown.css @@ -0,0 +1,384 @@ +/* Sticky footer */ + +/** + * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ + * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css + * + * .Site -> body > .container + * .Site-content -> body > .container .row + * .footer -> footer + * + * Key idea seems to be to ensure that .container and __all its parents__ + * have height set to 100% + * + */ + +html, body { + height: 100%; +} + +body { + position: relative; +} + +body > .container { + display: flex; + height: 100%; + flex-direction: column; +} + +body > .container .row { + flex: 1 0 auto; +} + +footer { + margin-top: 45px; + padding: 35px 0 36px; + border-top: 1px solid #e5e5e5; + color: #666; + display: flex; + flex-shrink: 0; +} +footer p { + margin-bottom: 0; +} +footer div { + flex: 1; +} +footer .pkgdown { + text-align: right; +} +footer p { + margin-bottom: 0; +} + +img.icon { + float: right; +} + +/* Ensure in-page images don't run outside their container */ +.contents img { + max-width: 100%; + height: auto; +} + +/* Fix bug in bootstrap (only seen in firefox) */ +summary { + display: list-item; +} + +/* Typographic tweaking ---------------------------------*/ + +.contents .page-header { + margin-top: calc(-60px + 1em); +} + +dd { + margin-left: 3em; +} + +/* Section anchors ---------------------------------*/ + +a.anchor { + display: none; + margin-left: 5px; + width: 20px; + height: 20px; + + background-image: url(./link.svg); + background-repeat: no-repeat; + background-size: 20px 20px; + background-position: center center; +} + +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; +} + +/* Fixes for fixed navbar --------------------------*/ + +.contents h1, 
.contents h2, .contents h3, .contents h4 { + padding-top: 60px; + margin-top: -40px; +} + +/* Navbar submenu --------------------------*/ + +.dropdown-submenu { + position: relative; +} + +.dropdown-submenu>.dropdown-menu { + top: 0; + left: 100%; + margin-top: -6px; + margin-left: -1px; + border-radius: 0 6px 6px 6px; +} + +.dropdown-submenu:hover>.dropdown-menu { + display: block; +} + +.dropdown-submenu>a:after { + display: block; + content: " "; + float: right; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; + border-width: 5px 0 5px 5px; + border-left-color: #cccccc; + margin-top: 5px; + margin-right: -10px; +} + +.dropdown-submenu:hover>a:after { + border-left-color: #ffffff; +} + +.dropdown-submenu.pull-left { + float: none; +} + +.dropdown-submenu.pull-left>.dropdown-menu { + left: -100%; + margin-left: 10px; + border-radius: 6px 0 6px 6px; +} + +/* Sidebar --------------------------*/ + +#pkgdown-sidebar { + margin-top: 30px; + position: -webkit-sticky; + position: sticky; + top: 70px; +} + +#pkgdown-sidebar h2 { + font-size: 1.5em; + margin-top: 1em; +} + +#pkgdown-sidebar h2:first-child { + margin-top: 0; +} + +#pkgdown-sidebar .list-unstyled li { + margin-bottom: 0.5em; +} + +/* bootstrap-toc tweaks ------------------------------------------------------*/ + +/* All levels of nav */ + +nav[data-toggle='toc'] .nav > li > a { + padding: 4px 20px 4px 6px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; +} + +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 5px; + color: inherit; + border-left: 1px solid #878787; +} + +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 5px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; + border-left: 2px solid #878787; +} + +/* Nav: second level (shown on .active) */ + +nav[data-toggle='toc'] .nav .nav { + 
display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} + +nav[data-toggle='toc'] .nav .nav > li > a { + padding-left: 16px; + font-size: 1.35rem; +} + +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 15px; +} + +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 15px; + font-weight: 500; + font-size: 1.35rem; +} + +/* orcid ------------------------------------------------------------------- */ + +.orcid { + font-size: 16px; + color: #A6CE39; + /* margins are required by official ORCID trademark and display guidelines */ + margin-left:4px; + margin-right:4px; + vertical-align: middle; +} + +/* Reference index & topics ----------------------------------------------- */ + +.ref-index th {font-weight: normal;} + +.ref-index td {vertical-align: top; min-width: 100px} +.ref-index .icon {width: 40px;} +.ref-index .alias {width: 40%;} +.ref-index-icons .alias {width: calc(40% - 40px);} +.ref-index .title {width: 60%;} + +.ref-arguments th {text-align: right; padding-right: 10px;} +.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} +.ref-arguments .name {width: 20%;} +.ref-arguments .desc {width: 80%;} + +/* Nice scrolling for wide elements --------------------------------------- */ + +table { + display: block; + overflow: auto; +} + +/* Syntax highlighting ---------------------------------------------------- */ + +pre, code, pre code { + background-color: #f8f8f8; + color: #333; +} +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} + +pre { + border: 1px solid #eee; +} + +pre .img, pre .r-plt { + margin: 5px 0; +} + +pre .img img, pre .r-plt img { + background-color: #fff; +} + +code a, pre a { + color: #375f84; +} + +a.sourceLine:hover { + text-decoration: none; +} + +.fl 
{color: #1514b5;} +.fu {color: #000000;} /* function */ +.ch,.st {color: #036a07;} /* string */ +.kw {color: #264D66;} /* keyword */ +.co {color: #888888;} /* comment */ + +.error {font-weight: bolder;} +.warning {font-weight: bolder;} + +/* Clipboard --------------------------*/ + +.hasCopyButton { + position: relative; +} + +.btn-copy-ex { + position: absolute; + right: 0; + top: 0; + visibility: hidden; +} + +.hasCopyButton:hover button.btn-copy-ex { + visibility: visible; +} + +/* headroom.js ------------------------ */ + +.headroom { + will-change: transform; + transition: transform 200ms linear; +} +.headroom--pinned { + transform: translateY(0%); +} +.headroom--unpinned { + transform: translateY(-100%); +} + +/* mark.js ----------------------------*/ + +mark { + background-color: rgba(255, 255, 51, 0.5); + border-bottom: 2px solid rgba(255, 153, 51, 0.3); + padding: 1px; +} + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 10px; +} + +/* fontawesome ------------------------ */ + +.fab { + font-family: "Font Awesome 5 Brands" !important; +} + +/* don't display links in code chunks when printing */ +/* source: https://stackoverflow.com/a/10781533 */ +@media print { + code a:link:after, code a:visited:after { + content: ""; + } +} + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js new file mode 100644 index 00000000..6f0eee40 --- /dev/null +++ b/docs/pkgdown.js @@ -0,0 +1,108 @@ +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { + + $('.navbar-fixed-top').headroom(); + + 
$('body').css('padding-top', $('.navbar').height() + 10); + $(window).resize(function(){ + $('body').css('padding-top', $('.navbar').height() + 10); + }); + + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + // Ignore external links + if (links[i].host !== location.host) + continue; + + var nav_path = paths(links[i].pathname); + + var length = prefix_length(nav_path, cur_path); + if (length > max_length) { + max_length = length; + pos = i; + } + } + + // Add class to parent
  • , and enclosing
  • if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); + menu_anchor.parent().addClass("active"); + menu_anchor.closest("li.dropdown").addClass("active"); + } + }); + + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / + + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } + + // Returns -1 if not found + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(-1); + + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 0 : -1); + } + + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } + + return(haystack.length); + } + + /* Clipboard --------------------------*/ + + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } + + if(ClipboardJS.isSupported()) { + $(document).ready(function() { + var copyButton = ""; + + $("div.sourceCode").addClass("hasCopyButton"); + + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); + + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); + + // Initialize clipboard: + var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); + } + }); + + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); + + clipboardBtnCopies.on('error', function() { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); + }); + } +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml new file mode 100644 
index 00000000..c232a2e4 --- /dev/null +++ b/docs/pkgdown.yml @@ -0,0 +1,12 @@ +pandoc: '3.2' +pkgdown: 2.0.7 +pkgdown_sha: ~ +articles: + debug-caching: debug-caching.html + duckdb-caching: duckdb-caching.html + new-endpoints: new-endpoints.html +last_built: 2025-01-13T15:48Z +urls: + reference: https://pip-technical-team.github.io/pipapi/reference + article: https://pip-technical-team.github.io/pipapi/articles + diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000000000000000000000000000000000000..17a358060aed2a86950757bbd25c6f92c08c458f GIT binary patch literal 1011 zcmeAS@N?(olHy`uVBq!ia0y~yV0-|=9Be?5+AI5}0x7m6Z+90U4Fo@(ch>_c&H|6f zVg?3oArNM~bhqvg0|WD9PZ!6KiaBo&GBN^{G%5UFpXcEKVvd5*5Eu=C0SJK)8A6*F U7`aXvEC5;V>FVdQ&MBb@00SN#Z2$lO literal 0 HcmV?d00001 diff --git a/docs/reference/add_agg_medians.html b/docs/reference/add_agg_medians.html new file mode 100644 index 00000000..0785a723 --- /dev/null +++ b/docs/reference/add_agg_medians.html @@ -0,0 +1,121 @@ + +Add Aggregate medians — add_agg_medians • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add Aggregate medians

    +
    + +
    +
    add_agg_medians(df, fill_gaps, data_dir)
    +
    + +
    +

    Arguments

    +
    df
    +

    data frame from either fg_pip or rg_pip

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + + +
    data_dir
    +

    character: Directory path of auxiliary data. Usually +lkup$data_root

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/add_dist_stats.html b/docs/reference/add_dist_stats.html new file mode 100644 index 00000000..3f175285 --- /dev/null +++ b/docs/reference/add_dist_stats.html @@ -0,0 +1,115 @@ + +Add pre-computed distributional stats — add_dist_stats • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add pre-computed distributional stats

    +
    + +
    +
    add_dist_stats(df, dist_stats)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table: Data frame of poverty statistics

    + + +
    dist_stats
    +

    data.table: Distributional stats lookup

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/add_distribution_type.html b/docs/reference/add_distribution_type.html new file mode 100644 index 00000000..185c116e --- /dev/null +++ b/docs/reference/add_distribution_type.html @@ -0,0 +1,120 @@ + +Add Distribution type — add_distribution_type • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add Distribution type

    +
    + +
    +
    add_distribution_type(df, lkup, fill_gaps)
    +
    + +
    +

    Arguments

    +
    df
    +

    data frame from fg_pip or rg_pip

    + + +
    lkup
    +

    list: lookup table

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/add_pg.html b/docs/reference/add_pg.html new file mode 100644 index 00000000..de112e20 --- /dev/null +++ b/docs/reference/add_pg.html @@ -0,0 +1,121 @@ + +Add Prosperity Gap — add_pg • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add Prosperity Gap

    +
    + +
    +
    add_pg(df, fill_gaps, data_dir)
    +
    + +
    +

    Arguments

    +
    df
    +

    data frame inside fg_pip or rg_pip

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + + +
    data_dir
    +

    character: Directory path of auxiliary data. Usually +lkup$data_root

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/add_spl.html b/docs/reference/add_spl.html new file mode 100644 index 00000000..7cd4c3c3 --- /dev/null +++ b/docs/reference/add_spl.html @@ -0,0 +1,121 @@ + +Add SPL indicators to either fg* or rg PIP output — add_spl • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add SPL indicators to either fg* or rg PIP output

    +
    + +
    +
    add_spl(df, fill_gaps, data_dir)
    +
    + +
    +

    Arguments

    +
    df
    +

    data frame inside fg_pip or rg_pip

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + + +
    data_dir
    +

    character: Directory path of auxiliary data. Usually +lkup$data_root

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/add_vars_out_of_pipeline.html b/docs/reference/add_vars_out_of_pipeline.html new file mode 100644 index 00000000..d3d7c306 --- /dev/null +++ b/docs/reference/add_vars_out_of_pipeline.html @@ -0,0 +1,118 @@ + +Add all the variables that are estimated outside the pipelines — add_vars_out_of_pipeline • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    This includes variables such as the SPL, SPR, PG, and distribution +type. Any other variables will be included here

    +
    + +
    +
    add_vars_out_of_pipeline(out, fill_gaps, lkup)
    +
    + +
    +

    Arguments

    +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + + +
    lkup
    +

    list: lookup table

    + +
    +
    +

    Value

    + + +

    data.table from pip or pip_grp functions.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/assign_serializer.html b/docs/reference/assign_serializer.html new file mode 100644 index 00000000..c1d1e852 --- /dev/null +++ b/docs/reference/assign_serializer.html @@ -0,0 +1,111 @@ + +Helper function to return correct serializer — assign_serializer • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to return correct serializer

    +
    + +
    +
    assign_serializer(format)
    +
    + +
    +

    Arguments

    +
    format
    +

    character: Response format. Options are "json", "csv", or "rds"

    + +
    +
    +

    Value

    + + +

    serializer function

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/available_versions.html b/docs/reference/available_versions.html new file mode 100644 index 00000000..acfc36b1 --- /dev/null +++ b/docs/reference/available_versions.html @@ -0,0 +1,111 @@ + +Sorted available PIP versions in data directory — available_versions • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Sorted available PIP versions in data directory

    +
    + +
    +
    available_versions(data_dir)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: data directory

    + +
    +
    +

    Value

    + + +

    character vector of sorted available PIP versions in data directory

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/censor_stats.html b/docs/reference/censor_stats.html new file mode 100644 index 00000000..79c08891 --- /dev/null +++ b/docs/reference/censor_stats.html @@ -0,0 +1,109 @@ + +Censor stats — censor_stats • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Censor stats

    +
    + +
    +
    censor_stats(df, censored_table)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table: Table to censor.

    + + +
    censored_table
    +

    data.table: Censor table

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/change_grouped_stats_to_csv.html b/docs/reference/change_grouped_stats_to_csv.html new file mode 100644 index 00000000..853b3632 --- /dev/null +++ b/docs/reference/change_grouped_stats_to_csv.html @@ -0,0 +1,111 @@ + +Change the list-output to dataframe — change_grouped_stats_to_csv • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Change the list-output to dataframe

    +
    + +
    +
    change_grouped_stats_to_csv(out)
    +
    + +
    +

    Arguments

    +
    out
    +

    output from wbpip::gd_compute_pip_stats

    + +
    +
    +

    Value

    + + +

    dataframe

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/citation_from_version.html b/docs/reference/citation_from_version.html new file mode 100644 index 00000000..5d6c05d8 --- /dev/null +++ b/docs/reference/citation_from_version.html @@ -0,0 +1,111 @@ + +Return citation from the version — citation_from_version • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return citation from the version

    +
    + +
    +
    citation_from_version(version)
    +
    + +
    +

    Arguments

    +
    version
    +

    character vector of data version

    + +
    +
    +

    Value

    + + +

    character. Text containing citation for the version passed.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/clear_cache.html b/docs/reference/clear_cache.html new file mode 100644 index 00000000..08d42251 --- /dev/null +++ b/docs/reference/clear_cache.html @@ -0,0 +1,116 @@ + +Clear cache +Clear cache directory if available — clear_cache • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Clear cache +Clear cache directory if available

    +
    + +
    +
    clear_cache(cd)
    +
    + +
    +

    Arguments

    +
    cd
    +

    A cachem::cache_disk() object

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_countries_vctr.html b/docs/reference/create_countries_vctr.html new file mode 100644 index 00000000..15ccdbae --- /dev/null +++ b/docs/reference/create_countries_vctr.html @@ -0,0 +1,128 @@ + +Create countries vectors — create_countries_vctr • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    This function selects the correct countries to be used in the aggregates +selected by the user, either official or alternative aggregates.

    +
    + +
    +
    create_countries_vctr(country, year, valid_years, aux_files)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    valid_years
    +

    list: Valid years information provided through lkup object

    + + +
    aux_files
    +

    list: List of auxiliary tables provided through lkup object

    + +
    +
    +

    Value

    + + +

    a list of vectors with countries and regions code to be used in +pip() and pip_grp()

    + + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_etag_header.html b/docs/reference/create_etag_header.html new file mode 100644 index 00000000..ce6c97c6 --- /dev/null +++ b/docs/reference/create_etag_header.html @@ -0,0 +1,119 @@ + +create_etag_header — create_etag_header • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    helper function that creates a unique hash of code + data +this hash value will be used as the value of the etag header +to facilitate caching of PIP API responses

    +
    + +
    +
    create_etag_header(req, lkups)
    +
    + +
    +

    Arguments

    +
    req
    +

    R6 object: Plumber API request

    + + +
    lkups
    +

    list: pipapi master lkups

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_lkups.html b/docs/reference/create_lkups.html new file mode 100644 index 00000000..70ad52ee --- /dev/null +++ b/docs/reference/create_lkups.html @@ -0,0 +1,115 @@ + +Create look-up tables — create_lkups • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Create look-up tables that can be passed to pip().

    +
    + +
    +
    create_lkups(data_dir, versions)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Path to PIP data root folder.

    + + +
    versions
    +

    character: Available data versions

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_return_cols.html b/docs/reference/create_return_cols.html new file mode 100644 index 00000000..620f3a2c --- /dev/null +++ b/docs/reference/create_return_cols.html @@ -0,0 +1,111 @@ + +helper function to create a list of return columns for various pipapi functions — create_return_cols • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    helper function to create a list of return columns for various pipapi functions

    +
    + +
    +
    create_return_cols(...)
    +
    + +
    +

    Arguments

    +
    ...
    +

    Named vectors of columns to be returned

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_versioned_lkups.html b/docs/reference/create_versioned_lkups.html new file mode 100644 index 00000000..0ab325b1 --- /dev/null +++ b/docs/reference/create_versioned_lkups.html @@ -0,0 +1,116 @@ + +Create one list of lookups per data version — create_versioned_lkups • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Create one list of lookups per data version

    +
    + +
    +
    create_versioned_lkups(data_dir, vintage_pattern = NULL)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Path to the main data directory

    + + +
    vintage_pattern
    +

    character: regex that identifies the name pattern of +vintage folders

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_vintage_pattern_call.html b/docs/reference/create_vintage_pattern_call.html new file mode 100644 index 00000000..ec7c3cf3 --- /dev/null +++ b/docs/reference/create_vintage_pattern_call.html @@ -0,0 +1,131 @@ + +create vintage call to be parsed into get_vintage_pattern_regex() — create_vintage_pattern_call • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    create vintage call to be parsed into get_vintage_pattern_regex()

    +
    + +
    +
    create_vintage_pattern_call(vintage_pattern = NULL)
    +
    + +
    +

    Arguments

    +
    vintage_pattern
    +

    either NULL, a character string with a regex, or a list of arguments +for get_vintage_pattern_regex()

    + +
    +
    +

    Value

    + + +

    list to be parsed into get_vintage_pattern_regex()

    + + +
    + +
    +

    Examples

    +
    if (FALSE) {
    +vintage_pattern <- NULL
    +create_vintage_pattern_call(vintage_pattern)
    +
    +vintage_pattern <- list("r.*", "", "^hjkhj\\.d")
    +create_vintage_pattern_call(vintage_pattern)
    +
    +vintage_pattern <- c("r.*", "", "^hjkhj\\.d")
    +create_vintage_pattern_call(vintage_pattern)
    +
    +vintage_pattern <- c(vintage_pattern = "r.*", test_regex = "", int_regex =  "^hjkhj\\.d")
    +create_vintage_pattern_call(vintage_pattern)
    +}
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/empty_response.html b/docs/reference/empty_response.html new file mode 100644 index 00000000..bfe3ac52 --- /dev/null +++ b/docs/reference/empty_response.html @@ -0,0 +1,103 @@ + +Empty response schema — empty_response • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    A template for empty responses

    +
    + +
    +
    empty_response
    +
    + +
    +

    Format

    +

    An object of class data.table (inherits from data.frame) with 0 rows and 44 columns.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/empty_response_cp_poverty.html b/docs/reference/empty_response_cp_poverty.html new file mode 100644 index 00000000..9a341507 --- /dev/null +++ b/docs/reference/empty_response_cp_poverty.html @@ -0,0 +1,103 @@ + +List of two datasets pov_trend and pov_mrv — empty_response_cp_poverty • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    List of two datasets pov_trend and pov_mrv

    +
    + +
    +
    data(empty_response_cp_poverty)
    +
    + +
    +

    Format

    +

    A list with 2 dataframes

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/empty_response_grp.html b/docs/reference/empty_response_grp.html new file mode 100644 index 00000000..45523957 --- /dev/null +++ b/docs/reference/empty_response_grp.html @@ -0,0 +1,103 @@ + +Dataframe for grouped empty response — empty_response_grp • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Dataframe for grouped empty response

    +
    + +
    +
    data(empty_response_grp)
    +
    + +
    +

    Format

    +

    Data frame with 0 rows and 12 columns

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/estimate_type_ctr_lnp.html b/docs/reference/estimate_type_ctr_lnp.html new file mode 100644 index 00000000..74b3e20c --- /dev/null +++ b/docs/reference/estimate_type_ctr_lnp.html @@ -0,0 +1,115 @@ + +Add estimate_type var to lineup at the country level — estimate_type_ctr_lnp • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add estimate_type var to lineup at the country level

    +
    + +
    +
    estimate_type_ctr_lnp(out, lkup)
    +
    + +
    +

    Arguments

    +
    out
    +

    current database

    + + +
    lkup
    +

    lkup list

    + +
    +
    +

    Value

    + + +

    out database with estimate_type variable

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/estimate_type_var.html b/docs/reference/estimate_type_var.html new file mode 100644 index 00000000..771cae1c --- /dev/null +++ b/docs/reference/estimate_type_var.html @@ -0,0 +1,109 @@ + +projection variables — estimate_type_var • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    It also censors specific stats

    +
    + +
    +
    estimate_type_var(df, lkup)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table: Table to censor.

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_identity.html b/docs/reference/extract_identity.html new file mode 100644 index 00000000..ab5d72b2 --- /dev/null +++ b/docs/reference/extract_identity.html @@ -0,0 +1,111 @@ + +Return identity from the version of the data — extract_identity • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return identity from the version of the data

    +
    + +
    +
    extract_identity(version)
    +
    + +
    +

    Arguments

    +
    version
    +

    character vector of data version

    + +
    +
    +

    Value

    + + +

    character vector of identity

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_ppp_date.html b/docs/reference/extract_ppp_date.html new file mode 100644 index 00000000..6a40e19f --- /dev/null +++ b/docs/reference/extract_ppp_date.html @@ -0,0 +1,111 @@ + +Return the ppp date from the version of the data — extract_ppp_date • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return the ppp date from the version of the data

    +
    + +
    +
    extract_ppp_date(version)
    +
    + +
    +

    Arguments

    +
    version
    +

    character vector of data version

    + +
    +
    +

    Value

    + + +

    Date of ppp

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_release_date.html b/docs/reference/extract_release_date.html new file mode 100644 index 00000000..9793f846 --- /dev/null +++ b/docs/reference/extract_release_date.html @@ -0,0 +1,111 @@ + +Return the release date from the version of the data — extract_release_date • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return the release date from the version of the data

    +
    + +
    +
    extract_release_date(version)
    +
    + +
    +

    Arguments

    +
    version
    +

    character vector of data version

    + +
    +
    +

    Value

    + + +

    Date of release

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fg_assign_nas_values_to_dup_cols.html b/docs/reference/fg_assign_nas_values_to_dup_cols.html new file mode 100644 index 00000000..7a6a061a --- /dev/null +++ b/docs/reference/fg_assign_nas_values_to_dup_cols.html @@ -0,0 +1,115 @@ + +Coerce variable causing potential duplicates to NAs — fg_assign_nas_values_to_dup_cols • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Coerce variable causing potential duplicates to NAs

    +
    + +
    +
    fg_assign_nas_values_to_dup_cols(df, cols)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table: Table of results created in fg_pip()

    + + +
    cols
    +

    character: Columns with potential duplicate values

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fg_pip.html b/docs/reference/fg_pip.html new file mode 100644 index 00000000..10e07ad5 --- /dev/null +++ b/docs/reference/fg_pip.html @@ -0,0 +1,154 @@ + +Compute imputed year stats — fg_pip • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Compute the main PIP poverty and inequality statistics for imputed years.

    +
    + +
    +
    fg_pip(
    +  country,
    +  year,
    +  povline,
    +  popshare,
    +  welfare_type,
    +  reporting_level,
    +  ppp,
    +  lkup,
    +  con
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    popshare
    +

    numeric: Proportion of the population living below the +poverty line

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    ppp
    +

    numeric: Custom Purchasing Power Parity value

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    con
    +

    duckdb connection object

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fg_remove_duplicates.html b/docs/reference/fg_remove_duplicates.html new file mode 100644 index 00000000..efce9685 --- /dev/null +++ b/docs/reference/fg_remove_duplicates.html @@ -0,0 +1,122 @@ + +Remove duplicated rows created during the interpolation process — fg_remove_duplicates • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Remove duplicated rows created during the interpolation process

    +
    + +
    +
    fg_remove_duplicates(
    +  df,
    +  cols = c("comparable_spell", "cpi", "display_cp", "gd_type", "interpolation_id",
    +    "path", "predicted_mean_ppp", "survey_acronym", "survey_comparability",
    +    "survey_coverage", "survey_id", "survey_mean_lcu", "survey_mean_ppp",
    +    "survey_median_lcu", "survey_median_ppp", "survey_time", "survey_year",
    +    "surveyid_year")
    +)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table: Table of results created in fg_pip()

    + + +
    cols
    +

    character: Columns with potential duplicate values

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fg_standardize_cache_id.html b/docs/reference/fg_standardize_cache_id.html new file mode 100644 index 00000000..6f48b67b --- /dev/null +++ b/docs/reference/fg_standardize_cache_id.html @@ -0,0 +1,119 @@ + +Standardize cache_id format to avoid duplication of rows — fg_standardize_cache_id • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Standardize cache_id format to avoid duplication of rows

    +
    + +
    +
    fg_standardize_cache_id(cache_id, interpolation_id, reporting_level)
    +
    + +
    +

    Arguments

    +
    cache_id
    +

    character

    + + +
    interpolation_id
    +

    character

    + + +
    reporting_level
    +

    character

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fillin_list.html b/docs/reference/fillin_list.html new file mode 100644 index 00000000..32488f41 --- /dev/null +++ b/docs/reference/fillin_list.html @@ -0,0 +1,141 @@ + +Populate list in parent frame — fillin_list • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Fill in named objects of a list with the value of named objects in the +parent frame in which the list has been created. These objects must have the +same names as the objects of the list

    +
    + +
    +
    fillin_list(l, assign = TRUE)
    +
    + +
    +

    Arguments

    +
    l
    +

    list to populate with named objects

    + + +
    assign
    +

    logical: whether to assign to parent frame

    + +
    +
    +

    Value

    + + +

    invisible list l populated with objects of the same frame

    +
    + +
    +

    Examples

    +
    l <- list(x = NULL,
    +y = NULL,
    +z = NULL)
    +
    +x <-  2
    +y <-  "f"
    +z <- TRUE
    +fillin_list(l)
    +l
    +#> $x
    +#> [1] 2
    +#> 
    +#> $y
    +#> [1] "f"
    +#> 
    +#> $z
    +#> [1] TRUE
    +#> 
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/filter_lkup.html b/docs/reference/filter_lkup.html new file mode 100644 index 00000000..67c3322d --- /dev/null +++ b/docs/reference/filter_lkup.html @@ -0,0 +1,130 @@ + +Helper to filter metadata +aggregate distribution need to be filtered out when popshare is not null +This is a temporary function until a full fix is implemented, and popshare is +supported for all distributions — filter_lkup • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper to filter metadata +aggregate distribution need to be filtered out when popshare is not null +This is a temporary function until a full fix is implemented, and popshare is +supported for all distributions

    +
    + +
    +
    filter_lkup(metadata, popshare)
    +
    + +
    +

    Arguments

    +
    metadata
    +

    data.frame: Output of subset_lkup()

    + + +
    popshare
    +

    numeric: popshare value passed to pip()

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/filter_md.html b/docs/reference/filter_md.html new file mode 100644 index 00000000..fe21dc3c --- /dev/null +++ b/docs/reference/filter_md.html @@ -0,0 +1,119 @@ + +Helper function to filter missing data table — filter_md • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to filter missing data table

    +
    + +
    +
    filter_md(md, ctr_alt_agg, year)
    +
    + +
    +

    Arguments

    +
    md
    +

    data.frame: Table of countries with missing data

    + + +
    ctr_alt_agg
    +

    character: Countries from alternate aggregates

    + + +
    year
    +

    character: year

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_additional_indicators.html b/docs/reference/get_additional_indicators.html new file mode 100644 index 00000000..1d111e38 --- /dev/null +++ b/docs/reference/get_additional_indicators.html @@ -0,0 +1,111 @@ + +Add set of extra indicators to pip output — get_additional_indicators • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add set of extra indicators to pip output

    +
    + +
    +
    get_additional_indicators(dt)
    +
    + +
    +

    Arguments

    +
    dt
    +

    data.frame: country-level output from PIP

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_additional_indicators_grp.html b/docs/reference/get_additional_indicators_grp.html new file mode 100644 index 00000000..8060b685 --- /dev/null +++ b/docs/reference/get_additional_indicators_grp.html @@ -0,0 +1,111 @@ + +Add set of extra indicators to pip output in aggregate data. — get_additional_indicators_grp • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Add set of extra indicators to pip output in aggregate data.

    +
    + +
    +
    get_additional_indicators_grp(dt)
    +
    + +
    +

    Arguments

    +
    dt
    +

    data.frame: global/regional level from PIP-grp

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_aux_table.html b/docs/reference/get_aux_table.html new file mode 100644 index 00000000..397d250e --- /dev/null +++ b/docs/reference/get_aux_table.html @@ -0,0 +1,119 @@ + +Return specified auxiliary data — get_aux_table • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return specified auxiliary data

    +
    + +
    +
    get_aux_table(data_dir, table, long_format = FALSE)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Data directory

    + + +
    table
    +

    character: Name of auxiliary table

    + + +
    long_format
    +

    logical: whether to return the data in long format (default FALSE)

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_aux_table_ui.html b/docs/reference/get_aux_table_ui.html new file mode 100644 index 00000000..e5489978 --- /dev/null +++ b/docs/reference/get_aux_table_ui.html @@ -0,0 +1,120 @@ + +Return specified auxiliary data in wide format +Helper function to the UI — get_aux_table_ui • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return specified auxiliary data in wide format +Helper function to the UI

    +
    + +
    +
    get_aux_table_ui(data_dir, table)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Data directory

    + + +
    table
    +

    character: Name of auxiliary table

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_caller_names.html b/docs/reference/get_caller_names.html new file mode 100644 index 00000000..238ac6d0 --- /dev/null +++ b/docs/reference/get_caller_names.html @@ -0,0 +1,105 @@ + +Get functions names in call stack — get_caller_names • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Get functions names in call stack

    +
    + +
    +
    get_caller_names()
    +
    + +
    +

    Value

    + + +

    character vector of calls

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_ctr_alt_agg.html b/docs/reference/get_ctr_alt_agg.html new file mode 100644 index 00000000..36b86a86 --- /dev/null +++ b/docs/reference/get_ctr_alt_agg.html @@ -0,0 +1,158 @@ + +Helper function to retrieve the required countries +needed to compute alternative aggregates requested by user +Get countries that belong to aggregates requested by the user that are NOT +official but alternative aggregates. We need to find out missing data +estimates only for those countries. For instance, if the user requested LAC +and AFE, we don't care about the the countries with missing data in the LAC +because their estimates are done implicitly. We DO care about the estimates +of the missing countries in AFE because we need the explicit SSA estimates. — get_ctr_alt_agg • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to retrieve the required countries +needed to compute alternative aggregates requested by user +Get countries that belong to aggregates requested by the user that are NOT +official but alternative aggregates. We need to find out missing data +estimates only for those countries. For instance, if the user requested LAC +and AFE, we don't care about the countries with missing data in the LAC +because their estimates are done implicitly. We DO care about the estimates +of the missing countries in AFE because we need the explicit SSA estimates.

    +
    + +
    +
    get_ctr_alt_agg(user_alt_gt, user_alt_gt_code, user_alt_agg, cl)
    +
    + +
    +

    Arguments

    +
    user_alt_gt
    +

    character: Grouping type needed by user

    + + +
    user_alt_gt_code
    +

    character: Grouping type code

    + + +
    user_alt_agg
    +

    character: Alternate aggregates requested by user

    + + +
    cl
    +

    data.frame: Countries lookup table

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_grp_to_compute.html b/docs/reference/get_grp_to_compute.html new file mode 100644 index 00000000..57c05bbe --- /dev/null +++ b/docs/reference/get_grp_to_compute.html @@ -0,0 +1,123 @@ + +Helper function to retrieve the country/year pairs to be computed — get_grp_to_compute • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to retrieve the country/year pairs to be computed

    +
    + +
    +
    get_grp_to_compute(user_off_reg, md_off_reg, year, md_year)
    +
    + +
    +

    Arguments

    +
    user_off_reg
    +

    character: Official regions requested by user

    + + +
    md_off_reg
    +

    character: Missing data for official regions

    + + +
    year
    +

    character: Years

    + + +
    md_year
    +

    character: Years with missing data

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_impl_ctrs.html b/docs/reference/get_impl_ctrs.html new file mode 100644 index 00000000..31b9341f --- /dev/null +++ b/docs/reference/get_impl_ctrs.html @@ -0,0 +1,128 @@ + +Helper function to retrieve the implicit country surveys present in both +alternative and official aggregates — get_impl_ctrs • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to retrieve the implicit country surveys present in both +alternative and official aggregates

    +
    + +
    +
    get_impl_ctrs(user_gt, user_gt_code, user_aggs, ctrs)
    +
    + +
    +

    Arguments

    +
    user_gt
    +

    character: Grouping type

    + + +
    user_gt_code
    +

    character: Grouping type code

    + + +
    user_aggs
    +

    character: Aggregates selected by user

    + + +
    ctrs
    +

    data.frame: Countries lookup table

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_md_vars.html b/docs/reference/get_md_vars.html new file mode 100644 index 00000000..cbca9f99 --- /dev/null +++ b/docs/reference/get_md_vars.html @@ -0,0 +1,133 @@ + +Helper function to retrieve variables needed to handle imputation of +missing data — get_md_vars • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to retrieve variables needed to handle imputation of +missing data

    +
    + +
    +
    get_md_vars(md, ctr_alt_agg, year, off_alt_agg, user_off_reg)
    +
    + +
    +

    Arguments

    +
    md
    +

    data.frame: Table of country/year with missing data

    + + +
    ctr_alt_agg
    +

    character: Countries in alternative aggregate

    + + +
    year
    +

    character: Years

    + + +
    off_alt_agg
    +

    character: Instruction about how to handle official and +alternate aggregates

    + + +
    user_off_reg
    +

    character: Official regions requested by user

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_metaregion_table.html b/docs/reference/get_metaregion_table.html new file mode 100644 index 00000000..2f60f1a7 --- /dev/null +++ b/docs/reference/get_metaregion_table.html @@ -0,0 +1,111 @@ + +load metaregion from aux data — get_metaregion_table • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    If there is no data available, return an empty data.frame

    +
    + +
    +
    get_metaregion_table(data_dir)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Data directory

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_param_values.html b/docs/reference/get_param_values.html new file mode 100644 index 00000000..764c79d9 --- /dev/null +++ b/docs/reference/get_param_values.html @@ -0,0 +1,122 @@ + +Get valid query parameter values +Get vector of accepted query parameter values for the API — get_param_values • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Get valid query parameter values +Get vector of accepted query parameter values for the API

    +
    + +
    +
    get_param_values(
    +  lkup,
    +  version,
    +  endpoint = c("all", "aux", "pip", "pip-grp", "pip-info", "valid-params")
    +)
    +
    + +
    +

    Arguments

    +
    lkup
    +

    list: A list of lkup tables

    + + +
    version
    +

    character: Data version. Defaults to most recent version.

    + + +
    endpoint
    +

    character: the endpoint for which to return valid parameters

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_pg_table.html b/docs/reference/get_pg_table.html new file mode 100644 index 00000000..464b99b8 --- /dev/null +++ b/docs/reference/get_pg_table.html @@ -0,0 +1,115 @@ + +Load prosperity gap table from aux data — get_pg_table • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    If there is no data available, return an empty data.frame

    +
    + +
    +
    get_pg_table(data_dir, table = c("pg_svy", "pg_lnp"))
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Data directory

    + + +
    table
    +

    character: Name of auxiliary table

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_pip_version.html b/docs/reference/get_pip_version.html new file mode 100644 index 00000000..c99fd156 --- /dev/null +++ b/docs/reference/get_pip_version.html @@ -0,0 +1,115 @@ + +Return the versions of the pip packages used for computations — get_pip_version • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return the versions of the pip packages used for computations

    +
    + +
    +
    get_pip_version(pip_packages = c("pipapi", "wbpip"), data_versions)
    +
    + +
    +

    Arguments

    +
    pip_packages
    +

    character: Custom packages powering the API

    + + +
    data_versions
    +

    character: Available data_versions

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_spr_table.html b/docs/reference/get_spr_table.html new file mode 100644 index 00000000..76be2dc5 --- /dev/null +++ b/docs/reference/get_spr_table.html @@ -0,0 +1,115 @@ + +load SPR table from aux data — get_spr_table • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    If there is no data available, return an empty data.frame

    +
    + +
    +
    get_spr_table(data_dir, table = c("spr_svy", "spr_lnp"))
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Data directory

    + + +
    table
    +

    character: Name of auxiliary table

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_svy_data.html b/docs/reference/get_svy_data.html new file mode 100644 index 00000000..3bce1cf7 --- /dev/null +++ b/docs/reference/get_svy_data.html @@ -0,0 +1,119 @@ + +Read survey data — get_svy_data • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Read survey data

    +
    + +
    +
    get_svy_data(svy_id, reporting_level, path)
    +
    + +
    +

    Arguments

    +
    svy_id
    +

    character: Survey ID

    + + +
    reporting_level
    +

    character: geographical reporting level

    + + +
    path
    +

    character: Path to survey data

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_user_alt_gt.html b/docs/reference/get_user_alt_gt.html new file mode 100644 index 00000000..f0b11bc8 --- /dev/null +++ b/docs/reference/get_user_alt_gt.html @@ -0,0 +1,115 @@ + +Helper function to define user_alt_gt — get_user_alt_gt • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to define user_alt_gt

    +
    + +
    +
    get_user_alt_gt(user_gt, off_gt)
    +
    + +
    +

    Arguments

    +
    user_gt
    +

    character: Grouping type needed by user

    + + +
    off_gt
    +

    character: Official grouping type

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_user_x_code.html b/docs/reference/get_user_x_code.html new file mode 100644 index 00000000..b0018181 --- /dev/null +++ b/docs/reference/get_user_x_code.html @@ -0,0 +1,111 @@ + +Helper function to define user_var_code — get_user_x_code • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to define user_var_code

    +
    + +
    +
    get_user_x_code(x)
    +
    + +
    +

    Arguments

    +
    x
    +

    character: Grouping type needed by user

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_valid_aux_long_format_tables.html b/docs/reference/get_valid_aux_long_format_tables.html new file mode 100644 index 00000000..5a7b9f36 --- /dev/null +++ b/docs/reference/get_valid_aux_long_format_tables.html @@ -0,0 +1,105 @@ + +Returns all auxiliary tables that support the long_format=TRUE parameter — get_valid_aux_long_format_tables • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Returns all auxiliary tables that support the long_format=TRUE parameter

    +
    + +
    +
    get_valid_aux_long_format_tables()
    +
    + +
    +

    Value

    + + +

    character vector

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ifel_isnull.html b/docs/reference/ifel_isnull.html new file mode 100644 index 00000000..01c1c3e8 --- /dev/null +++ b/docs/reference/ifel_isnull.html @@ -0,0 +1,115 @@ + +Efficient "if" "else" evaluation of null. — ifel_isnull • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Efficient "if" "else" evaluation of null.

    +
    + +
    +
    ifel_isnull(x, y)
    +
    + +
    +

    Arguments

    +
    x
    +

    object to evaluate

    + + +
    y
    +

    value to return in case x is null. If x is not null, then x is returned.

    + +
    +
    +

    Value

    + + +

    object of class(x)

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html new file mode 100644 index 00000000..4a9dca68 --- /dev/null +++ b/docs/reference/index.html @@ -0,0 +1,378 @@ + +Function reference • pipapi + + +
    +
    + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    All functions

    +

    +
    +

    add_agg_medians()

    +

    Add Aggregate medians

    +

    add_dist_stats()

    +

    Add pre-computed distributional stats

    +

    assign_serializer()

    +

    Helper function to return correct serializer

    +

    available_versions()

    +

    Sorted available PIP versions in data directory

    +

    change_grouped_stats_to_csv()

    +

    Change the list-output to dataframe

    +

    citation_from_version()

    +

    Return citation from the version

    +

    create_countries_vctr()

    +

    Create countries vectors

    +

    create_etag_header()

    +

    create_etag_header

    +

    create_return_cols()

    +

    helper function to create a list of return columns for various pipapi functions

    +

    create_versioned_lkups()

    +

    Create one list of lookups per data version

    +

    create_vintage_pattern_call()

    +

    create vintage call to be parsed into get_vintage_pattern_regex()

    +

    empty_response

    +

    Empty response schema

    +

    empty_response_cp_poverty

    +

    List of two datasets pov_trend and pov_mrv

    +

    empty_response_grp

    +

    Dataframe for grouped empty response

    +

    extract_identity()

    +

    Return identity from the version of the data

    +

    extract_ppp_date()

    +

    Return the ppp date from the version of the data

    +

    extract_release_date()

    +

    Return the release date from the version of the data

    +

    fg_assign_nas_values_to_dup_cols()

    +

    Coerce variable causing potential duplicates to NAs

    +

    fg_remove_duplicates()

    +

    Remove duplicated rows created during the interpolation process

    +

    fg_standardize_cache_id()

    +

    Standardize cache_id format to avoid duplication of rows

    +

    fillin_list()

    +

    Populate list in parent frame

    +

    filter_lkup()

    +

    Helper to filter metadata +aggregate distribution need to be filtered out when popshare is not null +This is a temporary function until a full fix is implemented, and popshare is +supported for all distributions

    +

    filter_md()

    +

    Helper function to filter missing data table

    +

    get_aux_table()

    +

    Return specified auxiliary data

    +

    get_aux_table_ui()

    +

    Return specified auxiliary data in wide format +Helper function to the UI

    +

    get_caller_names()

    +

    Get functions names in call stack

    +

    get_ctr_alt_agg()

    +

    Helper function to retrieve the required countries +needed to compute alternative aggregates requested by user +Get countries that belong to aggregates requested by the user that are NOT +official but alternative aggregates. We need to find out missing data +estimates only for those countries. For instance, if the user requested LAC +and AFE, we don't care about the countries with missing data in the LAC +because their estimates are done implicitly. We DO care about the estimates +of the missing countries in AFE because we need the explicit SSA estimates.

    +

    get_grp_to_compute()

    +

    Helper function to retrieve the country/year pairs to be computed

    +

    get_impl_ctrs()

    +

    Helper function to retrieve the implicit country surveys present in both +alternative and official aggregates

    +

    get_md_vars()

    +

    Helper function to retrieve variables needed to handle imputation of +missing data

    +

    get_param_values()

    +

    Get valid query parameter values +Get vector of accepted query parameter values for the API

    +

    get_pip_version()

    +

    Return the versions of the pip packages used for computations

    +

    get_user_alt_gt()

    +

    Helper function to define user_alt_gt

    +

    get_user_x_code()

    +

    Helper function to define user_var_code

    +

    get_valid_aux_long_format_tables()

    +

    Returns all auxiliary tables that support the long_format=TRUE parameter

    +

    ifel_isnull()

    +

    Efficient "if" "else" evaluation of null.

    +

    is_empty()

    +

    Test whether a vector is length zero and is not NULL

    +

    is_forked()

    +

    Helper function to determine whether an API call is compute intensive +and should be forked to a parallel process to avoid blocking the main +R process

    +

    lkup

    +

    List of lookup values

    +

    pip()

    +

    Compute PIP statistics

    +

    pipgd_lorenz_curve()

    +

    Lorenz curve

    +

    pip_aggregate()

    +

    Calculate estimates for aggregates different to the official regional +aggregation

    +

    pip_grp()

    +

    Compute various aggregations of PIP statistics

    +

    pip_grp_logic()

    +

    Logic for computing new aggregate

    +

    reporting_level_list

    +

    List of valid coverage values

    +

    return_correct_version()

    +

    Return the version of the data

    +

    return_if_exists()

    +

    Return the rows of the table if they exist in master file

    +

    select_country()

    +

    select_country +Helper function for subset_lkup()

    +

    select_off_alt_agg()

    +

    Helper function to identify how Official and Alternative regions should be +handled

    +

    select_reporting_level()

    +

    helper function to correctly filter look up table according to requested +reporting level

    +

    select_user_aggs()

    +

    Helper function to select correct Official Regions

    +

    select_years()

    +

    select_years +Helper function for subset_lkup()

    +

    start_api()

    +

    Main function to launch the API

    +

    ui_cp_charts()

    +

    Country Profiles Charts

    +

    ui_cp_download()

    +

    Country Profiles Key Indicators download

    +

    ui_cp_key_indicators()

    +

    Country Profiles Key Indicators

    +

    ui_hp_countries()

    +

    Home Page Country Charts

    +

    ui_hp_stacked()

    +

    Home Page Main Chart

    +

    ui_pc_charts()

    +

    Poverty Calculator Main chart

    +

    ui_pc_regional()

    +

    Poverty Calculator regional aggregates

    +

    ui_svy_meta()

    +

    Data Sources Survey Metadata

    +

    update_master_file()

    +

    Update master file with the contents of the dataframe

    +

    validate_input_grouped_stats()

    +

    Validate grouped-stats endpoint input values

    +

    valid_years()

    +

    Return available valid years

    +

    version_dataframe()

    +

    Return versions of the data available.

    +

    wld_lineup_year()

    +

    lineup year for the world

    + + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/is_empty.html b/docs/reference/is_empty.html new file mode 100644 index 00000000..6a186153 --- /dev/null +++ b/docs/reference/is_empty.html @@ -0,0 +1,124 @@ + +Test whether a vector is length zero and IS not NULL — is_empty • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Test whether a vector is length zero and IS not NULL

    +
    + +
    +
    is_empty(x)
    +
    + +
    +

    Arguments

    +
    x
    +

    Value to be passed

    + +
    +
    +

    Value

    + + +

    logical. TRUE if x is empty but it is not NULL

    +
    + +
    +

    Examples

    +
    x <- vector()
    +is_empty(x)
    +#> [1] TRUE
    +
    +y <- NULL
    +length(y)
    +#> [1] 0
    +is_empty(y)
    +#> [1] FALSE
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/is_forked.html b/docs/reference/is_forked.html new file mode 100644 index 00000000..30616088 --- /dev/null +++ b/docs/reference/is_forked.html @@ -0,0 +1,135 @@ + +Helper function to determine whether an API call is compute intensive +and should be forked to a parallel process to avoid blocking the main +R process — is_forked • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to determine whether an API call is compute intensive +and should be forked to a parallel process to avoid blocking the main +R process

    +
    + +
    +
    is_forked(country, year, intensity_threshold = 40, include_year = TRUE)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: selected countries

    + + +
    year
    +

    character: selected years

    + + +
    intensity_threshold
    +

    numeric: Number of selected country/year above which +the request will be considered intensive

    + + +
    include_year
    +

    logical: Whether year selection should be included to determine +the intensity of the request

    + +
    +
    +

    Value

    + + +

    logical

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/lkup.html b/docs/reference/lkup.html new file mode 100644 index 00000000..08767213 --- /dev/null +++ b/docs/reference/lkup.html @@ -0,0 +1,103 @@ + +List of lookup values — lkup • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    List of lookup values

    +
    + +
    +
    data(lkup)
    +
    + +
    +

    Format

    +

    A list of lookup values

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pip.html b/docs/reference/pip.html new file mode 100644 index 00000000..fa1cd8cf --- /dev/null +++ b/docs/reference/pip.html @@ -0,0 +1,211 @@ + +Compute PIP statistics — pip • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Compute the main PIP poverty and inequality statistics.

    +
    + +
    +
    pip(
    +  country = "ALL",
    +  year = "ALL",
    +  povline = 1.9,
    +  popshare = NULL,
    +  fill_gaps = FALSE,
    +  group_by = c("none", "wb"),
    +  welfare_type = c("all", "consumption", "income"),
    +  reporting_level = c("all", "national", "rural", "urban"),
    +  ppp = NULL,
    +  lkup,
    +  censor = TRUE,
    +  lkup_hash = lkup$cache_data_id$hash_pip,
    +  additional_ind = FALSE
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    popshare
    +

    numeric: Proportion of the population living below the +poverty line

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate +values for missing years

    + + +
    group_by
    +

    character: Will return aggregated values for predefined +sub-groups

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    ppp
    +

    numeric: Custom Purchase Power Parity value

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    censor
    +

    logical: Triggers censoring of country/year statistics

    + + +
    lkup_hash
    +

    character: hash of pip

    + + +
    additional_ind
    +

    logical: If TRUE add new set of indicators. Default is +FALSE

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Create lkups
    +lkups <- create_lkups("<data-folder>")
    +
    +# A single country and year
    +pip(country = "AGO",
    +    year = 2000,
    +    povline = 1.9,
    +    lkup = lkups)
    +
    +# All years for a single country
    +pip(country = "AGO",
    +    year = "all",
    +    povline = 1.9,
    +    lkup = lkups)
    +
    +# Fill gaps
    +pip(country = "AGO",
    +    year = "all",
    +    povline = 1.9,
    +    fill_gaps = TRUE,
    +    lkup = lkups)
    +
    +# Group by regions
    +pip(country = "all",
    +    year = "all",
    +    povline = 1.9,
    +    group_by = "wb",
    +    lkup = lkups)
    +}
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pip_aggregate.html b/docs/reference/pip_aggregate.html new file mode 100644 index 00000000..b303f66e --- /dev/null +++ b/docs/reference/pip_aggregate.html @@ -0,0 +1,127 @@ + +Calculate estimates for aggregates different to the official regional +aggregation — pip_aggregate • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Calculate estimates for aggregates different to the official regional +aggregation

    +
    + +
    +
    pip_aggregate(df, by = NULL, return_cols)
    +
    + +
    +

    Arguments

    +
    df
    +

    data.table from pip_fg()

    + + +
    by
    +

    character: Additional variable to use in by when doing the +aggregations. Default is NULL, but it should be used to include +aggregation variables

    + + +
    return_cols
    +

    list: lkup$return_cols$pip_grp object. Controls returned +columns

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pip_grp.html b/docs/reference/pip_grp.html new file mode 100644 index 00000000..aa0c7907 --- /dev/null +++ b/docs/reference/pip_grp.html @@ -0,0 +1,169 @@ + +Compute various aggregations of PIP statistics — pip_grp • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Compute various aggregations of PIP statistics

    +
    + +
    +
    pip_grp(
    +  country = "ALL",
    +  year = "ALL",
    +  povline = 1.9,
    +  group_by = c("wb", "none"),
    +  welfare_type = c("all", "consumption", "income"),
    +  reporting_level = c("all", "national"),
    +  lkup,
    +  censor = TRUE,
    +  lkup_hash = lkup$cache_data_id$hash_pip_grp
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    group_by
    +

    character: Will return aggregated values for predefined +sub-groups

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    censor
    +

    logical: Triggers censoring of country/year statistics

    + + +
    lkup_hash
    +

    character: hash of pip

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Create lkups
    +lkups <- create_lkups("<data-folder>")
    +
    +# A single country and year
    +pip_grp(country = "all",
    +        year = 2000,
    +        povline = 1.9,
    +        group_by = "wb",
    +        lkup = lkups)
    +}
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pip_grp_logic.html b/docs/reference/pip_grp_logic.html new file mode 100644 index 00000000..91fb1687 --- /dev/null +++ b/docs/reference/pip_grp_logic.html @@ -0,0 +1,167 @@ + +Logic for computing new aggregate — pip_grp_logic • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Logic for computing new aggregate

    +
    + +
    +
    pip_grp_logic(
    +  country = "ALL",
    +  year = "ALL",
    +  povline = 1.9,
    +  group_by = c("wb", "none"),
    +  welfare_type = c("all", "consumption", "income"),
    +  reporting_level = c("all", "national"),
    +  lkup,
    +  censor = TRUE,
    +  lkup_hash = lkup$cache_data_id$hash_pip_grp,
    +  additional_ind = FALSE
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    group_by
    +

    character: Will return aggregated values for predefined +sub-groups

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    censor
    +

    logical: Triggers censoring of country/year statistics

    + + +
    lkup_hash
    +

    character: hash of pip

    + + +
    additional_ind
    +

    logical: If TRUE add new set of indicators. Default is +FALSE

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Create lkups
    +}
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pipgd_lorenz_curve.html b/docs/reference/pipgd_lorenz_curve.html new file mode 100644 index 00000000..9aec89d1 --- /dev/null +++ b/docs/reference/pipgd_lorenz_curve.html @@ -0,0 +1,162 @@ + +Lorenz curve — pipgd_lorenz_curve • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Returns the Lorenz curve. User provides the cumulative welfare and +cumulative weight, as well as the number of points on the lorenz curve required. +By default, the best fitting Lorenz parameterization (quadratic or beta) is +selected.

    +
    + +
    +
    pipgd_lorenz_curve(welfare = NULL, weight = NULL, lorenz = NULL, n_bins = 100)
    +
    + +
    +

    Arguments

    +
    welfare
    +

    numeric vector of cumulative share of welfare (income/consumption)

    + + +
    weight
    +

    numeric vector of cumulative share of the population

    + + +
    lorenz
    +

    either "lb" or "lq"

    + + +
    n_bins
    +

    atomic double vector of length 1: number of points on the +lorenz curve

    + + +
    params
    +

    list of parameters

    + +
    +
    +

    Value

    + + +

    Returns a list which contains:

    • numeric lorenz curve,

    • +
    • corresponding points on x-axis,

    • +
    • whether lq or lb parameterization, and

    • +
    • if complete=TRUE, also returns all params.

    • +
    + +
    +

    Examples

    +
    if (FALSE) {
    +# Example 1: Generating a Lorenz Curve with default settings
    +pipgd_lorenz_curve(welfare = pip_gd$L,
    +                   weight = pip_gd$P)
    +
    +# Example 2: Specifying the number of bins for the Lorenz Curve
    +pipgd_lorenz_curve(welfare = pip_gd$L,
    +                   weight = pip_gd$P,
    +                   n_bins = 50)
    +
    +# Example 3: Using pre-calculated parameters
    +use_params <- pipgd_params(welfare = pip_gd$L,
    +                           weight = pip_gd$P)
    +pipgd_lorenz_curve(params = use_params)
    +
    +
    +# Example 4: Generating Lorenz Curve with a specific Lorenz model(e.g. Lorenz beta)
    +pipgd_lorenz_curve(params = use_params,
    +                   lorenz = "lb")
    +}
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/reporting_level_list.html b/docs/reference/reporting_level_list.html new file mode 100644 index 00000000..5f5104ee --- /dev/null +++ b/docs/reference/reporting_level_list.html @@ -0,0 +1,103 @@ + +List of valid coverage values — reporting_level_list • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    List of valid coverage values

    +
    + +
    +
    reporting_level_list
    +
    + +
    +

    Format

    +

    An object of class character of length 3.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/return_correct_version.html b/docs/reference/return_correct_version.html new file mode 100644 index 00000000..3ace44eb --- /dev/null +++ b/docs/reference/return_correct_version.html @@ -0,0 +1,133 @@ + +Return the version of the data — return_correct_version • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return the version of the data

    +
    + +
    +
    return_correct_version(
    +  version = NULL,
    +  release_version = NULL,
    +  ppp_version = NULL,
    +  identity = "PROD",
    +  versions_available
    +)
    +
    + +
    +

    Arguments

    +
    version
    +

    Data version. Defaults to most recent version. See api/v1/versions

    + + +
    release_version
    +

    date when the data was published in YYYYMMDD format

    + + +
    ppp_version
    +

    ppp year to be used

    + + +
    identity
    +

    One of "PROD" (production), "INT" (internal) and "TEST"

    + + +
    versions_available
    +

    character vector of all the versions available

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/return_if_exists.html b/docs/reference/return_if_exists.html new file mode 100644 index 00000000..4c205d97 --- /dev/null +++ b/docs/reference/return_if_exists.html @@ -0,0 +1,123 @@ + +Return the rows of the table if they exist in master file — return_if_exists • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return the rows of the table if they exist in master file

    +
    + +
    +
    return_if_exists(lkup, povline, con)
    +
    + +
    +

    Arguments

    +
    lkup
    +

    list: A list of lkup tables

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    con
    +

    Connection object

    + + +
    country_code
    +

    Country Code

    + +
    +
    +

    Value

    + + +

    Dataframe

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/rg_pip.html b/docs/reference/rg_pip.html new file mode 100644 index 00000000..9278a0d0 --- /dev/null +++ b/docs/reference/rg_pip.html @@ -0,0 +1,154 @@ + +Compute survey year stats — rg_pip • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Compute the main PIP poverty and inequality statistics for survey years.

    +
    + +
    +
    rg_pip(
    +  country,
    +  year,
    +  povline,
    +  popshare,
    +  welfare_type,
    +  reporting_level,
    +  ppp,
    +  lkup,
    +  con
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    popshare
    +

    numeric: Proportion of the population living below the +poverty line

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    ppp
    +

    numeric: Custom Purchase Power Parity value

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    con
    +

    duckdb connection object

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/select_country.html b/docs/reference/select_country.html new file mode 100644 index 00000000..4352c51b --- /dev/null +++ b/docs/reference/select_country.html @@ -0,0 +1,129 @@ + +select_country +Helper function for subset_lkup() — select_country • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    select_country +Helper function for subset_lkup()

    +
    + +
    +
    select_country(lkup, keep, country, valid_regions)
    +
    + +
    +

    Arguments

    +
    lkup
    +

    list: A list of lkup tables

    + + +
    keep
    +

    logical vector

    + + +
    country
    +

    character: Country ISO 3 codes

    + + +
    valid_regions
    +

    character: List of valid region codes that can be used +for region selection

    + +
    +
    +

    Value

    + + +

    logical vector

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/select_off_alt_agg.html b/docs/reference/select_off_alt_agg.html new file mode 100644 index 00000000..2e4f542c --- /dev/null +++ b/docs/reference/select_off_alt_agg.html @@ -0,0 +1,120 @@ + +Helper function to identify how Official and Alternative regions should be +handled — select_off_alt_agg • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to identify how Official and Alternative regions should be +handled

    +
    + +
    +
    select_off_alt_agg(user_gt, off_gt)
    +
    + +
    +

    Arguments

    +
    user_gt
    +

    character: Grouping type implicitly selected by user

    + + +
    off_gt
    +

    character: Grouping type associated with Official Regions

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/select_reporting_level.html b/docs/reference/select_reporting_level.html new file mode 100644 index 00000000..622ee904 --- /dev/null +++ b/docs/reference/select_reporting_level.html @@ -0,0 +1,124 @@ + +helper function to correctly filter look up table according to requested +reporting level — select_reporting_level • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    helper function to correctly filter look up table according to requested +reporting level

    +
    + +
    +
    select_reporting_level(lkup, keep, reporting_level)
    +
    + +
    +

    Arguments

    +
    lkup
    +

    data.table: Main lookup table

    + + +
    keep
    +

    logical: Logical vector of rows to be kept

    + + +
    reporting_level
    +

    character: Requested reporting level

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/select_user_aggs.html b/docs/reference/select_user_aggs.html new file mode 100644 index 00000000..1003d8f9 --- /dev/null +++ b/docs/reference/select_user_aggs.html @@ -0,0 +1,123 @@ + +Helper function to select correct Official Regions — select_user_aggs • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to select correct Official Regions

    +
    + +
    +
    select_user_aggs(country, all_agg, off_reg_ext, aggs)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: User selected countries

    + + +
    all_agg
    +

    character: all country aggregates

    + + +
    off_reg_ext
    +

    character: Official region codes

    + + +
    aggs
    +

    data.frame: Regions lookup table

    + +
    +
    +

    Value

    + + +

    character

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/select_years.html b/docs/reference/select_years.html new file mode 100644 index 00000000..08e03da0 --- /dev/null +++ b/docs/reference/select_years.html @@ -0,0 +1,137 @@ + +select_years +Helper function for subset_lkup() — select_years • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    select_years +Helper function for subset_lkup()

    +
    + +
    +
    select_years(lkup, keep, year, country, data_dir, valid_regions = NULL)
    +
    + +
    +

    Arguments

    +
    lkup
    +

    list: A list of lkup tables

    + + +
    keep
    +

    logical vector

    + + +
    year
    +

    integer: Reporting year

    + + +
    country
    +

    character: Country ISO 3 codes

    + + +
    data_dir
    +

    character: directory path from lkup$data_root

    + + +
    valid_regions
    +

    character: List of valid region codes that can be used +for region selection

    + +
    +
    +

    Value

    + + +

    logical vector

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/start_api.html b/docs/reference/start_api.html new file mode 100644 index 00000000..9a28ba94 --- /dev/null +++ b/docs/reference/start_api.html @@ -0,0 +1,119 @@ + +Main function to launch the API — start_api • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Main function to launch the API

    +
    + +
    +
    start_api(api_version = "v1", port = 80, host = "0.0.0.0")
    +
    + +
    +

    Arguments

    +
    api_version
    +

    character: API version to launch

    + + +
    port
    +

    integer: Port

    + + +
    host
    +

    character: Host

    + +
    +
    +

    Value

    + + +

    plumber API

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/subset_ctry_years.html b/docs/reference/subset_ctry_years.html new file mode 100644 index 00000000..5a3d02ed --- /dev/null +++ b/docs/reference/subset_ctry_years.html @@ -0,0 +1,137 @@ + +Subset country-years table +This is a table created at start time to facilitate imputations +It part of the interpolated_list object — subset_ctry_years • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Subset country-years table +This is a table created at start time to facilitate imputations +It is part of the interpolated_list object

    +
    + +
    +
    subset_ctry_years(country, year, lkup, valid_regions, data_dir)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    valid_regions
    +

    character: List of valid region codes that can be used

    + + +
    data_dir
    +

    character: directory path from lkup$data_root

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/subset_lkup.html b/docs/reference/subset_lkup.html new file mode 100644 index 00000000..d34af8dd --- /dev/null +++ b/docs/reference/subset_lkup.html @@ -0,0 +1,154 @@ + +Subset look-up data — subset_lkup • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Subset look-up data

    +
    + +
    +
    subset_lkup(
    +  country,
    +  year,
    +  welfare_type,
    +  reporting_level,
    +  lkup,
    +  valid_regions,
    +  data_dir = NULL,
    +  povline,
    +  con
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    valid_regions
    +

    character: List of valid region codes that can be used +for region selection

    + + +
    data_dir
    +

    character: directory path from lkup$data_root

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    con
    +

    duckdb connection object

    + +
    +
    +

    Value

    + + +

    data.frame

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_cp_charts.html b/docs/reference/ui_cp_charts.html new file mode 100644 index 00000000..46332301 --- /dev/null +++ b/docs/reference/ui_cp_charts.html @@ -0,0 +1,134 @@ + +Country Profiles Charts — ui_cp_charts • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the country profile charts.

    +
    + +
    +
    ui_cp_charts(
    +  country = "AGO",
    +  povline = 1.9,
    +  pop_units = 1e+06,
    +  lkup,
    +  lkup_hash = lkup$cache_data_id$hash_ui_cp
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    pop_units
    +

    numeric: Units used to express population numbers (default +to million)

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    lkup_hash
    +

    character: hash of pip

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_cp_download.html b/docs/reference/ui_cp_download.html new file mode 100644 index 00000000..85f31cf0 --- /dev/null +++ b/docs/reference/ui_cp_download.html @@ -0,0 +1,133 @@ + +Country Profiles Key Indicators download — ui_cp_download • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Helper function to download Country Profile data

    +
    + +
    +
    ui_cp_download(
    +  country = "AGO",
    +  year = "ALL",
    +  povline = 1.9,
    +  lkup,
    +  lkup_hash = lkup$cache_data_id$hash_ui_cp
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    lkup_hash
    +

    character: hash of pip

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_cp_key_indicators.html b/docs/reference/ui_cp_key_indicators.html new file mode 100644 index 00000000..9977114f --- /dev/null +++ b/docs/reference/ui_cp_key_indicators.html @@ -0,0 +1,128 @@ + +Country Profiles Key Indicators — ui_cp_key_indicators • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the country profiles key indicators.

    +
    + +
    +
    ui_cp_key_indicators(
    +  country = "AGO",
    +  povline = NULL,
    +  lkup,
    +  lkup_hash = lkup$cache_data_id$hash_ui_cp
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    lkup_hash
    +

    character: hash of pip

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_cp_poverty_charts.html b/docs/reference/ui_cp_poverty_charts.html new file mode 100644 index 00000000..035b36e6 --- /dev/null +++ b/docs/reference/ui_cp_poverty_charts.html @@ -0,0 +1,124 @@ + +CP Poverty Charts — ui_cp_poverty_charts • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the country profiles poverty charts

    +
    + +
    +
    ui_cp_poverty_charts(country, povline, pop_units, lkup)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    pop_units
    +

    numeric: Units used to express population numbers (default +to million)

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    +
    +

    Value

    + + +

    list

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_hp_countries.html b/docs/reference/ui_hp_countries.html new file mode 100644 index 00000000..5da2851d --- /dev/null +++ b/docs/reference/ui_hp_countries.html @@ -0,0 +1,129 @@ + +Home Page Country Charts — ui_hp_countries • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the home page country charts.

    +
    + +
    +
    ui_hp_countries(
    +  country = c("IDN", "CIV"),
    +  povline = 1.9,
    +  pop_units = 1e+06,
    +  lkup
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    pop_units
    +

    numeric: Units used to express population numbers (default +to million)

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_hp_stacked.html b/docs/reference/ui_hp_stacked.html new file mode 100644 index 00000000..69c23a9f --- /dev/null +++ b/docs/reference/ui_hp_stacked.html @@ -0,0 +1,119 @@ + +Home Page Main Chart — ui_hp_stacked • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the home page main chart.

    +
    + +
    +
    ui_hp_stacked(povline = 1.9, lkup, lkup_hash = lkup$cache_data_id$hash_pip_grp)
    +
    + +
    +

    Arguments

    +
    povline
    +

    numeric: Poverty line

    + + +
    lkup
    +

    list: A list of lkup tables

    + + +
    lkup_hash
    +

    character: hash of pip

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_pc_charts.html b/docs/reference/ui_pc_charts.html new file mode 100644 index 00000000..504891e9 --- /dev/null +++ b/docs/reference/ui_pc_charts.html @@ -0,0 +1,156 @@ + +Poverty Calculator Main chart — ui_pc_charts • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate the poverty calculator main chart.

    +
    + +
    +
    ui_pc_charts(
    +  country = c("AGO"),
    +  year = "all",
    +  povline = 1.9,
    +  fill_gaps = FALSE,
    +  group_by = "none",
    +  welfare_type = c("all", "consumption", "income"),
    +  reporting_level = c("all", "national", "rural", "urban"),
    +  pop_units = 1e+06,
    +  lkup
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    fill_gaps
    +

    logical: If set to TRUE, will interpolate / extrapolate values for missing years

    + + +
    group_by
    +

    character: Will return aggregated values for predefined sub-groups

    + + +
    welfare_type
    +

    character: Welfare type

    + + +
    reporting_level
    +

    character: Geographical reporting level

    + + +
    pop_units
    +

    numeric: Units used to express population numbers (default to million)

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_pc_regional.html b/docs/reference/ui_pc_regional.html new file mode 100644 index 00000000..b71de270 --- /dev/null +++ b/docs/reference/ui_pc_regional.html @@ -0,0 +1,136 @@ + +Poverty Calculator regional aggregates — ui_pc_regional • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides numbers that will populate poverty calculator regional aggregates for all years.

    +
    + +
    +
    ui_pc_regional(
    +  country = "ALL",
    +  year = "ALL",
    +  povline = 1.9,
    +  pop_units = 1e+06,
    +  lkup
    +)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    year
    +

    integer: Reporting year

    + + +
    povline
    +

    numeric: Poverty line

    + + +
    pop_units
    +

    numeric: Units used to express population numbers (default to million)

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ui_svy_meta.html b/docs/reference/ui_svy_meta.html new file mode 100644 index 00000000..bfa8b2b9 --- /dev/null +++ b/docs/reference/ui_svy_meta.html @@ -0,0 +1,115 @@ + +Data Sources Survey Metadata — ui_svy_meta • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Provides survey metadata that will populate the Data Sources page.

    +
    + +
    +
    ui_svy_meta(country = "all", lkup)
    +
    + +
    +

    Arguments

    +
    country
    +

    character: Country ISO 3 codes

    + + +
    lkup
    +

    list: A list of lkup tables

    + +
    +
    +

    Value

    + + +

    data.table

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/update_master_file.html b/docs/reference/update_master_file.html new file mode 100644 index 00000000..fe4ba563 --- /dev/null +++ b/docs/reference/update_master_file.html @@ -0,0 +1,115 @@ + +Update master file with the contents of the dataframe — update_master_file • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Update master file with the contents of the dataframe

    +
    + +
    +
    update_master_file(dat, con)
    +
    + +
    +

    Arguments

    +
    dat
    +

    Dataframe to be appended

    + + +
    con
    +

    DuckDB connection object

    + +
    +
    +

    Value

    + + +

    number of rows updated

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/valid_years.html b/docs/reference/valid_years.html new file mode 100644 index 00000000..67c1513e --- /dev/null +++ b/docs/reference/valid_years.html @@ -0,0 +1,111 @@ + +Return available valid years — valid_years • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return available valid years

    +
    + +
    +
    valid_years(data_dir)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Path to the root directory

    + +
    +
    +

    Value

    + + +

    numeric vector of valid years

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/validate_input_grouped_stats.html b/docs/reference/validate_input_grouped_stats.html new file mode 100644 index 00000000..fbc80a2f --- /dev/null +++ b/docs/reference/validate_input_grouped_stats.html @@ -0,0 +1,119 @@ + +Validate grouped-stats endpoint input values — validate_input_grouped_stats • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Validate grouped-stats endpoint input values

    +
    + +
    +
    validate_input_grouped_stats(welfare, population, max_length = 100)
    +
    + +
    +

    Arguments

    +
    welfare
    +

    character: query values

    + + +
    population
    +

    character: valid values

    + + +
    max_length
    +

    integer: Max length of welfare vector

    + +
    +
    +

    Value

    + + +

    list of two vectors welfare and population

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/version_dataframe.html b/docs/reference/version_dataframe.html new file mode 100644 index 00000000..c91b6831 --- /dev/null +++ b/docs/reference/version_dataframe.html @@ -0,0 +1,111 @@ + +Return versions of the data available. — version_dataframe • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    Return versions of the data available.

    +
    + +
    +
    version_dataframe(versions)
    +
    + +
    +

    Arguments

    +
    versions
    +

    character: All available versions

    + +
    +
    +

    Value

    + + +

    Dataframe with 4 columns, versions, release_version, ppp_version and identity

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/wld_lineup_year.html b/docs/reference/wld_lineup_year.html new file mode 100644 index 00000000..7eebd627 --- /dev/null +++ b/docs/reference/wld_lineup_year.html @@ -0,0 +1,111 @@ + +lineup year for the world — wld_lineup_year • pipapi + + +
    +
    + + + +
    +
    + + +
    +

    lineup year for the world

    +
    + +
    +
    wld_lineup_year(data_dir)
    +
    + +
    +

    Arguments

    +
    data_dir
    +

    character: Path to the root directory

    + +
    +
    +

    Value

    + + +

    numeric vector of length one of lineup year for the world

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 00000000..cdf78438 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,300 @@ + + + + https://pip-technical-team.github.io/pipapi/404.html + + + https://pip-technical-team.github.io/pipapi/articles/debug-caching.html + + + https://pip-technical-team.github.io/pipapi/articles/duckdb-caching.html + + + https://pip-technical-team.github.io/pipapi/articles/index.html + + + https://pip-technical-team.github.io/pipapi/articles/new-endpoints.html + + + https://pip-technical-team.github.io/pipapi/authors.html + + + https://pip-technical-team.github.io/pipapi/CONTRIBUTING.html + + + https://pip-technical-team.github.io/pipapi/index.html + + + https://pip-technical-team.github.io/pipapi/LICENSE-text.html + + + https://pip-technical-team.github.io/pipapi/LICENSE.html + + + https://pip-technical-team.github.io/pipapi/news/index.html + + + https://pip-technical-team.github.io/pipapi/PULL_REQUEST_TEMPLATE.html + + + https://pip-technical-team.github.io/pipapi/reference/add_agg_medians.html + + + https://pip-technical-team.github.io/pipapi/reference/add_distribution_type.html + + + https://pip-technical-team.github.io/pipapi/reference/add_dist_stats.html + + + https://pip-technical-team.github.io/pipapi/reference/add_pg.html + + + https://pip-technical-team.github.io/pipapi/reference/add_spl.html + + + https://pip-technical-team.github.io/pipapi/reference/add_vars_out_of_pipeline.html + + + https://pip-technical-team.github.io/pipapi/reference/assign_serializer.html + + + https://pip-technical-team.github.io/pipapi/reference/available_versions.html + + + https://pip-technical-team.github.io/pipapi/reference/censor_stats.html + + + https://pip-technical-team.github.io/pipapi/reference/change_grouped_stats_to_csv.html + + + https://pip-technical-team.github.io/pipapi/reference/citation_from_version.html + + + https://pip-technical-team.github.io/pipapi/reference/clear_cache.html + + + 
https://pip-technical-team.github.io/pipapi/reference/create_countries_vctr.html + + + https://pip-technical-team.github.io/pipapi/reference/create_etag_header.html + + + https://pip-technical-team.github.io/pipapi/reference/create_lkups.html + + + https://pip-technical-team.github.io/pipapi/reference/create_return_cols.html + + + https://pip-technical-team.github.io/pipapi/reference/create_versioned_lkups.html + + + https://pip-technical-team.github.io/pipapi/reference/create_vintage_pattern_call.html + + + https://pip-technical-team.github.io/pipapi/reference/empty_response.html + + + https://pip-technical-team.github.io/pipapi/reference/empty_response_cp_poverty.html + + + https://pip-technical-team.github.io/pipapi/reference/empty_response_grp.html + + + https://pip-technical-team.github.io/pipapi/reference/estimate_type_ctr_lnp.html + + + https://pip-technical-team.github.io/pipapi/reference/estimate_type_var.html + + + https://pip-technical-team.github.io/pipapi/reference/extract_identity.html + + + https://pip-technical-team.github.io/pipapi/reference/extract_ppp_date.html + + + https://pip-technical-team.github.io/pipapi/reference/extract_release_date.html + + + https://pip-technical-team.github.io/pipapi/reference/fg_assign_nas_values_to_dup_cols.html + + + https://pip-technical-team.github.io/pipapi/reference/fg_pip.html + + + https://pip-technical-team.github.io/pipapi/reference/fg_remove_duplicates.html + + + https://pip-technical-team.github.io/pipapi/reference/fg_standardize_cache_id.html + + + https://pip-technical-team.github.io/pipapi/reference/fillin_list.html + + + https://pip-technical-team.github.io/pipapi/reference/filter_lkup.html + + + https://pip-technical-team.github.io/pipapi/reference/filter_md.html + + + https://pip-technical-team.github.io/pipapi/reference/get_additional_indicators.html + + + https://pip-technical-team.github.io/pipapi/reference/get_additional_indicators_grp.html + + + 
https://pip-technical-team.github.io/pipapi/reference/get_aux_table.html + + + https://pip-technical-team.github.io/pipapi/reference/get_aux_table_ui.html + + + https://pip-technical-team.github.io/pipapi/reference/get_caller_names.html + + + https://pip-technical-team.github.io/pipapi/reference/get_ctr_alt_agg.html + + + https://pip-technical-team.github.io/pipapi/reference/get_grp_to_compute.html + + + https://pip-technical-team.github.io/pipapi/reference/get_impl_ctrs.html + + + https://pip-technical-team.github.io/pipapi/reference/get_md_vars.html + + + https://pip-technical-team.github.io/pipapi/reference/get_metaregion_table.html + + + https://pip-technical-team.github.io/pipapi/reference/get_param_values.html + + + https://pip-technical-team.github.io/pipapi/reference/get_pg_table.html + + + https://pip-technical-team.github.io/pipapi/reference/get_pip_version.html + + + https://pip-technical-team.github.io/pipapi/reference/get_spr_table.html + + + https://pip-technical-team.github.io/pipapi/reference/get_svy_data.html + + + https://pip-technical-team.github.io/pipapi/reference/get_user_alt_gt.html + + + https://pip-technical-team.github.io/pipapi/reference/get_user_x_code.html + + + https://pip-technical-team.github.io/pipapi/reference/get_valid_aux_long_format_tables.html + + + https://pip-technical-team.github.io/pipapi/reference/ifel_isnull.html + + + https://pip-technical-team.github.io/pipapi/reference/index.html + + + https://pip-technical-team.github.io/pipapi/reference/is_empty.html + + + https://pip-technical-team.github.io/pipapi/reference/is_forked.html + + + https://pip-technical-team.github.io/pipapi/reference/lkup.html + + + https://pip-technical-team.github.io/pipapi/reference/pip.html + + + https://pip-technical-team.github.io/pipapi/reference/pipgd_lorenz_curve.html + + + https://pip-technical-team.github.io/pipapi/reference/pip_aggregate.html + + + https://pip-technical-team.github.io/pipapi/reference/pip_grp.html + + + 
https://pip-technical-team.github.io/pipapi/reference/pip_grp_logic.html + + + https://pip-technical-team.github.io/pipapi/reference/reporting_level_list.html + + + https://pip-technical-team.github.io/pipapi/reference/return_correct_version.html + + + https://pip-technical-team.github.io/pipapi/reference/return_if_exists.html + + + https://pip-technical-team.github.io/pipapi/reference/rg_pip.html + + + https://pip-technical-team.github.io/pipapi/reference/select_country.html + + + https://pip-technical-team.github.io/pipapi/reference/select_off_alt_agg.html + + + https://pip-technical-team.github.io/pipapi/reference/select_reporting_level.html + + + https://pip-technical-team.github.io/pipapi/reference/select_user_aggs.html + + + https://pip-technical-team.github.io/pipapi/reference/select_years.html + + + https://pip-technical-team.github.io/pipapi/reference/start_api.html + + + https://pip-technical-team.github.io/pipapi/reference/subset_ctry_years.html + + + https://pip-technical-team.github.io/pipapi/reference/subset_lkup.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_cp_charts.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_cp_download.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_cp_key_indicators.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_cp_poverty_charts.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_hp_countries.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_hp_stacked.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_pc_charts.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_pc_regional.html + + + https://pip-technical-team.github.io/pipapi/reference/ui_svy_meta.html + + + https://pip-technical-team.github.io/pipapi/reference/update_master_file.html + + + https://pip-technical-team.github.io/pipapi/reference/validate_input_grouped_stats.html + + + 
https://pip-technical-team.github.io/pipapi/reference/valid_years.html + + + https://pip-technical-team.github.io/pipapi/reference/version_dataframe.html + + + https://pip-technical-team.github.io/pipapi/reference/wld_lineup_year.html + + diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 84c1e513..0e4b0d3b 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -27,15 +27,15 @@ pip(country = "AGO", year = 2000, lkup = lkup) Now since these are separate set of arguments 2 files of caching are created and saved on the disk. Now if a call to `pip` is made again `pip(country = "AGO", year = 2000, lkup = lkup)` which is same as 2) then it would return the result from the cached file stored on the disk without doing any calculation. Needless to say, this result is much faster. -However, notice that the 2nd call is subset of the 1st one. What I mean by that is the result of 2) is already present in result of 1). We have done the calculations for all the countries for the year 2000 in 1) we just need output of "AGO" from it to get the result for 2). +However, notice that the 2nd call is subset of the 1st one. What I mean by that is the result of 2) is already present in result of 1). We have done the calculations for all the countries for the year 2000 in 1) we just need output of `"AGO"` from it to get the result for 2). ## Custom caching for pipapi. -What if we could take subset of an existing cache like how we need it as above. However, this is not how traditional caching systems work. We would need to implement something custom if we want to make this work. +What if we could take subset of an existing cache like how we need it as above. However, this is not how traditional caching systems work so there is no ready-made solution available. We would need to implement this logic from scratch if we want to make this work. -We came up with an idea to implement this custom caching using `duckdb` in a table. 
Basically, all the queries that are called till now are saved in this table and whenever a new call is made it checks if the query is already called, if yes then it returns the result immediately or else it will do the calculation and then save the result to the table for next use and return the result. There are various scenarios that we need to consider and let's take help of an example to understand each one of them. +We came up with an idea to implement this custom caching using `duckdb` in a table. Basically, all the queries that are called till now are saved in the table and whenever a new call is made it checks if the result already exists in the table, if yes then it returns the result immediately or else it will do the calculation and then save the result to the table for next use and return the result. There are various scenarios that we need to consider to understand this approach. Let's take help of an example to understand each one of them. -Consider that we are just starting out and there is nothing saved in the table. +Consider that we are just starting out and there is nothing saved in the table and it's empty. #### Scenario 1 - @@ -43,7 +43,7 @@ Consider that we are just starting out and there is nothing saved in the table. pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ``` -Now since nothing is saved in the table this will go through the whole round of calculation and save the result in the table for future use and return the output. +Since the table is empty, this call will do the calculation and save the result in the table for future use and return the output. #### Scenario 2 - @@ -89,14 +89,15 @@ These are 6 different scenarios that can occur. Note that I have not used all th ## Code overview -We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv(PIP_CACHE_FILE = "demo.duckdb")`). 
A table called `master_file` is created inside it where we save our cache. +We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv('PIP_CACHE_FILE' = fs::path('e:/PIP/pipapi_data/demo.duckdb'))`). A table called `master_file` is created inside it where we save our cache. -Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data that from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb. +Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb. -A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup` (Scenario 3 where we have a part of data in master file). The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. +A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup`. 
The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. ## Speed comparison +For analysis purposes, we are comparing speed in different scenarios on `DEV` branch vs that in `implement-duckdb` branch. ```{r} microbenchmark::microbenchmark( @@ -116,23 +117,6 @@ microbenchmark::microbenchmark( #pip_DEV 51.01007 53.67546 62.35531 56.08937 60.12046 354.7717 100 ``` -```{r} -microbenchmark::microbenchmark( - duckdb_caching = pip(country = "all", year = "all", lkup = lkup) -) - -#Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 110.0725 115.0695 129.6408 118.4356 122.2621 494.3855 100 - -microbenchmark::microbenchmark( - pip_DEV = pip(country = "all", year = "all", lkup = lkup) -) - -#Unit: seconds -# expr min lq mean median uq max neval - #pip_DEV 14.42378 14.78717 14.98249 14.96088 15.11043 17.44418 100 -``` ```{r} country_list <- c("AGO", "ARG", "AUT", "BEL", "BGD", "BLR", "BOL", "CAN", "CHE", @@ -152,10 +136,10 @@ for(i in seq_along(country_list)) { tictoc::toc() ## For Duckdb -#16.69 sec elapsed +# 16.69 sec elapsed ## For DEV version -#9.26 sec elapsed +# 9.26 sec elapsed ``` ```{r} @@ -167,8 +151,8 @@ for(i in seq_along(country_list)) { tictoc::toc() ## DEV -#178.97 sec elapsed +# 178.97 sec elapsed ## Duckdb caching -#6.96 sec elapsed +# 6.96 sec elapsed ``` From b6795ad3cb4d053d1254a7f401cedef7394cc057 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Tue, 14 Jan 2025 18:13:49 +0530 Subject: [PATCH 24/49] rm missing comma --- R/pip.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pip.R b/R/pip.R index 3b14ac51..2cc3134d 100644 --- a/R/pip.R +++ b/R/pip.R @@ -131,7 +131,7 @@ pip <- function(country = "ALL", reporting_level = reporting_level, ppp = ppp, lkup = lkup, - con = con, + con 
= con ) } else { ## survey years ------------------ From 9aaf68497d5be80831acc29ad2ee01befaef9630 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 19 Jan 2025 01:02:34 +0530 Subject: [PATCH 25/49] push draft --- .gitignore | 2 +- R/pip.R | 8 +++++--- vignettes/duckdb-caching.Rmd | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 3ac36758..e8336386 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ tests/testdata/app_data/ .Renviron logs/ /sessionInfoLog -demo.duckdb +demo.duckdb* diff --git a/R/pip.R b/R/pip.R index 2cc3134d..241e426c 100644 --- a/R/pip.R +++ b/R/pip.R @@ -149,6 +149,10 @@ pip <- function(country = "ALL", } cached_data <- out$data_in_cache out <- out$main_data + + out <- collapse::rowbind( + cached_data, out + ) # Early return for empty table--------------- if (nrow(out) > 0) { # aggregate distributions ------------------ @@ -289,9 +293,7 @@ pip <- function(country = "ALL", update_master_file(out, con) } #} - out <- collapse::rowbind( - cached_data, out - ) + # Make sure no duplicate remains out <- out |> collapse::funique() # return ------------- diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index dfa93af6..86610ccc 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -101,7 +101,7 @@ For analysis purposes, we are comparing speed in different scenarios on `DEV` br ```{r} microbenchmark::microbenchmark( - pip_DEV = pip(country = "all", year = "all", lkup = lkup) + pip_DEV = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ) #Unit: microseconds From 7c52503b80e32f790c1895e410c78b132d933762 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 19 Jan 2025 16:42:44 +0530 Subject: [PATCH 26/49] ready for separate master files --- DESCRIPTION | 3 ++- R/duckdb_fun.R | 24 +++++++++--------------- R/fg_pip.R | 3 ++- R/pip.R | 42 ++++++++++++++++-------------------------- R/rg_pip.R | 3 ++- R/utils.R | 5 +++-- 6 files changed, 34 
insertions(+), 46 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9b289a31..e2c3d701 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,7 +58,8 @@ Imports: joyn, yaml, purrr, - future + future, + glue Remotes: PIP-Technical-Team/wbpip@DEV Depends: diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 90bedb4e..32453759 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -7,21 +7,13 @@ #' @return Dataframe #' @export #' -return_if_exists <- function(lkup, povline, con) { - # all_args_data <- all_args(country_code, year, poverty_line) |> - # duckplyr::as_duckplyr_tibble() - # This file will be read from shared drive which will be an argument of this function. - # Additionally there were will more arguments to join instead of only 3 - # In fact, it will be joined by all the arguments in `pip` call +return_if_exists <- function(lkup, povline, con, fill_gaps) { # It is not possible to append to parquet file https://stackoverflow.com/questions/39234391/how-to-append-data-to-an-existing-parquet-file # Writing entire data will be very costly as data keeps on growing, better is to save data in duckdb and append to it. 
- master_file <- DBI::dbGetQuery(con, "select * from master_file") |> + target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" + master_file <- DBI::dbGetQuery(con, glue::glue("select * from {target_file}")) |> duckplyr::as_duckplyr_tibble() - # args_not_present_in_master <- duckplyr::anti_join( - # lkup, master_file, - # by = c("country_code", "reporting_year") - # ) data_present_in_master <- duckplyr::inner_join( master_file, lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), by = c("country_code", "reporting_year", "is_interpolated") @@ -41,17 +33,19 @@ return_if_exists <- function(lkup, povline, con) { } #' Update master file with the contents of the dataframe -#' +#' @inheritParams pip #' @param dat Dataframe to be appended #' @param con DuckDB connection object #' #' @return number of rows updated #' @export #' -update_master_file <- function(dat, con) { +update_master_file <- function(dat, con, fill_gaps) { + target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" + duckdb::duckdb_register(con, "append_data", dat, overwrite = TRUE) - DBI::dbExecute(con, "INSERT INTO master_file SELECT * FROM append_data;") - message("Master File is updated.") + DBI::dbExecute(con, glue::glue("INSERT INTO {target_file} SELECT * FROM append_data;")) + message(glue::glue("{target_file} is updated.")) return(nrow(dat)) } diff --git a/R/fg_pip.R b/R/fg_pip.R index 941bf778..20a6962c 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -31,7 +31,8 @@ fg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir, povline = povline, - con = con + con = con, + fill_gaps = TRUE ) data_present_in_master <- metadata$data_present_in_master diff --git a/R/pip.R b/R/pip.R index 241e426c..cf0beaac 100644 --- a/R/pip.R +++ b/R/pip.R @@ -147,12 +147,20 @@ pip <- function(country = "ALL", con = con ) } + #browser() cached_data <- out$data_in_cache - out <- out$main_data + main_data <- out$main_data + + if (nrow(main_data) > 0) { + out 
<- main_data |> + collapse::fmutate(path = as.character(path)) |> + collapse::rowbind(cached_data) + + update_master_file(main_data, con, fill_gaps) + } else { + out <- cached_data + } - out <- collapse::rowbind( - cached_data, out - ) # Early return for empty table--------------- if (nrow(out) > 0) { # aggregate distributions ------------------ @@ -166,30 +174,12 @@ pip <- function(country = "ALL", } } - # Add extra variables -------------- - - # ## Add SPL and SPR --------------- - # out <- add_spl(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## Add prosperity Gap ----------- - # - # out <- add_pg(df = out, - # fill_gaps = fill_gaps, - # data_dir = lkup$data_root) - # - # ## add distribution type ------------- - # # based on info in framework data, rather than welfare data - # add_distribution_type(df = out, - # lkup = lkup, - # fill_gaps = fill_gaps) + if (!data.table::is.data.table(out)) { + setDT(out) + } add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) - - - # **** TO BE REMOVED **** REMOVAL STARTS HERE # Once `pip-grp` has been integrated in ingestion pipeline # Handles grouped aggregations @@ -290,7 +280,7 @@ pip <- function(country = "ALL", # Order rows by country code and reporting year data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) - update_master_file(out, con) + } #} diff --git a/R/rg_pip.R b/R/rg_pip.R index de8368dc..26dfbb5e 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -29,7 +29,8 @@ rg_pip <- function(country, valid_regions = valid_regions, data_dir = data_dir, povline = povline, - con = con + con = con, + fill_gaps = FALSE ) data_present_in_master <- metadata$data_present_in_master metadata <- metadata$lkup diff --git a/R/utils.R b/R/utils.R index 9772bd60..7d4209c9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,7 +14,8 @@ subset_lkup <- function(country, valid_regions, data_dir = NULL, povline, - con + con, + fill_gaps ) { # STEP 1 - Keep every row by default @@ 
-44,7 +45,7 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] - cached_data <- return_if_exists(lkup, povline, con) + cached_data <- return_if_exists(lkup, povline, con, fill_gaps) return(list(lkup = cached_data$lkup, data_present_in_master = cached_data$data_present_in_master)) } From 4f03a8d0544675dd7b9fed479484982dc75b9ea8 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 19 Jan 2025 20:56:00 +0530 Subject: [PATCH 27/49] remove bugs --- R/add_agg_stats.R | 2 +- R/pip.R | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R index f1cf6309..8ae40fe7 100644 --- a/R/add_agg_stats.R +++ b/R/add_agg_stats.R @@ -46,7 +46,7 @@ ag_average_poverty_stats <- function(df, return_cols) { national_cols <- return_cols$national_cols # This should be removed eventually - assertthat::assert_that(assertthat::are_equal(length(df$reporting_level), 2)) + # assertthat::assert_that(assertthat::are_equal(length(df$reporting_level), 2)) # STEP 1: Identify groups of variables that will be handled differently ------ ## original names diff --git a/R/pip.R b/R/pip.R index cf0beaac..ffe2a361 100644 --- a/R/pip.R +++ b/R/pip.R @@ -160,7 +160,9 @@ pip <- function(country = "ALL", } else { out <- cached_data } - + if (!data.table::is.data.table(out)) { + setDT(out) + } # Early return for empty table--------------- if (nrow(out) > 0) { # aggregate distributions ------------------ @@ -174,9 +176,7 @@ pip <- function(country = "ALL", } } - if (!data.table::is.data.table(out)) { - setDT(out) - } + add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) From 70ed16e35d5a02120153aad36e42c27bdd35fd59 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Wed, 22 Jan 2025 20:31:09 +0530 Subject: [PATCH 28/49] fix docs --- man/return_if_exists.Rd | 5 ++++- man/subset_lkup.Rd | 6 +++++- man/update_master_file.Rd | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd 
index e18664b1..cc181ec1 100644 --- a/man/return_if_exists.Rd +++ b/man/return_if_exists.Rd @@ -4,7 +4,7 @@ \alias{return_if_exists} \title{Return the rows of the table if they exist in master file} \usage{ -return_if_exists(lkup, povline, con) +return_if_exists(lkup, povline, con, fill_gaps) } \arguments{ \item{lkup}{list: A list of lkup tables} @@ -13,6 +13,9 @@ return_if_exists(lkup, povline, con) \item{con}{Connection object} +\item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate +values for missing years} + \item{country_code}{Country Code} } \value{ diff --git a/man/subset_lkup.Rd b/man/subset_lkup.Rd index a1657e43..48b7d162 100644 --- a/man/subset_lkup.Rd +++ b/man/subset_lkup.Rd @@ -13,7 +13,8 @@ subset_lkup( valid_regions, data_dir = NULL, povline, - con + con, + fill_gaps ) } \arguments{ @@ -35,6 +36,9 @@ for region selection} \item{povline}{numeric: Poverty line} \item{con}{duckdb connection object} + +\item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate +values for missing years} } \value{ data.frame diff --git a/man/update_master_file.Rd b/man/update_master_file.Rd index a281d50a..bf12345f 100644 --- a/man/update_master_file.Rd +++ b/man/update_master_file.Rd @@ -4,12 +4,15 @@ \alias{update_master_file} \title{Update master file with the contents of the dataframe} \usage{ -update_master_file(dat, con) +update_master_file(dat, con, fill_gaps) } \arguments{ \item{dat}{Dataframe to be appended} \item{con}{DuckDB connection object} + +\item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate +values for missing years} } \value{ number of rows updated From d1214ee5107719db87236e97063a233184dda1f9 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Wed, 22 Jan 2025 21:01:14 +0530 Subject: [PATCH 29/49] fix few issues --- DESCRIPTION | 5 ++++- R/copy_functions.R | 1 - R/duckdb_fun.R | 3 +-- R/pipapi.R | 3 ++- R/utils.R | 2 +- man/estimate_type_var.Rd | 2 +- man/pipgd_lorenz_curve.Rd | 2 -- 
man/return_if_exists.Rd | 4 +--- 8 files changed, 10 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e2c3d701..3c8c6283 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -59,7 +59,10 @@ Imports: yaml, purrr, future, - glue + glue, + DBI, + duckdb, + duckplyr Remotes: PIP-Technical-Team/wbpip@DEV Depends: diff --git a/R/copy_functions.R b/R/copy_functions.R index efd9769d..018b3609 100644 --- a/R/copy_functions.R +++ b/R/copy_functions.R @@ -494,7 +494,6 @@ gd_compute_headcount_lq <- function( #' By default, the best fitting Lorenz parameterization (quadratic or beta) is #' selected. #' -#' @param params list of parameters #' @param welfare numeric vector of cumulative share of welfare (income/consumption) #' @param weight numeric vector of cumulative share of the population #' @param lorenz either "lb" or "lq" diff --git a/R/duckdb_fun.R b/R/duckdb_fun.R index 32453759..3d676dde 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_fun.R @@ -1,8 +1,7 @@ #' Return the rows of the table if they exist in master file #' -#' @param country_code Country Code #' @inheritParams subset_lkup -#' @param con Connection object +#' @param con Connection object to duckdb table #' #' @return Dataframe #' @export diff --git a/R/pipapi.R b/R/pipapi.R index f8151529..85cdb186 100644 --- a/R/pipapi.R +++ b/R/pipapi.R @@ -105,6 +105,7 @@ utils::globalVariables( "use_bin", "use_groupdata", "use_imputed", - "use_microdata" + "use_microdata", + "path" ) ) diff --git a/R/utils.R b/R/utils.R index 7d4209c9..d5456c00 100644 --- a/R/utils.R +++ b/R/utils.R @@ -410,7 +410,7 @@ censor_stats <- function(df, censored_table) { #' It also censors specific stats #' #' @param df data.table: Table to censor. 
-#' @param censored_table data.table: Censor table +#' @param lkup lkup value #' @keywords internal estimate_type_var <- function(df, lkup) { diff --git a/man/estimate_type_var.Rd b/man/estimate_type_var.Rd index 8ef32d77..e6e983cd 100644 --- a/man/estimate_type_var.Rd +++ b/man/estimate_type_var.Rd @@ -9,7 +9,7 @@ estimate_type_var(df, lkup) \arguments{ \item{df}{data.table: Table to censor.} -\item{censored_table}{data.table: Censor table} +\item{lkup}{lkup value} } \description{ It also censors specific stats diff --git a/man/pipgd_lorenz_curve.Rd b/man/pipgd_lorenz_curve.Rd index a5e40185..7547a5cd 100644 --- a/man/pipgd_lorenz_curve.Rd +++ b/man/pipgd_lorenz_curve.Rd @@ -15,8 +15,6 @@ pipgd_lorenz_curve(welfare = NULL, weight = NULL, lorenz = NULL, n_bins = 100) \item{n_bins}{atomic double vector of length 1: number of points on the lorenz curve} - -\item{params}{list of parameters} } \value{ Returns a list which contains: diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd index cc181ec1..66e2457f 100644 --- a/man/return_if_exists.Rd +++ b/man/return_if_exists.Rd @@ -11,12 +11,10 @@ return_if_exists(lkup, povline, con, fill_gaps) \item{povline}{numeric: Poverty line} -\item{con}{Connection object} +\item{con}{Connection object to duckdb table} \item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate values for missing years} - -\item{country_code}{Country Code} } \value{ Dataframe From dc8685b1a1c47238acfa0b89af6705e8e3d1eaba Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 24 Jan 2025 21:08:53 +0530 Subject: [PATCH 30/49] update caching --- vignettes/duckdb-caching.Rmd | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 86610ccc..16fe84aa 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -89,9 +89,7 @@ These are 6 different scenarios that can occur. 
Note that I have not used all th ## Code overview -We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv('PIP_CACHE_FILE' = fs::path('e:/PIP/pipapi_data/demo.duckdb'))`). A table called `master_file` is created inside it where we save our cache. - -Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb. +We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv('PIP_CACHE_FILE' = fs::path('e:/PIP/pipapi_data/demo.duckdb'))`). Two tables are created in the duckdb file, `rg_master_file` and `fg_master_file`; based on the `fill_gaps` argument, one of them is selected to save and retrieve data. Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb. A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup`. The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. 
If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. From 047b1a291f7281f59f80182ccfcb7a37c37da2b4 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Fri, 24 Jan 2025 10:42:53 -0500 Subject: [PATCH 31/49] update --- vignettes/duckdb-caching.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 16fe84aa..93e35279 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -111,8 +111,8 @@ microbenchmark::microbenchmark( ) #Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 97.96608 100.8852 115.458 103.0205 107.466 1135.287 100 +# expr min lq mean median uq max neval +# duckdb_caching 138.3669 143.9853 148.3353 147.0136 152.0311 181.543 100 ``` ```{r} @@ -136,7 +136,7 @@ tictoc::toc() # 9.71 sec elapsed ## For Duckdb -# 9.75 sec elapsed +# 10.39 sec elapsed ``` ```{r} @@ -151,5 +151,5 @@ tictoc::toc() # 185.28 sec elapsed ## Duckdb caching -# 15.62 sec elapsed +# 17.97 sec elapsed ``` From fb62569831662a6df2b96d6286d51692b411b680 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Fri, 24 Jan 2025 10:53:58 -0500 Subject: [PATCH 32/49] update timing --- vignettes/duckdb-caching.Rmd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index 93e35279..e89e474a 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -103,8 +103,8 @@ microbenchmark::microbenchmark( ) #Unit: microseconds -# expr min lq mean median uq max neval -# pip_DEV 593.396 628.3455 1355.73 647.4085 664.028 61786.03 100 +# expr min lq mean median uq max neval +# duckdb_DEV 628.59 669.893 2475.44 689.934 719.7505 177901 100 microbenchmark::microbenchmark( duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) @@ -133,7 +133,7 @@ for(i in 
seq_along(country_list)) { tictoc::toc() ## For DEV version -# 9.71 sec elapsed +# 9.14 sec elapsed ## For Duckdb # 10.39 sec elapsed @@ -148,7 +148,7 @@ for(i in seq_along(country_list)) { tictoc::toc() ## DEV -# 185.28 sec elapsed +# 177.28 sec elapsed ## Duckdb caching # 17.97 sec elapsed From c2c5fbbb897498f9360dd9ce2103939f8de0d44b Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 27 Jan 2025 20:01:13 +0530 Subject: [PATCH 33/49] add updates --- DESCRIPTION | 3 ++- R/create_countries_vctr.R | 2 +- man/get_user_x_code.Rd | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3c8c6283..b93f0bd8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Suggests: lintr, withr, devtools, - stringr + stringr, + knitr Language: en-US Imports: data.table, diff --git a/R/create_countries_vctr.R b/R/create_countries_vctr.R index 1399bf65..230118c4 100644 --- a/R/create_countries_vctr.R +++ b/R/create_countries_vctr.R @@ -209,7 +209,7 @@ get_user_alt_gt <- function(user_gt, off_gt) { return(out) } -#' Helper function to define user_{var}_code +#' Helper function to define user_\{var\}_code #' #' @param x character: Grouping type needed by user #' diff --git a/man/get_user_x_code.Rd b/man/get_user_x_code.Rd index ad051135..fc1ba375 100644 --- a/man/get_user_x_code.Rd +++ b/man/get_user_x_code.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_countries_vctr.R \name{get_user_x_code} \alias{get_user_x_code} -\title{Helper function to define user_{var}_code} +\title{Helper function to define user_\{var\}_code} \usage{ get_user_x_code(x) } @@ -13,5 +13,5 @@ get_user_x_code(x) character } \description{ -Helper function to define user_{var}_code +Helper function to define user_\{var\}_code } From 82c89037083f74eb22eb2dcbc6161eeaa56bb66e Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 27 Jan 2025 20:16:40 +0530 Subject: [PATCH 34/49] Vignette builder --- .Rbuildignore | 2 ++ .gitignore | 2 ++ 
vignettes/debug-caching.Rmd | 6 +++++- vignettes/duckdb-caching.Rmd | 8 ++++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index b3d8220f..0c8b97dd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,5 @@ TEMP/ ^docs$ ^pkgdown$ CONTRIBUTING.md +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index e8336386..07ea1435 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ tests/testdata/app_data/ logs/ /sessionInfoLog demo.duckdb* +/doc/ +/Meta/ diff --git a/vignettes/debug-caching.Rmd b/vignettes/debug-caching.Rmd index ce110bfe..5b82048c 100644 --- a/vignettes/debug-caching.Rmd +++ b/vignettes/debug-caching.Rmd @@ -1,8 +1,12 @@ --- title: "Debug caching and API endpoints" -output: html_document date: "2024-10-02" author: "Ronak Shah" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Debug caching and API endpoints} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- ## How caching works? 
diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index e89e474a..e774e44a 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd @@ -1,8 +1,12 @@ --- -title: "duckdb-caching" -output: html_document +title: "DuckDB Caching" date: "2024-12-26" author: "Ronak Shah" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{DuckDB Caching} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- ```{r setup, include=FALSE} From d054382b34d5f532a0eb0d68680eca607c9ef261 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 7 Feb 2025 23:22:09 +0530 Subject: [PATCH 35/49] separate read and write connection --- R/pip.R | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/R/pip.R b/R/pip.R index ffe2a361..edf02c87 100644 --- a/R/pip.R +++ b/R/pip.R @@ -118,7 +118,8 @@ pip <- function(country = "ALL", # only run pip code if there is data that is not present in cache #if(nrow(result_from_cache$absent_args) > 0) { # use result_from_cache$absent_args$country_code reporting_year and poverty_line and pass it further. 
- con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = Sys.getenv("PIP_CACHE_FILE")) + cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") + read_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path, read_only = TRUE) # mains estimates --------------- if (fill_gaps) { ## lineup years----------------- @@ -131,7 +132,7 @@ pip <- function(country = "ALL", reporting_level = reporting_level, ppp = ppp, lkup = lkup, - con = con + con = read_con ) } else { ## survey years ------------------ @@ -144,10 +145,14 @@ pip <- function(country = "ALL", reporting_level = reporting_level, ppp = ppp, lkup = lkup, - con = con + con = read_con ) } #browser() + # It is important to close the read connection before you open a write connection because + # duckdb kind of inherits read_only flag from previous connection object if it is not closed + # More details here https://app.clickup.com/t/868cdpe3q + duckdb::dbDisconnect(read_con) cached_data <- out$data_in_cache main_data <- out$main_data @@ -155,8 +160,9 @@ pip <- function(country = "ALL", out <- main_data |> collapse::fmutate(path = as.character(path)) |> collapse::rowbind(cached_data) - - update_master_file(main_data, con, fill_gaps) + write_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) + update_master_file(main_data, write_con, fill_gaps) + dbDisconnect(write_con) } else { out <- cached_data } From f668d2e7fb97f180b05b357c4186f3b69ceccef3 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 9 Feb 2025 16:25:20 +0530 Subject: [PATCH 36/49] added reset cache function --- R/{duckdb_fun.R => duckdb_func.R} | 16 ++++++++++++++++ man/return_if_exists.Rd | 2 +- man/update_master_file.Rd | 2 +- 3 files changed, 18 insertions(+), 2 deletions(-) rename R/{duckdb_fun.R => duckdb_func.R} (84%) diff --git a/R/duckdb_fun.R b/R/duckdb_func.R similarity index 84% rename from R/duckdb_fun.R rename to R/duckdb_func.R index 3d676dde..7cad951d 100644 --- a/R/duckdb_fun.R +++ b/R/duckdb_func.R @@ 
-48,3 +48,19 @@ update_master_file <- function(dat, con, fill_gaps) { return(nrow(dat)) } + + +#' Reset the cache. Only to be used internally +#' +#' @noRd +reset_cache <- function(con, type = c("both", "rg", "fg")) { + + type <- match.arg(type) + if(type == "both") type = c("rg", "fg") + if("rg" %in% type) { + DBI::dbExecute(con, "DELETE from rg_master_file") + } + if("fg" %in% type) { + DBI::dbExecute(con, "DELETE from fg_master_file") + } +} diff --git a/man/return_if_exists.Rd b/man/return_if_exists.Rd index 66e2457f..967f4689 100644 --- a/man/return_if_exists.Rd +++ b/man/return_if_exists.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/duckdb_fun.R +% Please edit documentation in R/duckdb_func.R \name{return_if_exists} \alias{return_if_exists} \title{Return the rows of the table if they exist in master file} diff --git a/man/update_master_file.Rd b/man/update_master_file.Rd index bf12345f..d2fe8ce4 100644 --- a/man/update_master_file.Rd +++ b/man/update_master_file.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/duckdb_fun.R +% Please edit documentation in R/duckdb_func.R \name{update_master_file} \alias{update_master_file} \title{Update master file with the contents of the dataframe} From 2d4c9f41b8788968e8cfdf46b05bee03dc4abb79 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Wed, 12 Feb 2025 23:23:06 +0530 Subject: [PATCH 37/49] move connection in func;reset_cache ready for API --- R/duckdb_func.R | 21 ++++++++++++++++----- R/pip.R | 11 ++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index 7cad951d..bb7f048e 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -34,16 +34,18 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { #' Update master file with the contents of the dataframe #' @inheritParams pip #' @param dat Dataframe to be appended -#' @param con DuckDB connection object +#' 
@param cache_file_path path where cache file is saved #' #' @return number of rows updated #' @export #' -update_master_file <- function(dat, con, fill_gaps) { +update_master_file <- function(dat, cache_file_path, fill_gaps) { + write_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" duckdb::duckdb_register(con, "append_data", dat, overwrite = TRUE) DBI::dbExecute(con, glue::glue("INSERT INTO {target_file} SELECT * FROM append_data;")) + duckdb::dbDisconnect(write_con) message(glue::glue("{target_file} is updated.")) return(nrow(dat)) @@ -53,14 +55,23 @@ update_master_file <- function(dat, con, fill_gaps) { #' Reset the cache. Only to be used internally #' #' @noRd -reset_cache <- function(con, type = c("both", "rg", "fg")) { +reset_cache <- function(pass = Sys.getenv('LOCAL_KEY'), type = c("both", "rg", "fg"), lkup) { + # lkup will be passed through API and will not be an argument to endpoint, same as pip call + # Checks if the keys match across local and server before reseting the cache + if (pass != Sys.getenv('SERVER_KEY')) { + rlang::abort("Either key not set or incorrect key!") + } + + cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") + write_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) type <- match.arg(type) if(type == "both") type = c("rg", "fg") if("rg" %in% type) { - DBI::dbExecute(con, "DELETE from rg_master_file") + DBI::dbExecute(write_con, "DELETE from rg_master_file") } if("fg" %in% type) { - DBI::dbExecute(con, "DELETE from fg_master_file") + DBI::dbExecute(write_con, "DELETE from fg_master_file") } + duckdb::dbDisconnect(write_con) } diff --git a/R/pip.R b/R/pip.R index edf02c87..fe5448f7 100644 --- a/R/pip.R +++ b/R/pip.R @@ -111,13 +111,7 @@ pip <- function(country = "ALL", # 4) country = "all" year = 2019 # 5) country = "AGO" year = "all" # 6) country = "all" year = "all" - # browser() - # This initialization is 
necessary for rowbind at the end if all the data is present in cache - #out <- NULL - # only run pip code if there is data that is not present in cache - #if(nrow(result_from_cache$absent_args) > 0) { - # use result_from_cache$absent_args$country_code reporting_year and poverty_line and pass it further. cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") read_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path, read_only = TRUE) # mains estimates --------------- @@ -160,9 +154,8 @@ pip <- function(country = "ALL", out <- main_data |> collapse::fmutate(path = as.character(path)) |> collapse::rowbind(cached_data) - write_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) - update_master_file(main_data, write_con, fill_gaps) - dbDisconnect(write_con) + # Update cache with data + update_master_file(main_data, cache_file_path, fill_gaps) } else { out <- cached_data } From 0fd49fc52c7df103204061c44ab2638e0c64b25d Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 13 Feb 2025 22:39:37 +0530 Subject: [PATCH 38/49] option to query live data --- R/duckdb_func.R | 35 ++++++++++++++++++++--------------- R/pip.R | 9 ++++++--- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index bb7f048e..969e5d4c 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -9,23 +9,28 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { # It is not possible to append to parquet file https://stackoverflow.com/questions/39234391/how-to-append-data-to-an-existing-parquet-file # Writing entire data will be very costly as data keeps on growing, better is to save data in duckdb and append to it. 
- target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" - master_file <- DBI::dbGetQuery(con, glue::glue("select * from {target_file}")) |> - duckplyr::as_duckplyr_tibble() + if(getOption("pipapi.query_live_data", FALSE)) { + data_present_in_master <- NULL + } else { + target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" + master_file <- DBI::dbGetQuery(con, glue::glue("select * from {target_file}")) |> + duckplyr::as_duckplyr_tibble() - data_present_in_master <- duckplyr::inner_join( - master_file, lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), - by = c("country_code", "reporting_year", "is_interpolated") - ) |> duckplyr::filter(poverty_line == povline) + data_present_in_master <- duckplyr::inner_join( + master_file, lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), + by = c("country_code", "reporting_year", "is_interpolated") + ) |> duckplyr::filter(poverty_line == povline) - keep <- TRUE - if(nrow(data_present_in_master) > 0) { - keep <- !with(lkup, paste(country_code, reporting_year, is_interpolated)) %in% - with(data_present_in_master, paste(country_code, reporting_year, is_interpolated)) + keep <- TRUE + if(nrow(data_present_in_master) > 0) { + # Remove the rows from lkup that are present in master + keep <- !with(lkup, paste(country_code, reporting_year, is_interpolated)) %in% + with(data_present_in_master, paste(country_code, reporting_year, is_interpolated)) - lkup <- lkup[keep, ] + lkup <- lkup[keep, ] - message("Returning data from cache.") + message("Returning data from cache.") + } } # nrow(data_present_in_master) should be equal to sum(keep) return(list(data_present_in_master = data_present_in_master, lkup = lkup)) @@ -55,10 +60,10 @@ update_master_file <- function(dat, cache_file_path, fill_gaps) { #' Reset the cache. 
Only to be used internally #' #' @noRd -reset_cache <- function(pass = Sys.getenv('LOCAL_KEY'), type = c("both", "rg", "fg"), lkup) { +reset_cache <- function(pass = Sys.getenv('PIP_CACHE_LOCAL_KEY'), type = c("both", "rg", "fg"), lkup) { # lkup will be passed through API and will not be an argument to endpoint, same as pip call # Checks if the keys match across local and server before reseting the cache - if (pass != Sys.getenv('SERVER_KEY')) { + if (pass != Sys.getenv('PIP_CACHE_SERVER_KEY')) { rlang::abort("Either key not set or incorrect key!") } diff --git a/R/pip.R b/R/pip.R index fe5448f7..0ed6007f 100644 --- a/R/pip.R +++ b/R/pip.R @@ -142,7 +142,6 @@ pip <- function(country = "ALL", con = read_con ) } - #browser() # It is important to close the read connection before you open a write connection because # duckdb kind of inherits read_only flag from previous connection object if it is not closed # More details here https://app.clickup.com/t/868cdpe3q @@ -154,8 +153,12 @@ pip <- function(country = "ALL", out <- main_data |> collapse::fmutate(path = as.character(path)) |> collapse::rowbind(cached_data) - # Update cache with data - update_master_file(main_data, cache_file_path, fill_gaps) + # cached_data is NULL when we are querying live data in which case we don't update cache + # This will be used only for development purpose and we don't have any intention to use it in production. 
+ if(!is.null(cached_data)) { + # Update cache with data + update_master_file(main_data, cache_file_path, fill_gaps) + } } else { out <- cached_data } From 6ed301e0d779fdb7c09b3bce8148c4fe21efdd71 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 14 Feb 2025 23:20:47 +0530 Subject: [PATCH 39/49] add an API endpoint --- R/utils-plumber.R | 4 +++- R/utils.R | 11 +++++++++-- inst/plumber/v1/endpoints.R | 14 +++++++++++++- vignettes/new-endpoints.Rmd | 2 ++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/R/utils-plumber.R b/R/utils-plumber.R index 01babe3a..a5c5d2fd 100644 --- a/R/utils-plumber.R +++ b/R/utils-plumber.R @@ -112,7 +112,9 @@ validate_query_parameters <- "mean", "times_mean", "lorenz", - "n_bins" + "n_bins", + "pass", + "type" )) { params$argsQuery <- params$argsQuery[names(params$argsQuery) %in% valid_params] diff --git a/R/utils.R b/R/utils.R index d5456c00..ba7dd9e1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -592,13 +592,18 @@ create_query_controls <- function(svy_lkup, type = "character") # Tables table <- list(values = aux_tables, type = "character") + + # type + type <- list(values = c("both", "rg", "fg"), type = "character") + + pass <- list(values = Sys.getenv('PIP_CACHE_SERVER_KEY'), type = "character") # parameters parameter <- list(values = c("country", "year", "povline", "popshare", "fill_gaps", "aggregate", "group_by", "welfare_type", "reporting_level", "ppp", "version", - "format", "table", "long_format"), + "format", "table", "long_format", "type", "pass"), type = "character") # cum_welfare @@ -674,7 +679,9 @@ create_query_controls <- function(svy_lkup, times_mean = times_mean, lorenz = lorenz, n_bins = n_bins, - endpoint = endpoint + endpoint = endpoint, + type = type, + pass = pass ) return(query_controls) diff --git a/inst/plumber/v1/endpoints.R b/inst/plumber/v1/endpoints.R index 7729c652..3222ec33 100644 --- a/inst/plumber/v1/endpoints.R +++ b/inst/plumber/v1/endpoints.R @@ -85,7 +85,7 @@ function(req, res) { # 
treated asynchronously. # 2) The introduction of PPP versioning implies having a dynamic default # poverty line - + browser() req <- pipapi:::assign_required_params(req, pl_lkup = lkups$pl_lkup) @@ -367,6 +367,18 @@ function() { } } +#* Reset DuckDB cache file +#* @get /api/v1/duckdb-reset +#* @param pass:[chr] Local password, this password is checked against the server password +#* @param type:[chr] Which table do you want to delete? Values accepted are "both", "rg" and "fg" +#* @serializer unboxedJSON +function(req, res) { + params <- req$argsQuery + params$lkup <- lkups$versions_paths[[params$version]] + params$version <- NULL + do.call(pipapi:::reset_cache, params) +} + # #* Return cache log # #* @get /api/v1/cache-log # #* @serializer print list(quote = FALSE) diff --git a/vignettes/new-endpoints.Rmd b/vignettes/new-endpoints.Rmd index df7f1fa6..e52a7e3b 100644 --- a/vignettes/new-endpoints.Rmd +++ b/vignettes/new-endpoints.Rmd @@ -18,3 +18,5 @@ The arguments are validated in function `validate_query_parameters` which has a **Values :** Validate your input values in `create_query_controls` function by adding range or list of accepted values. If the arguments is character then you need to give all possible values that it can take. If the argument is numeric, then you need to supply `min` and `max` values to ensure that the numeric values stays in range. Based on the type of argument, `check_param_chr`, `check_param_num` or `check_param_lgl` is called. This also ensures that the argument name should mean the same everywhere. So it is not possible that the same argument can have two different meaning. For example, it is not possible that the argument `requested_mean` accepts value 0 to 1 in one endpoint and `c("yes"/"no")` in another endpoint again ensuring consistency. 
Another thing to note is that the argument and values are available in both `req$args` as well as `req$argsQuery` however, all the validation is performed only on `argsQuery` and only `argsQuery` is used the entire API. So we suggest to continue using `argsQuery` for consistency purposes. + +Once you do these changes, don't forget to refresh the session before testing out your changes. From 80eca1d91306662163d81f118fc1c0da4efb2092 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Wed, 19 Feb 2025 11:46:21 -0500 Subject: [PATCH 40/49] fix-fg_pip_local --- DESCRIPTION | 1 - R/duckdb_func.R | 4 ++-- man/update_master_file.Rd | 4 ++-- tests/testthat/test-fg_pip-local.R | 37 +++++++++++++++++++----------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b93f0bd8..0133626d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ Suggests: covr, testthat, spelling, - knitr, rmarkdown, markdown, assertthat, diff --git a/R/duckdb_func.R b/R/duckdb_func.R index 969e5d4c..c08dbd30 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -48,8 +48,8 @@ update_master_file <- function(dat, cache_file_path, fill_gaps) { write_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" - duckdb::duckdb_register(con, "append_data", dat, overwrite = TRUE) - DBI::dbExecute(con, glue::glue("INSERT INTO {target_file} SELECT * FROM append_data;")) + duckdb::duckdb_register(write_con, "append_data", dat, overwrite = TRUE) + DBI::dbExecute(write_con, glue::glue("INSERT INTO {target_file} SELECT * FROM append_data;")) duckdb::dbDisconnect(write_con) message(glue::glue("{target_file} is updated.")) diff --git a/man/update_master_file.Rd b/man/update_master_file.Rd index d2fe8ce4..f4e712a7 100644 --- a/man/update_master_file.Rd +++ b/man/update_master_file.Rd @@ -4,12 +4,12 @@ \alias{update_master_file} \title{Update master file with the contents of the dataframe} 
\usage{ -update_master_file(dat, con, fill_gaps) +update_master_file(dat, cache_file_path, fill_gaps) } \arguments{ \item{dat}{Dataframe to be appended} -\item{con}{DuckDB connection object} +\item{cache_file_path}{path where cache file is saved} \item{fill_gaps}{logical: If set to TRUE, will interpolate / extrapolate values for missing years} diff --git a/tests/testthat/test-fg_pip-local.R b/tests/testthat/test-fg_pip-local.R index ee5523ed..c3c3ae97 100644 --- a/tests/testthat/test-fg_pip-local.R +++ b/tests/testthat/test-fg_pip-local.R @@ -13,6 +13,8 @@ lkups <- create_versioned_lkups(data_dir, vintage_pattern = latest_version) lkup <- lkups$versions_paths[[lkups$latest_release]] +con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = fs::path(lkup$data_root, "cache", ext = "duckdb")) + local_mocked_bindings( get_caller_names = function() c("else") ) @@ -28,10 +30,11 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + expect_equal(nrow(tmp$main_data), 2) tmp <- fg_pip( country = "CHN", @@ -41,10 +44,11 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + expect_equal(nrow(tmp$main_data), 2) }) ## Interpolation ---- @@ -57,10 +61,11 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + expect_equal(nrow(tmp$main_data), 2) tmp <- fg_pip( country = "IND", @@ -70,10 +75,11 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + 
expect_equal(nrow(tmp$main_data), 2) }) test_that("Imputation is working for interpolated aggregate distribution", { @@ -85,10 +91,11 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + expect_equal(nrow(tmp$main_data), 2) tmp <- fg_pip( country = "CHN", @@ -98,10 +105,11 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - expect_equal(nrow(tmp), 2) + expect_equal(nrow(tmp$main_data), 2) }) @@ -150,9 +158,10 @@ tmp <- fg_pip( welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup + lkup = lkup, + con = con ) - +tmp <- tmp$main_data # dt <- pip(country = "ALL", # lkup = lkup, # povline = 2.15, From a6cec561e14af5e69a6b56d21a19b13afbdedd5f Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 20 Feb 2025 22:40:21 +0530 Subject: [PATCH 41/49] early response for empty table --- R/pip.R | 183 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/R/pip.R b/R/pip.R index 0ed6007f..c903aad9 100644 --- a/R/pip.R +++ b/R/pip.R @@ -166,124 +166,123 @@ pip <- function(country = "ALL", setDT(out) } # Early return for empty table--------------- - if (nrow(out) > 0) { - # aggregate distributions ------------------ - if (reporting_level %in% c("national", "all")) { - out <- add_agg_stats( - df = out, - return_cols = lkup$return_cols$ag_average_poverty_stats - ) - if (reporting_level == "national") { - out <- out[reporting_level == "national"] - } + if (nrow(out) == 0) return(pipapi::empty_response) + + # aggregate distributions ------------------ + if (reporting_level %in% c("national", "all")) { + out <- add_agg_stats( + df = out, + return_cols = lkup$return_cols$ag_average_poverty_stats + ) + if 
(reporting_level == "national") { + out <- out[reporting_level == "national"] } + } - add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) + add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup) - # **** TO BE REMOVED **** REMOVAL STARTS HERE - # Once `pip-grp` has been integrated in ingestion pipeline - # Handles grouped aggregations - if (group_by != "none") { - # Handle potential (insignificant) difference in poverty_line values that - # may mess-up the grouping - out$poverty_line <- povline + # **** TO BE REMOVED **** REMOVAL STARTS HERE + # Once `pip-grp` has been integrated in ingestion pipeline + # Handles grouped aggregations + if (group_by != "none") { + # Handle potential (insignificant) difference in poverty_line values that + # may mess-up the grouping + out$poverty_line <- povline - out <- pip_aggregate_by( - df = out, - group_lkup = lkup[["pop_region"]], - return_cols = lkup$return_cols$pip_grp - ) - # Censor regional values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "regions") - } - - out <- out[, c("region_name", - "region_code", - "reporting_year", - "reporting_pop", - "poverty_line", - "headcount", - "poverty_gap", - "poverty_severity", - "watts", - "mean", - "pop_in_poverty")] - - return(out) + out <- pip_aggregate_by( + df = out, + group_lkup = lkup[["pop_region"]], + return_cols = lkup$return_cols$pip_grp + ) + # Censor regional values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "regions") } - # **** TO BE REMOVED **** REMOVAL ENDS HERE + out <- out[, c("region_name", + "region_code", + "reporting_year", + "reporting_pop", + "poverty_line", + "headcount", + "poverty_gap", + "poverty_severity", + "watts", + "mean", + "pop_in_poverty")] + + return(out) + } + # **** TO BE REMOVED **** REMOVAL ENDS HERE - # pre-computed distributional stats --------------- - crr_names <- names(out) # current variables - names2keep <- lkup$return_cols$pip$cols # all variables - out <- 
add_dist_stats( - df = out, - dist_stats = lkup[["dist_stats"]] - ) + # pre-computed distributional stats --------------- + crr_names <- names(out) # current variables + names2keep <- lkup$return_cols$pip$cols # all variables - # Add aggregate medians ---------------- - out <- add_agg_medians( - df = out, - fill_gaps = fill_gaps, - data_dir = lkup$data_root - ) + out <- add_dist_stats( + df = out, + dist_stats = lkup[["dist_stats"]] + ) - # format ---------------- + # Add aggregate medians ---------------- + out <- add_agg_medians( + df = out, + fill_gaps = fill_gaps, + data_dir = lkup$data_root + ) + # format ---------------- - if (fill_gaps) { - ## Inequality indicators to NA for lineup years ---- - dist_vars <- names2keep[!(names2keep %in% crr_names)] - out[, - (dist_vars) := NA_real_] + if (fill_gaps) { - ## estimate_var ----- - out <- estimate_type_ctr_lnp(out, lkup) + ## Inequality indicators to NA for lineup years ---- + dist_vars <- names2keep[!(names2keep %in% crr_names)] + out[, + (dist_vars) := NA_real_] - } else { - out[, estimate_type := NA_character_] - } - ## Handle survey coverage ------------ - if (reporting_level != "all") { - keep <- out$reporting_level == reporting_level - out <- out[keep, ] - } + ## estimate_var ----- + out <- estimate_type_ctr_lnp(out, lkup) - # Censor country values - if (censor) { - out <- censor_rows(out, lkup[["censored"]], type = "countries") - } + } else { + out[, estimate_type := NA_character_] + } + ## Handle survey coverage ------------ + if (reporting_level != "all") { + keep <- out$reporting_level == reporting_level + out <- out[keep, ] + } + # Censor country values + if (censor) { + out <- censor_rows(out, lkup[["censored"]], type = "countries") + } - # Select columns - if (additional_ind) { - get_additional_indicators(out) - added_names <- attr(out, "new_indicators_names") - names2keep <- c(names2keep, added_names) - } - # Keep relevant variables - out <- out[, .SD, .SDcols = names2keep] + # Select columns + if 
(additional_ind) { + get_additional_indicators(out) + added_names <- attr(out, "new_indicators_names") + names2keep <- c(names2keep, added_names) + } + # Keep relevant variables + out <- out[, .SD, .SDcols = names2keep] - # make sure we always report the same precision in all numeric variables - doub_vars <- - names(out)[unlist(lapply(out, is.double))] |> - data.table::copy() - out[, (doub_vars) := lapply(.SD, round, digits = 12), - .SDcols = doub_vars] + # make sure we always report the same precision in all numeric variables + doub_vars <- + names(out)[unlist(lapply(out, is.double))] |> + data.table::copy() - # Order rows by country code and reporting year - data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) + out[, (doub_vars) := lapply(.SD, round, digits = 12), + .SDcols = doub_vars] - } + # Order rows by country code and reporting year + data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type) #} # Make sure no duplicate remains From f75b0119ead443b9c328c25372b22bcbeda73e69 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Thu, 20 Feb 2025 22:09:35 -0500 Subject: [PATCH 42/49] add test file for testing cache --- inst/TMP/TMP_duckdb_cache.R | 38 +++++++++++++++++++++++++++++++++++++ pipapi.Rproj | 1 - 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 inst/TMP/TMP_duckdb_cache.R diff --git a/inst/TMP/TMP_duckdb_cache.R b/inst/TMP/TMP_duckdb_cache.R new file mode 100644 index 00000000..e37562d9 --- /dev/null +++ b/inst/TMP/TMP_duckdb_cache.R @@ -0,0 +1,38 @@ +devtools::load_all(".") +force <- FALSE +if (!"lkups" %in% ls() || isTRUE(force)) { + data_dir <- Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") |> + fs::path() + fs::dir_ls(data_dir, recurse = FALSE) +} + + +latest_version <- + pipapi:::available_versions(data_dir) |> + max() + +latest_version <- NULL +latest_version <- "20240627_2017_01_02_PROD" +lkups <- create_versioned_lkups(data_dir, + vintage_pattern = 
latest_version) + +lkup <- lkups$versions_paths[[lkups$latest_release]] + + +reset_cache(lkup = lkup) + + + +# 1. +pip(country = "all", year = 2000, lkup = lkup) + +# 2. +pip(country = "AGO", year = 2000, lkup = lkup) + + +pip(country = "all", year = "all", lkup = lkup) + + +pip(country = "IND", year = 2018, lkup = lkup) + +pip(country = "IND", year = "all", lkup = lkup) diff --git a/pipapi.Rproj b/pipapi.Rproj index 20664e2d..4e3ca1bc 100644 --- a/pipapi.Rproj +++ b/pipapi.Rproj @@ -1,5 +1,4 @@ Version: 1.0 -ProjectId: cf4563bf-f15d-4f36-a587-51bf840c7f41 RestoreWorkspace: No SaveWorkspace: No From 344d001722760ced247ebe63667b398ddad696ea Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 21 Feb 2025 13:51:38 -0500 Subject: [PATCH 43/49] add default pipapi.query_live_data option and clean it up a little --- R/duckdb_func.R | 23 ++++++++++++++--------- R/zzz.R | 12 ++++++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index c08dbd30..2e349c9c 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -9,20 +9,23 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { # It is not possible to append to parquet file https://stackoverflow.com/questions/39234391/how-to-append-data-to-an-existing-parquet-file # Writing entire data will be very costly as data keeps on growing, better is to save data in duckdb and append to it. 
- if(getOption("pipapi.query_live_data", FALSE)) { - data_present_in_master <- NULL - } else { + if (getOption("pipapi.query_live_data")) { target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" - master_file <- DBI::dbGetQuery(con, glue::glue("select * from {target_file}")) |> + + master_file <- DBI::dbGetQuery(con, + glue::glue("select * from {target_file}")) |> duckplyr::as_duckplyr_tibble() - data_present_in_master <- duckplyr::inner_join( - master_file, lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), - by = c("country_code", "reporting_year", "is_interpolated") - ) |> duckplyr::filter(poverty_line == povline) + data_present_in_master <- + duckplyr::inner_join( + x = master_file, + y = lkup |> + collapse::fselect(country_code, reporting_year, is_interpolated), + by = c("country_code", "reporting_year", "is_interpolated")) |> + duckplyr::filter(poverty_line == povline) keep <- TRUE - if(nrow(data_present_in_master) > 0) { + if (nrow(data_present_in_master) > 0) { # Remove the rows from lkup that are present in master keep <- !with(lkup, paste(country_code, reporting_year, is_interpolated)) %in% with(data_present_in_master, paste(country_code, reporting_year, is_interpolated)) @@ -31,6 +34,8 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { message("Returning data from cache.") } + } else { + data_present_in_master <- NULL } # nrow(data_present_in_master) should be equal to sum(keep) return(list(data_present_in_master = data_present_in_master, lkup = lkup)) diff --git a/R/zzz.R b/R/zzz.R index d0d7dd8c..bbba132e 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,8 @@ +pipapi_default_options <- list( + pipapi.query_live_data = FALSE +) + + .onLoad <- function(libname, pkgname) { if (Sys.getenv("PIPAPI_APPLY_CACHING") == "TRUE") { d <- rappdirs::user_cache_dir("pipapi") @@ -21,5 +26,12 @@ assign("cd", cd, envir = as.environment(pos)) packageStartupMessage("Info: Disk based caching is enabled.") } + + op <- options() 
+ toset <- !(names(pipapi_default_options) %in% names(op)) + if (any(toset)) options(pipapi_default_options[toset]) + + invisible() + } From f37d565bbf7f7440457688f577b8888f70a9cc18 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Tue, 25 Feb 2025 12:52:47 -0500 Subject: [PATCH 44/49] create file if it doesn't exist --- R/duckdb_func.R | 115 ++++++++++++++++++++++++++++++++++++++++++++++++ R/pip.R | 4 ++ 2 files changed, 119 insertions(+) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index c08dbd30..c8d305e4 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -80,3 +80,118 @@ reset_cache <- function(pass = Sys.getenv('PIP_CACHE_LOCAL_KEY'), type = c("both } duckdb::dbDisconnect(write_con) } + +create_duckdb_file <- function(cache_file_path) { + con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path) + DBI::dbExecute(con, "CREATE OR REPLACE table rg_master_file ( + country_code VARCHAR, + survey_id VARCHAR, + cache_id VARCHAR, + wb_region_code VARCHAR, + reporting_year DOUBLE, + surveyid_year VARCHAR, + survey_year DOUBLE, + survey_time VARCHAR, + survey_acronym VARCHAR, + survey_coverage VARCHAR, + survey_comparability DOUBLE, + comparable_spell VARCHAR, + welfare_type VARCHAR, + reporting_level VARCHAR, + survey_mean_lcu DOUBLE, + survey_mean_ppp DOUBLE, + survey_median_ppp DOUBLE, + survey_median_lcu DOUBLE, + predicted_mean_ppp DOUBLE, + ppp DOUBLE, + cpi DOUBLE, + reporting_pop DOUBLE, + reporting_gdp DOUBLE, + reporting_pce DOUBLE, + pop_data_level VARCHAR, + gdp_data_level VARCHAR, + pce_data_level VARCHAR, + cpi_data_level VARCHAR, + ppp_data_level VARCHAR, + distribution_type VARCHAR, + gd_type VARCHAR, + is_interpolated BOOLEAN, + is_used_for_line_up BOOLEAN, + is_used_for_aggregation BOOLEAN, + estimation_type VARCHAR, + display_cp DOUBLE, + path VARCHAR, + country_name VARCHAR, + africa_split VARCHAR, + africa_split_code VARCHAR, + region_name VARCHAR, + region_code VARCHAR, + world VARCHAR, + world_code VARCHAR, + poverty_line 
DOUBLE, + mean DOUBLE, + median DOUBLE, + headcount DOUBLE, + poverty_gap DOUBLE, + poverty_severity DOUBLE, + watts DOUBLE + + )") + + DBI::dbExecute(con, "CREATE OR REPLACE table fg_master_file ( + country_code VARCHAR, + survey_id VARCHAR, + cache_id VARCHAR, + wb_region_code VARCHAR, + reporting_year DOUBLE, + surveyid_year VARCHAR, + survey_year DOUBLE, + survey_time VARCHAR, + survey_acronym VARCHAR, + survey_coverage VARCHAR, + survey_comparability DOUBLE, + comparable_spell VARCHAR, + welfare_type VARCHAR, + reporting_level VARCHAR, + survey_mean_lcu DOUBLE, + survey_mean_ppp DOUBLE, + survey_median_ppp DOUBLE, + survey_median_lcu DOUBLE, + predicted_mean_ppp DOUBLE, + ppp DOUBLE, + cpi DOUBLE, + reporting_pop DOUBLE, + reporting_gdp DOUBLE, + reporting_pce DOUBLE, + pop_data_level VARCHAR, + gdp_data_level VARCHAR, + pce_data_level VARCHAR, + cpi_data_level VARCHAR, + ppp_data_level VARCHAR, + distribution_type VARCHAR, + gd_type VARCHAR, + is_interpolated BOOLEAN, + is_used_for_line_up BOOLEAN, + is_used_for_aggregation BOOLEAN, + estimation_type VARCHAR, + interpolation_id VARCHAR, + display_cp DOUBLE, + country_name VARCHAR, + africa_split VARCHAR, + africa_split_code VARCHAR, + region_name VARCHAR, + region_code VARCHAR, + world VARCHAR, + world_code VARCHAR, + path VARCHAR, + data_interpolation_id VARCHAR, + poverty_line DOUBLE, + mean DOUBLE, + median DOUBLE, + headcount DOUBLE, + poverty_gap DOUBLE, + poverty_severity DOUBLE, + watts DOUBLE + )") + DBI::dbDisconnect(con) +} diff --git a/R/pip.R b/R/pip.R index c903aad9..cfd3504b 100644 --- a/R/pip.R +++ b/R/pip.R @@ -113,6 +113,10 @@ pip <- function(country = "ALL", # 6) country = "all" year = "all" cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") + if (!file.exists(cache_file_path)) { + # Create an empty duckdb file + create_duckdb_file(cache_file_path) + } read_con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = cache_file_path, read_only = TRUE) # mains estimates 
--------------- if (fill_gaps) { From 3d8b3d087c5eb8e222b46185241962f9b1abf1f2 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Tue, 25 Feb 2025 22:27:16 -0500 Subject: [PATCH 45/49] lineup_year issue solve --- R/fg_pip.R | 3 ++- R/pip_grp.R | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/fg_pip.R b/R/fg_pip.R index 20a6962c..15518f4e 100644 --- a/R/fg_pip.R +++ b/R/fg_pip.R @@ -73,7 +73,8 @@ fg_pip <- function(country, data_dir = data_dir) # Join because some data might be coming from cache so it might be absent in metadata - ctry_years <- collapse::join(ctry_years, metadata |> collapse::fselect(names(ctry_years)), + ctry_years <- collapse::join(ctry_years, metadata |> + collapse::fselect(intersect(names(ctry_years), names(metadata))), verbose = 0,how = "inner") results_subset <- vector(mode = "list", length = nrow(ctry_years)) diff --git a/R/pip_grp.R b/R/pip_grp.R index 94aa98a4..87a149db 100644 --- a/R/pip_grp.R +++ b/R/pip_grp.R @@ -59,7 +59,8 @@ pip_grp <- function(country = "ALL", reporting_level = reporting_level, ppp = NULL, lkup = lkup) - + # For now just rowbinding two dataframes, but we would need to use it more smartly in the future + out <- collapse::rowbind(out) # return empty dataframe if no metadata is found if (nrow(out) == 0) { return(pipapi::empty_response_grp) From 8514b1b6be7ca59c2e1ff220675f757a72ed8fb2 Mon Sep 17 00:00:00 2001 From: Ronak Sunil Shah Date: Wed, 26 Feb 2025 03:56:52 -0500 Subject: [PATCH 46/49] fix tests --- R/pip_grp_logic.R | 2 ++ tests/testthat/test-ui_poverty_indicators.R | 2 +- tests/testthat/test-utils.R | 12 ++++++------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index c21e43ea..60369816 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -84,6 +84,8 @@ pip_grp_logic <- function(country = "ALL", lkup = lkup ) + # For now just rowbinding two dataframes, but we would need to use it more smartly in the future + fg_pip_master <- 
collapse::rowbind(fg_pip_master) add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup) if (lcv$off_alt_agg == "both") { diff --git a/tests/testthat/test-ui_poverty_indicators.R b/tests/testthat/test-ui_poverty_indicators.R index b4422c97..d209887e 100644 --- a/tests/testthat/test-ui_poverty_indicators.R +++ b/tests/testthat/test-ui_poverty_indicators.R @@ -20,7 +20,7 @@ test_that("ui_pc_charts() works as expected", { povline = 1.9, lkup = lkups) expect_equal(class(res), c("data.table", "data.frame")) - expect_equal(names(res), lkups$return_cols$ui_pc_charts$cols) + expect_equal(names(res), setdiff(lkups$return_cols$ui_pc_charts$cols, "estimate_type")) expect_equal(nrow(res), nrow(lkups$svy_lkup[country_code == "AGO"])) }, get_caller_names = function() c("else") diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index da38fa01..5ec627d5 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -68,7 +68,7 @@ test_that("subset_lkup correctly selects all countries", { valid_regions = valid_regions, data_dir = data_dir) - expect_equal(nrow(tmp), nrow(ref_lkup)) + expect_equal(nrow(tmp$lkup), nrow(ref_lkup)) }) test_that("subset_lkup correctly selects countries", { @@ -81,7 +81,7 @@ test_that("subset_lkup correctly selects countries", { valid_regions = valid_regions, data_dir = data_dir) - expect_equal(sort(unique(tmp$country_code)), sort(selection)) + expect_equal(sort(unique(tmp$lkup$country_code)), sort(selection)) }) test_that("subset_lkup correctly selects single regions", { @@ -94,7 +94,7 @@ test_that("subset_lkup correctly selects single regions", { valid_regions = valid_regions, data_dir = data_dir) - expect_equal(sort(unique(tmp$region_code)), sort(selection)) + expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) test_that("subset_lkup correctly selects multiple regions", { @@ -107,7 +107,7 @@ test_that("subset_lkup correctly selects multiple regions", { valid_regions = valid_regions, 
data_dir = data_dir) - expect_equal(sort(unique(tmp$region_code)), sort(selection)) + expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) test_that("subset_lkup correctly selects countries and regions", { @@ -125,9 +125,9 @@ test_that("subset_lkup correctly selects countries and regions", { data_dir = data_dir) # Regions are selected - expect_true(all(region_selection %in% (unique(tmp$region_code)))) + expect_true(all(region_selection %in% (unique(tmp$lkup$region_code)))) # Countries are selected - expect_true(all(country_selection %in% (unique(tmp$country_code)))) + expect_true(all(country_selection %in% (unique(tmp$lkup$country_code)))) }) # select_country() test suite From 5afb426969865b3d02be3ff99ffe91fe637b97a3 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Fri, 28 Feb 2025 20:26:34 +0530 Subject: [PATCH 47/49] change condition --- R/duckdb_func.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/duckdb_func.R b/R/duckdb_func.R index 723cfe5e..28d91773 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -9,7 +9,7 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { # It is not possible to append to parquet file https://stackoverflow.com/questions/39234391/how-to-append-data-to-an-existing-parquet-file # Writing entire data will be very costly as data keeps on growing, better is to save data in duckdb and append to it. 
- if (getOption("pipapi.query_live_data")) { + if (!getOption("pipapi.query_live_data")) { target_file <- if (fill_gaps) "fg_master_file" else "rg_master_file" master_file <- DBI::dbGetQuery(con, From c0f30aaaedf8ecc705298361fa5275f6c3f75b42 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 6 Mar 2025 21:40:46 +0530 Subject: [PATCH 48/49] depend on latest duckplyr --- DESCRIPTION | 3 ++- R/duckdb_func.R | 4 ++-- pipapi.Rproj | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0133626d..75972c3d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,8 @@ Suggests: withr, devtools, stringr, - knitr + knitr, + dplyr Language: en-US Imports: data.table, diff --git a/R/duckdb_func.R b/R/duckdb_func.R index 28d91773..8960c821 100644 --- a/R/duckdb_func.R +++ b/R/duckdb_func.R @@ -17,12 +17,12 @@ return_if_exists <- function(lkup, povline, con, fill_gaps) { duckplyr::as_duckplyr_tibble() data_present_in_master <- - duckplyr::inner_join( + dplyr::inner_join( x = master_file, y = lkup |> collapse::fselect(country_code, reporting_year, is_interpolated), by = c("country_code", "reporting_year", "is_interpolated")) |> - duckplyr::filter(poverty_line == povline) + dplyr::filter(poverty_line == povline) keep <- TRUE if (nrow(data_present_in_master) > 0) { diff --git a/pipapi.Rproj b/pipapi.Rproj index 4e3ca1bc..7a441a72 100644 --- a/pipapi.Rproj +++ b/pipapi.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 3fab55c7-e5d2-495a-8d65-ef16b44733fe RestoreWorkspace: No SaveWorkspace: No From 1623ca8c2bc6df8e62cd4c73e60215f4d3173b47 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Wed, 12 Mar 2025 22:54:37 +0530 Subject: [PATCH 49/49] update vignette --- vignettes/duckdb-caching.Rmd | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/vignettes/duckdb-caching.Rmd b/vignettes/duckdb-caching.Rmd index e774e44a..fe5749c8 100644 --- a/vignettes/duckdb-caching.Rmd +++ b/vignettes/duckdb-caching.Rmd 
@@ -15,7 +15,7 @@ knitr::opts_chunk$set(eval = FALSE, echo = TRUE) ## Introduction -Current caching mechanism for pip uses traditional caching where basically a hash is created based on the value of the arguments passed in the function and if someone calls the same function with the same arguments again the cached result is returned instead of doing the same calculation again. For `pip` we used the packages `cachem` and `memoise` to implement this system of caching. This traditional caching strategy works well in general however, `pip` is a special case and it would benefit much more if it had a custom strategy for caching. +Current caching mechanism for pip uses traditional caching where basically a hash is created based on the value of the arguments passed in the function and if someone calls the same function with the same arguments again the cached result is returned instead of doing the same calculation again. For `pip` we used the packages `cachem` and `memoise` to implement this system of caching. We will refer to this system as the traditional caching strategy. This traditional caching strategy works well in general; however, `pip` is a special case and it would benefit much more if it had a custom strategy for caching. ## How caching currently works? @@ -37,7 +37,7 @@ However, notice that the 2nd call is subset of the 1st one. What I mean by that What if we could take subset of an existing cache like how we need it as above. However, this is not how traditional caching systems work so there is no ready-made solution available. We would need to implement this logic from scratch if we want to make this work. -We came up with an idea to implement this custom caching using `duckdb` in a table.
Basically, all the queries that are called till now are saved in the table and whenever a new call is made it checks if the result already exists in the table, if yes then it returns the result immediately or else it will do the calculation and then save the result to the table for next use and return the result. There are various scenarios that we need to consider to understand this approach. Let's take help of an example to understand each one of them. +We came up with an idea to implement this custom caching using `duckdb` where we save the output in a table. Basically, the results of all the queries made so far are saved in the table. Whenever a new call is made, it checks whether the result already exists in the table: if it does, the result is returned immediately; otherwise, the calculation is done, the result is saved to the table for next use, and the result is returned. There are various scenarios that we need to consider to understand this approach. Let's take help of an example to understand each one of them. Consider that we are just starting out and there is nothing saved in the table and it's empty. @@ -47,7 +47,7 @@ Consider that we are just starting out and there is nothing saved in the table a pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ``` -Since the table is empty, this call will do the calculation and save the result in the table for future use and return the output. +Since the table is empty, this call will do all the calculation and save the result in the table for future use and return the output. #### Scenario 2 - @@ -93,9 +93,9 @@ These are 6 different scenarios that can occur. Note that I have not used all th ## Code overview -We are creating a duckdb file to save our table. The location of this file is saved in an environment variable `PIP_CACHE_FILE` (Example `Sys.setenv('PIP_CACHE_FILE' = fs::path('e:/PIP/pipapi_data/demo.duckdb'))`).
There are two tables created in the duckdb file called `rg_master_file` and `fg_master_file` based on the `fill_gaps` argument a table is selected to save and retrieve data. Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_fun.R` has been added to manage all the functions related to duckdb. +We are creating a duckdb file to save our table. This file is specific to one release and is saved in the root of the release folder with name `cache.duckdb`. There are two tables created in the duckdb file, called `rg_master_file` and `fg_master_file`; based on the `fill_gaps` argument, one of them is selected to save and retrieve data. Based on `fill_gaps` parameter we call either the function `fg_pip` or `rg_pip`. Both the functions call the `subset_lkup` function to filter the data from `lkup` that is relevant to our call. In `subset_lkup` function we call the function `return_if_exists` which as the name suggests returns the data from cache if it exists. A new file called `duckdb_func.R` has been added to manage all the functions related to duckdb. -A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup`. The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. +A named list is returned from `return_if_exists` function where it returns the final output (if it exists) from the master file and subsetted `lkup`.
The partial (or full) final output is again returned as a named list from `subset_lkup` function which is used at the end to combine the two outputs. If `lkup` is non-empty then after all the calculation is done we use the function `update_master_file` to append the master file with new data. If we are running the function for the first time in the release the code also has a provision to create an empty `cache.duckdb` file with two tables. ## Speed comparison @@ -107,16 +107,16 @@ microbenchmark::microbenchmark( ) #Unit: microseconds -# expr min lq mean median uq max neval -# duckdb_DEV 628.59 669.893 2475.44 689.934 719.7505 177901 100 +# expr min lq mean median uq max neval +# duckdb_DEV 830.463 899.872 2241.876 918.446 954.1285 116429.3 100 microbenchmark::microbenchmark( duckdb_caching = pip(country = c("AGO", "USA"), year = 2000, lkup = lkup) ) #Unit: milliseconds -# expr min lq mean median uq max neval -# duckdb_caching 138.3669 143.9853 148.3353 147.0136 152.0311 181.543 100 +# expr min lq mean median uq max neval +# duckdb_caching 161.6227 170.5818 185.5906 175.3116 184.9512 793.8183 100 ``` ```{r} @@ -137,10 +137,10 @@ for(i in seq_along(country_list)) { tictoc::toc() ## For DEV version -# 9.14 sec elapsed +# 16.36 sec elapsed ## For Duckdb -# 10.39 sec elapsed +#18.17 sec elapsed ``` ```{r} @@ -152,8 +152,8 @@ for(i in seq_along(country_list)) { tictoc::toc() ## DEV -# 177.28 sec elapsed +# 403.38 sec elapsed ## Duckdb caching -# 17.97 sec elapsed +# 33.53 sec elapsed ```