diff --git a/.Rbuildignore b/.Rbuildignore index 330a86d..6b79329 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,6 @@ ^renv$ +^renv/ +^\.renv$ ^renv\.lock$ ^climateapi\.Rproj$ ^\.Rproj\.user$ @@ -8,3 +10,4 @@ ^docs$ ^pkgdown$ ^\.github$ +^temporary-scripts$ diff --git a/.Rprofile b/.Rprofile deleted file mode 100644 index 81b960f..0000000 --- a/.Rprofile +++ /dev/null @@ -1 +0,0 @@ -source("renv/activate.R") diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..652e077 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,27 @@ +# climateapi Package Development Notes + +## Performance-Critical Functions + +The following functions rely on large input datasets and are slow. Speed optimizations using `duckplyr`, `parquet`, `arrow`, and `tidytable` are critical for these functions: + +- `get_ihp_registrations()` - IHP registration data can be very large (millions of records) +- `get_nfip_policies()` - NFIP policy data exceeds 80 million records nationally +- `get_nfip_claims()` - NFIP claims data exceeds 2 million records + +### Testing Strategy for Large-Data Functions + +Tests for these functions load data once at the top of the test file and reuse that object for all success tests. This avoids repeated I/O during test runs. Validation tests (expected to fail) call the function directly without using the cached data object. + +### Performance Considerations + +When modifying these functions: +- Prefer `arrow::read_parquet()` over CSV reads +- Use `tidytable` or `dtplyr` for grouped operations on large data +- Avoid loading full datasets into memory when filtering is possible +- Consider chunked processing for extremely large files + +## Testing Philosophy + +**Do not create skip functions for unavailable dependencies.** If a test requires a package (like `tidycensus`) or a resource (like Box), that dependency should be available when tests run. If something is missing, that's a real problem to fix, not work around with skip logic. + +The only acceptable skip pattern is for tests that require external data sources that legitimately may not be configured in all environments (e.g., Box path for large data files). Even then, the validation and signature tests should still run. diff --git a/DESCRIPTION b/DESCRIPTION index def567b..179d915 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -60,11 +60,12 @@ Remotes: UI-Research/urbnindicators, UrbanInstitute/urbnthemes URL: https://ui-research.github.io/climateapi/ -Suggests: +Suggests: knitr, qualtRics, rmarkdown, testthat (>= 3.0.0), tidyverse +Config/testthat/edition: 3 VignetteBuilder: knitr Config/testthat/edition: 3 diff --git a/R/CLAUDE.md b/R/CLAUDE.md new file mode 100644 index 0000000..adfdcb1 --- /dev/null +++ b/R/CLAUDE.md @@ -0,0 +1,7 @@ + +# Recent Activity + + + +*No recent activity* + \ No newline at end of file diff --git a/R/cache_it.R b/R/cache_it.R index a66a423..267f284 100644 --- a/R/cache_it.R +++ b/R/cache_it.R @@ -10,30 +10,35 @@ #' case, file_name must be provided. #' @param file_name File name (without extension). Optional when object is provided #' (uses object's name). Required when object is missing and reading from cache. -#' @param path Directory path where the file should be saved/read. Defaults to /data. -#' If the path does not exist, the user will be prompted to create it (in -#' interactive sessions) or an error will be thrown (in non-interactive sessions). +#' Must not contain path separators or invalid filename characters. +#' @param path Directory path where the file should be saved/read. 
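A minimal sketch of the CLAUDE.md performance guidance above (read parquet, push filters down rather than loading full datasets); the file path and column names here are illustrative, not the package's actual layout.

```r
library(dplyr)

# Hypothetical parquet copy of the NFIP policies extract; arrow evaluates
# the filter/select lazily, so only matching rows reach memory.
policies_la <- arrow::open_dataset("data/nfip_policies.parquet") |>
  filter(state_code == "22") |>               # illustrative column name
  select(policy_id, state_code, premium) |>   # illustrative column names
  collect()
```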
Defaults to +#' current directory ("."). If the path does not exist, the user will be prompted +#' to create it (in interactive sessions) or an error will be thrown (in +#' non-interactive sessions). #' @param read Logical or character. TRUE by default. #' - TRUE: Find and read the most recent cached version based on datestamp. #' - FALSE: Skip reading, always write a new cached file #' - Character: Read the specific file with this exact filename (including extension). #' Defaults to TRUE. +#' @param keep_n Integer. Maximum number of cached versions to keep. When writing +#' a new file, older versions beyond this limit are deleted. Defaults to 5. +#' Set to NULL or Inf to keep all versions. #' #' @return The object that was cached (either written or read) #' #' @examples #' \dontrun{ #' ## Note: datestamps in filenames are illustrative; user results will -#' ## vary depending on the the date at runtime +#' ## vary depending on the date at runtime #' #' # Regular data frames #' my_data <- tibble(x = 1:10, y = letters[1:10]) #' -#' # Cache with automatic naming and datestamp -#' cache_it(my_data) # Creates: my_data_2025_12_07.parquet +#' # Cache with automatic naming and datestamp (writes to current directory) +#' cache_it(my_data) # Creates: ./my_data_2025_12_07.parquet #' -#' # Cache with custom filename -#' cache_it(my_data, file_name = "custom_name") +#' # Cache with custom filename and path +#' cache_it(my_data, file_name = "custom_name", path = "data") #' #' # Read most recent cached version if exists, otherwise write #' cached_data <- cache_it(my_data, read = TRUE) @@ -56,9 +61,15 @@ #' # Read specific file when object doesn't exist #' old_data <- cache_it(read = "my_data_2025_12_01.parquet") #' +#' # Keep only the 3 most recent cached versions +#' cache_it(my_data, keep_n = 3) +#' +#' # Keep all cached versions (no cleanup) +#' cache_it(my_data, keep_n = NULL) +#' #' # SF objects (automatically uses sfarrow) #' my_sf <- sf::st_read(system.file("shape/nc.shp", package="sf")) -#' cache_it(my_sf) # Creates: my_sf_2025_12_07_sf.parquet +#' cache_it(my_sf) # Creates: ./my_sf_2025_12_07_sf.parquet #' #' # Read most recent sf cached file #' cached_sf <- cache_it(my_sf, read = TRUE) @@ -70,11 +81,12 @@ #' @export cache_it <- function(object, file_name = NULL, - path = "/data", - read = TRUE) { + path = ".", + read = TRUE, + keep_n = 5) { # Determine if object parameter was provided - object_provided <- !missing(object) + object_provided <- !missing(object) # Get the name to use for the file and check if we have an actual object value is_string_literal <- FALSE @@ -96,6 +108,12 @@ cache_it <- function(object, } } + # Validate file_name: no path separators or invalid filename characters + invalid_chars <- c("/", "\\", ":", "*", "?", "\"", "<", ">", "|") + if (any(stringr::str_detect(file_name, stringr::fixed(invalid_chars)))) { + stop("file_name contains invalid characters. Must not contain: / \\ : * ? 
\" < > |") + } + # Try to access the actual object value (if provided and not a string literal) has_object_value <- FALSE if (object_provided && !is_string_literal) { @@ -104,6 +122,7 @@ cache_it <- function(object, force(object) TRUE }, error = function(e) { + warning("Object '", file_name, "' could not be evaluated: ", conditionMessage(e)) FALSE }) } @@ -125,7 +144,7 @@ cache_it <- function(object, # Construct full file path full_path <- file.path(path, full_file_name) - # if the specified `path` does not exist, check with user about creating it + # If the specified `path` does not exist, check with user about creating it if (!dir.exists(path)) { if (interactive()) { create_dir <- readline(prompt = stringr::str_c("The specified `path` does not exist. Do you want to create a directory at ", path, "? Y/N: ")) @@ -139,11 +158,22 @@ cache_it <- function(object, } } + # Escape regex metacharacters in file_name for pattern matching + file_name_escaped <- stringr::str_replace_all( + file_name, + "([\\.\\^\\$\\*\\+\\?\\{\\}\\[\\]\\\\\\|\\(\\)])", + "\\\\\\1" + ) + + # Helper function to find cached files + find_cached_files <- function() { + pattern <- stringr::str_c("^", file_name_escaped, "_\\d{4}_\\d{2}_\\d{2}(_sf)?\\.parquet$") + list.files(path, pattern = pattern, full.names = TRUE) + } + # Handle reading based on read parameter if (isTRUE(read)) { - # Find the most recent cached version (both regular and sf files) - pattern <- stringr::str_c("^", file_name, "_\\d{4}_\\d{2}_\\d{2}(_sf)?\\.parquet$") - cached_files <- list.files(path, pattern = pattern, full.names = TRUE) + cached_files <- find_cached_files() if (length(cached_files) > 0) { # Extract dates from filenames and find the most recent @@ -159,8 +189,7 @@ cache_it <- function(object, # Check if file is an sf object based on filename file_is_sf <- stringr::str_detect(most_recent_file, "_sf\\.parquet$") - message(stringr::str_c("Reading most recent cached file: ", basename(most_recent_file), - " (dated ", most_recent_date, ")")) + message("Reading cached file: ", basename(most_recent_file), " (dated ", most_recent_date, ")") if (file_is_sf) { return(sfarrow::st_read_parquet(most_recent_file)) @@ -168,8 +197,7 @@ cache_it <- function(object, return(arrow::read_parquet(most_recent_file)) } } else { - message(stringr::str_c("No cached files found for '", file_name, - "'. Writing new file.")) + message("No cached files found for '", file_name, "'. Writing new file.") } } else if (is.character(read)) { @@ -180,7 +208,7 @@ cache_it <- function(object, # Check if file is an sf object based on filename file_is_sf <- stringr::str_detect(specific_path, "_sf\\.parquet$") - message(stringr::str_c("Reading specified cached file: ", read)) + message("Reading cached file: ", read) if (file_is_sf) { return(sfarrow::st_read_parquet(specific_path)) @@ -192,8 +220,7 @@ cache_it <- function(object, } } else if (isFALSE(read)) { - # Don't read, proceed to writing - message(stringr::str_c("Skipping read. 
Writing new cached file.")) + message("Writing new cached file.") } # Write object to parquet file @@ -203,10 +230,31 @@ cache_it <- function(object, if (is_sf) { sfarrow::st_write_parquet(obj = object, dsn = full_path) - message(stringr::str_c("Cached sf object to: ", basename(full_path))) } else { - arrow::write_parquet(object, full_path) - message(stringr::str_c("Cached object to: ", basename(full_path))) + arrow::write_parquet(object, full_path, compression = "snappy") + } + message("Cached to: ", basename(full_path)) + + # Clean up old versions if keep_n is set + if (!is.null(keep_n) && is.finite(keep_n) && keep_n > 0) { + cached_files <- find_cached_files() + + if (length(cached_files) > keep_n) { + file_dates <- cached_files |> + basename() |> + stringr::str_extract("\\d{4}_\\d{2}_\\d{2}") |> + stringr::str_replace_all("_", "-") |> + as.Date() + + # Sort by date (oldest first) and identify files to delete + date_order <- order(file_dates) + files_to_delete <- cached_files[date_order[seq_len(length(cached_files) - keep_n)]] + + for (f in files_to_delete) { + file.remove(f) + } + message("Removed ", length(files_to_delete), " old cached file(s) (keeping ", keep_n, " most recent).") + } } return(object) diff --git a/R/convert_table_text_to_dataframe.R b/R/convert_table_text_to_dataframe.R index e2fe994..0f4f7db 100644 --- a/R/convert_table_text_to_dataframe.R +++ b/R/convert_table_text_to_dataframe.R @@ -9,7 +9,13 @@ #' @param short_document Boolean; default is FALSE. If TRUE, it is assumed that the document is short enough that it can be processed in a single API call. If FALSE and the inputted `text` is a single item, the function throws an error. Note that multi-page documents should be broken into multi-item vectors/lists before being passed to `text`. #' @param required Boolean; default is FALSE. If TRUE, the LLM will be instructed to return values for all columns. If FALSE, `NULL` values are allowed. Generally, NULL values should be allowed unless you are certain that every value in the inputted text-table has a non-NULL value. #' -#' @return A list of dataframes, with each item corresponding to one page of the inputted text. The dataframes have the same column names and types as specified in `column_types`. Use `purrr::bind_rows()` to consolidate results into a single dataframe, if needed. +#' @return A list of tibbles, where each list element corresponds to one item (typically one page) in the input `text` vector/list. Each tibble contains: +#' \describe{ +#' \item{Structure}{Columns match the names and types defined in `column_types`. Each row represents one record extracted from the table text by the LLM.} +#' \item{NULL values}{When `required = FALSE` (default), columns may contain NULL/NA values if the LLM could not extract a value for that cell.} +#' \item{Empty dataframes}{If the LLM encounters an error processing a page, that list element will be an empty `data.frame()`.} +#' } +#' Use `purrr::list_rbind()` or `dplyr::bind_rows()` to consolidate results into a single dataframe. A warning is issued reminding users to review AI-generated results for accuracy. #' @export #' @examples #' \dontrun{ diff --git a/R/estimate_units_per_parcel.R b/R/estimate_units_per_parcel.R index 27885bd..b06f527 100644 --- a/R/estimate_units_per_parcel.R +++ b/R/estimate_units_per_parcel.R @@ -123,7 +123,19 @@ benchmark_units_to_census = function(data) { #' @param zoning A spatial (polygon) zoning dataset. 
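As a small illustration of the consolidation step named in the convert_table_text_to_dataframe() return documentation above, assuming `page_tables` is the list of per-page tibbles the function returns:

```r
# `page_tables`: hypothetical result of convert_table_text_to_dataframe()
# on a multi-page document (one tibble per page; empty data.frame on error).
combined <- page_tables |>
  purrr::keep(~ nrow(.x) > 0) |>   # drop pages the LLM failed to parse
  purrr::list_rbind()
```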
#' @param acs Optionally, a non-spatial dataset, at the tract level, returned from `urbnindicators::compile_acs_data()`. #' -#' @returns The inputted parcels datasets with attributes describing estimated unit counts by unit type. +#' @return An `sf` object (point geometry, representing parcel centroids) containing the input parcel data augmented with estimated residential unit information. The returned object includes: +#' \describe{ +#' \item{parcel_id}{Character or numeric. The unique parcel identifier from the input data.} +#' \item{tract_geoid}{Character. The 11-digit Census tract GEOID containing the parcel centroid.} +#' \item{jurisdiction}{Character. The jurisdiction name associated with the parcel.} +#' \item{municipality_name}{Character. The municipality name associated with the parcel.} +#' \item{residential_unit_count}{Numeric. The estimated number of residential units on the parcel, benchmarked against ACS estimates at the tract level.} +#' \item{residential_unit_categories}{Factor (ordered). Categorical classification of unit counts: "0", "1", "2", "3-4", "5-9", "10-19", "20-49", "50+".} +#' \item{median_value_improvement_sf}{Numeric. Tract-level median improvement value for single-family parcels.} +#' \item{median_value_improvement_mh}{Numeric. Tract-level median improvement value for manufactured home parcels.} +#' \item{acs_units_*}{Numeric. ACS-reported housing unit counts by units-in-structure category for the tract.} +#' \item{zone, zoned_housing_type, far, setback_*, height_maximum, ...}{Various zoning attributes joined from the zoning dataset.} +#' } #' @export estimate_units_per_parcel = function( structures, diff --git a/R/get_emergency_managerment_performance.R b/R/get_emergency_managerment_performance.R index b176c82..d04ac52 100644 --- a/R/get_emergency_managerment_performance.R +++ b/R/get_emergency_managerment_performance.R @@ -3,7 +3,15 @@ #' @param file_path Path to the downloaded dataset on Box. #' @param api Logical indicating whether to use the OpenFEMA API to retrieve the data. Default is TRUE. #' -#' @return A data frame containing emergency management performance grant (EMPG) data. +#' @return A tibble containing Emergency Management Performance Grant (EMPG) data with the following columns: +#' \describe{ +#' \item{state_name}{Character. The name of the state receiving the grant (renamed from original "state" column).} +#' \item{year_project_start}{Numeric. The year the project started, with corrections applied for known data entry errors in the source data.} +#' \item{state_code}{Character. Two-digit FIPS state code.} +#' \item{state_abbreviation}{Character. Two-letter USPS state abbreviation.} +#' \item{...}{Additional columns from the OpenFEMA EMPG dataset, cleaned via `janitor::clean_names()`.} +#' } +#' Data are filtered to records with `year_project_start > 2012`. A warning is issued noting data completeness concerns for 2024-2025. #' @export get_emergency_management_performance = function( diff --git a/R/get_government_finances.R b/R/get_government_finances.R index e9d4f55..97007b2 100644 --- a/R/get_government_finances.R +++ b/R/get_government_finances.R @@ -2,7 +2,20 @@ #' #' @param year A four-digit year. The default is 2022. #' -#' @return A dataframe containing government unit-level expenses for the specified year. +#' @return A tibble containing government unit-level financial data aggregated by unit, with the following columns: +#' \describe{ +#' \item{unit_id}{Character. 
Unique identifier for the government unit.} +#' \item{year_data}{Numeric. The year of the financial data.} +#' \item{amount_thousands}{Numeric. Total expenditure amount in thousands of dollars.} +#' \item{government_type}{Character. Type of government unit: "State", "County", "City", "Township", "Special District", or "School District/Educational Service Agency".} +#' \item{data_quality}{Numeric. Proportion of records that were reported (vs. imputed or from alternative sources), ranging from 0 to 1.} +#' \item{unit_name}{Character. Name of the government unit.} +#' \item{county_name}{Character. County name where the unit is located.} +#' \item{state_code}{Character. Two-digit state FIPS code.} +#' \item{population}{Numeric. Population served by the government unit.} +#' \item{enrollment}{Numeric. Student enrollment (for school districts; NA for other unit types).} +#' \item{amount_per_capita}{Numeric. Expenditure per capita (or per enrolled student for school districts).} +#' } #' @export get_government_finances = function(year = 2022) { diff --git a/R/get_ihp_registrations.R b/R/get_ihp_registrations.R index a33114f..f6d499f 100644 --- a/R/get_ihp_registrations.R +++ b/R/get_ihp_registrations.R @@ -7,7 +7,19 @@ #' @param api If TRUE, query the API. If FALSE (default), read from disk. #' @param outpath The path to save the parquet-formatted datafile. Applicable only when `api = FALSE`. #' -#' @returns A dataframe comprising IHP registrations +#' @return A tibble containing Individual and Households Program (IHP) registration data at the household level, joined to county-level geography. Due to ZIP-to-county crosswalking, records may be duplicated across counties (see warning). The returned object includes: +#' \describe{ +#' \item{unique_id}{Character. A UUID uniquely identifying each original IHP registration.} +#' \item{allocation_factor_zcta_to_county}{Numeric. The proportion of the ZCTA's population in this county (0-1). Used to apportion registrations when a ZIP spans multiple counties.} +#' \item{geoid_county}{Character. Five-digit FIPS county code.} +#' \item{zcta_code}{Character. Five-digit ZCTA (ZIP Code Tabulation Area) code.} +#' \item{geoid_tract}{Character. 11-digit Census tract GEOID (may have missingness).} +#' \item{geoid_block_group}{Character. 12-digit Census block group GEOID (may have missingness).} +#' \item{disaster_number}{Character. FEMA disaster number associated with the registration.} +#' \item{amount_individual_housing_program, amount_housing_assistance, amount_other_needs_assistance, amount_rental_assistance, amount_repairs, amount_replacement, amount_personal_property}{Numeric. Various IHP assistance amounts in dollars.} +#' \item{amount_flood_insurance_premium_paid_by_fema}{Numeric. Flood insurance premium paid by FEMA in dollars.} +#' \item{state_name, state_abbreviation, state_code}{Character. 
State identifiers.} +#' } #' @export #' #' @examples diff --git a/R/get_lodes.R b/R/get_lodes.R index 75373c1..4c23caa 100644 --- a/R/get_lodes.R +++ b/R/get_lodes.R @@ -157,8 +157,6 @@ rename_lodes_variables = function(.df) { #' \item{jobs_firm_age}{number of employees by the age of employing firm; only available in 'wac' datasets} #' \item{jobs_firm_size}{number of employees for a given range in employer size; only available in 'wac' datasets} #' } -#' -#' #' @export get_lodes = function( lodes_type, diff --git a/R/get_preliminary_damage_assessments.R b/R/get_preliminary_damage_assessments.R index 4a12f51..fefd94e 100644 --- a/R/get_preliminary_damage_assessments.R +++ b/R/get_preliminary_damage_assessments.R @@ -258,7 +258,24 @@ extract_pda_attributes = function(path) { #' @param directory_path The path to the directory where PDA PDFs are stored. Use `scrape_pda_pdfs` to generate these files. #' @param use_cache Boolean. Read the existing dataset stored at `file_path`? If FALSE, data will be generated anew. Else, if a file exists at `file_path`, this file will be returned. #' -#' @return A dataframe of preliminary damage assessment reports. +#' @return A tibble containing parsed data from FEMA Preliminary Damage Assessment (PDA) PDF reports. Each row represents one disaster declaration request. The returned object includes: +#' \describe{ +#' \item{disaster_number}{Character. Four-digit FEMA disaster number.} +#' \item{event_type}{Character. Declaration outcome: "approved", "denial", "appeal_approved", or "appeal_denial".} +#' \item{event_title}{Character. Title/description of the disaster event from the PDA.} +#' \item{event_date_determined}{Date. Date the declaration determination was made.} +#' \item{event_native_flag}{Integer. 1 if this is a tribal declaration, 0 otherwise.} +#' \item{ia_requested}{Integer. 1 if Individual Assistance was requested, 0 otherwise.} +#' \item{ia_residences_impacted, ia_residences_destroyed, ia_residences_major_damage, ia_residences_minor_damage, ia_residences_affected}{Numeric. Counts of affected residences by damage category.} +#' \item{ia_residences_insured_total_percent, ia_residences_insured_flood_percent}{Numeric. Insurance coverage percentages.} +#' \item{ia_cost_estimate_total}{Numeric. Total estimated Individual Assistance cost.} +#' \item{pa_requested}{Integer. 1 if Public Assistance was requested, 0 otherwise.} +#' \item{pa_cost_estimate_total}{Numeric. Total estimated Public Assistance cost.} +#' \item{pa_per_capita_impact_statewide, pa_per_capita_impact_countywide_max, pa_per_capita_impact_countywide_min}{Numeric. Per capita impact metrics.} +#' \item{pa_per_capita_impact_indicator_statewide, pa_per_capita_impact_indicator_countywide}{Character. Per capita impact indicator values.} +#' \item{text}{Character. Full extracted text from the PDA for reference.} +#' } +#' Note: Due to the unstructured nature of PDF source documents, some extracted values may be inaccurate and should be verified. #' @export #' #' @examples diff --git a/R/get_sba_loans.R b/R/get_sba_loans.R index a6e470c..37b2210 100644 --- a/R/get_sba_loans.R +++ b/R/get_sba_loans.R @@ -4,7 +4,20 @@ #' @title Access SBA data on disaster loans -#' @returns A dataframe comprising city- and zip-level data on SBA loanmaking +#' @return A tibble containing SBA disaster loan data at the city/zip level, combining both home and business loan records. The returned object includes: +#' \describe{ +#' \item{disaster_number_fema}{Character. 
The FEMA disaster number associated with the loan.} +#' \item{disaster_number_sba_physical}{Character. SBA physical disaster declaration number.} +#' \item{disaster_number_sba_eidl}{Character. SBA Economic Injury Disaster Loan (EIDL) declaration number.} +#' \item{damaged_property_zip_code}{Character. ZIP code of the damaged property.} +#' \item{damaged_property_city_name}{Character. City name of the damaged property.} +#' \item{damaged_property_state_code}{Character. State code of the damaged property.} +#' \item{verified_loss_total}{Numeric. Total verified loss amount in dollars.} +#' \item{approved_amount_total}{Numeric. Total approved loan amount in dollars.} +#' \item{approved_amount_real_estate}{Numeric. Approved loan amount for real estate in dollars.} +#' \item{fiscal_year}{Character. Fiscal year of the loan (format: "20XX").} +#' \item{loan_type}{Character. Either "business" or "residential" indicating the loan category.} +#' } #' @export #' #' @examples diff --git a/R/get_sheldus.R b/R/get_sheldus.R index 7cc1321..a0d5644 100644 --- a/R/get_sheldus.R +++ b/R/get_sheldus.R @@ -1,7 +1,22 @@ #' @title Access temporal county-level SHELDUS hazard damage data. #' @param file_path The path to the raw SHELDUS data. #' -#' @returns A dataframe comprising hazard x month x year x county observations of hazard events. +#' @return A tibble containing SHELDUS (Spatial Hazard Events and Losses Database for the United States) data at the county-year-month-hazard level. The returned object includes: +#' \describe{ +#' \item{unique_id}{Character. A UUID uniquely identifying each observation.} +#' \item{GEOID}{Character. Five-digit FIPS county code. Connecticut counties are crosswalked to 2022 planning regions using population-weighted allocation factors.} +#' \item{state_name}{Character. State name (sentence case).} +#' \item{county_name}{Character. County name.} +#' \item{year}{Numeric. Year of the hazard event.} +#' \item{month}{Numeric. Month of the hazard event (1-12).} +#' \item{hazard}{Character. Type of hazard event.} +#' \item{damage_property}{Numeric. Property damage in 2023 inflation-adjusted dollars.} +#' \item{damage_crop}{Numeric. Crop damage in 2023 inflation-adjusted dollars.} +#' \item{injuries}{Numeric. Number of injuries.} +#' \item{fatalities}{Numeric. Number of fatalities.} +#' \item{records}{Numeric. Number of individual event records aggregated into this observation.} +#' } +#' Note: Only counties that existed in either 2010 or 2022 are included. #' @export #' #' @examples diff --git a/R/get_structures.R b/R/get_structures.R index 9bc7e32..d50a647 100644 --- a/R/get_structures.R +++ b/R/get_structures.R @@ -5,7 +5,21 @@ #' @param boundaries A POLYGON or MULTIPOLYGON object, or an sf::st_bbox()-style bbox. #' @param keep_structures Logical. If TRUE, the raw structure data will be returned alongside the summarized data. -#' @returns A dataframe comprising estimated counts of each structure type, at the specified `geography`, for all such geographic units intersecting the `boundaries` object. If keep_structure = TRUE, returns a list with two elements: the summarized data and the raw structure data. +#' @return Depends on the `keep_structures` parameter: +#' +#' **When `keep_structures = FALSE` (default):** A tibble containing structure counts aggregated by geography and occupancy type, with columns: +#' \describe{ +#' \item{GEOID}{Character. Census geography identifier (county FIPS or tract GEOID depending on `geography` parameter).} +#' \item{primary_occupancy}{Character. 
The primary occupancy classification of the structures (e.g., "Single Family Dwelling", "Multi - Family Dwelling").} +#' \item{occupancy_class}{Character. Broad occupancy classification (e.g., "Residential", "Commercial").} +#' \item{count}{Integer. Number of structures of this occupancy type in the geography.} +#' } +#' +#' **When `keep_structures = TRUE`:** A named list with two elements: +#' \describe{ +#' \item{structures_summarized}{The aggregated tibble described above.} +#' \item{structures_raw}{An `sf` object (POINT geometry) containing individual structure records with columns: `unique_id` (building ID), `occupancy_class`, `primary_occupancy`, `county_fips`, and geometry.} +#' } #' @export #' @examples diff --git a/R/qualtrics_analysis.R b/R/qualtrics_analysis.R index 1a5447e..b263e5d 100644 --- a/R/qualtrics_analysis.R +++ b/R/qualtrics_analysis.R @@ -8,7 +8,15 @@ #' @param sections A named vector specifying the last question number in each survey section #' @param text_replace A named character vector of regex patterns to replace in the metadata #' -#' @return A dataframe of formatted metadata +#' @return A tibble containing formatted Qualtrics survey metadata with the following columns: +#' \describe{ +#' \item{question_number}{Integer. The sequential position of the question in the survey (1-indexed).} +#' \item{question_name}{Character. The internal Qualtrics question identifier (e.g., "Q1", "Q2_1").} +#' \item{text_main}{Character. The primary question text, with any patterns specified in `text_replace` substituted.} +#' \item{text_sub}{Character. The sub-question or response option text, with any patterns specified in `text_replace` substituted.} +#' \item{survey_section}{Character. The name of the survey section to which the question belongs, as defined by the `sections` parameter. Filled upward from section boundaries. +#' } +#' } #' @export qualtrics_format_metadata = function(metadata, sections = c(), text_replace = "zzzzz") { @@ -38,7 +46,7 @@ qualtrics_format_metadata = function(metadata, sections = c(), text_replace = "z #' @param survey_section A regex pattern to match the survey section(s) #' @param return_values The name of the column (character) to be returned #' -#' @return A character vector of the requested metadata +#' @return A character vector containing the values from the column specified by `return_values` (default: "text_sub"), filtered to rows matching either the `question_name` or `survey_section` pattern. The length of the vector corresponds to the number of matching rows in the metadata. Returns an empty character vector if no matches are found. #' @export qualtrics_get_metadata = function(metadata, question_name = NULL, survey_section = NULL, return_values = "text_sub") { @@ -69,7 +77,14 @@ qualtrics_get_metadata = function(metadata, question_name = NULL, survey_section #' @param text_replace A named character vector of regex patterns to replace in the response text #' @param omit_other Logical; whether to omit the "Other" response option. Default is TRUE. #' -#' @return A ggplot object +#' @return A `ggplot2` object representing a visualization of survey responses. The plot type varies based on `question_type`: +#' \describe{ +#' \item{For "continuous"}{A boxplot showing the distribution of numeric responses, with question sub-text on the y-axis and values on the x-axis. Multiple sub-questions are displayed as separate boxplots. +#' } +#' \item{For "checkbox_single" or "checkbox_multi"}{A horizontal bar chart showing response counts. 
Response options are ordered by total count (descending). For "checkbox_multi", bars are stacked by response type.} +#' \item{For "checkbox_factor"}{A stacked horizontal bar chart showing response counts by factor level, with response options ordered by total count.} +#' } +#' The plot uses Urban Institute theming via `urbnthemes::theme_urbn_print()` and includes the specified `title` and auto-generated or custom `subtitle`. #' @export qualtrics_plot_question = function( df, @@ -213,7 +228,13 @@ qualtrics_plot_question = function( #' @param predicate_question Optional. The name of a single column that controls whether columns selected with `question_code_include` #' @param predicate_question_negative_value If `predicate_question` is specified, provide the value that indicates a negative response to the predicate question. For responses where the predicate question has this value, this value will be imputed to the specified columns #' -#' @return The inputted `df` object with missing/non-missing values applied to specified columns +#' @return A tibble containing only the columns selected by `question_code_include` (excluding those matching `question_code_omit`), with missing values handled according to the following logic: +#' \describe{ +#' \item{Without predicate_question}{If all selected columns are NA for a row, values remain NA. If any selected column has a non-NA value, NA values in other selected columns are replaced with the appropriate default value from `default_values` based on column type.} +#' \item{With predicate_question}{If the predicate question is NA, all selected columns are set to NA. If the predicate question equals `predicate_question_negative_value +#' `, all selected columns are set to the appropriate default value. Otherwise, original values are preserved.} +#' } +#' Column types and their default value mappings: character uses `default_values[[1]]`, numeric uses `default_values[[2]]`, and Date/POSIXct uses `default_values[[3]]`. #' @export qualtrics_define_missing = function( df, diff --git a/R/read_ipums_cached.R b/R/read_ipums_cached.R index d0fdc6d..6d7494b 100644 --- a/R/read_ipums_cached.R +++ b/R/read_ipums_cached.R @@ -20,7 +20,12 @@ #' @param extract_definition A `define_extract_micro()` or `define_extract_agg()` object. #' @param refresh If true, execute the API query, even if data are already stored locally. Defaults to FALSE. #' -#' @return A dataframe corresponding to the supplied `extract_definition` +#' @return A tibble containing IPUMS data corresponding to the supplied `extract_definition`. The structure varies by collection type: +#' \describe{ +#' \item{For microdata collections (e.g., "usa", "cps")}{Returns individual-level records with columns corresponding to the variables specified in the extract definition. Column names and types are determined by IPUMS variable specifications. The data are read via `ipumsr::read_ipums_micro()`.} +#' \item{For aggregate collections ("nhgis", "ihgis")}{Returns aggregate data (typically at geographic summary levels) with columns corresponding to the requested tables/variables. IPUMS variable attributes are applied via the collection's codebook. The data are read via `ipumsr::read_ipums_agg()`.} +#' } +#' If a cached file exists at the specified path and `refresh = FALSE`, the cached data are returned with a warning. Otherwise, the extract is submitted to IPUMS, downloaded, and cached for future use. 
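A minimal usage sketch for the cached IPUMS workflow documented above; the sample and variable names are illustrative, arguments not shown are assumed to take their defaults, and an IPUMS API key is assumed to be registered via ipumsr::set_ipums_api_key().

```r
# Illustrative extract definition only; not a prescribed package workflow.
extract <- ipumsr::define_extract_micro(
  collection  = "usa",
  description = "Example ACS extract",
  samples     = "us2022a",
  variables   = c("AGE", "SEX")
)

# First call submits, downloads, and caches; subsequent calls with
# refresh = FALSE read the locally cached copy instead of re-querying.
acs <- read_ipums_cached(extract_definition = extract, refresh = FALSE)
```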
#' @export #' #' @examples diff --git a/R/spatial_analysis.R b/R/spatial_analysis.R index 967c883..ab84cdb 100644 --- a/R/spatial_analysis.R +++ b/R/spatial_analysis.R @@ -7,7 +7,13 @@ #' @param max_length The maximum length of each segment. Segments longer than this value will be subdivided; those that are below this threshold will be returned as-is. #' @param crs The coordinate reference system to which the linestring should be transformed. Default is 5070. #' -#' @return A spatial dataframe comprising linestrings below the `max_length` threshold, linked back to their input linestrings via a `line_id` attribute +#' @return An `sf` object (simple feature collection) with geometry type LINESTRING. The returned object contains: +#' \describe{ +#' \item{row_id}{Integer. The row index from the original input linestring, allowing linkage back to the input data.} +#' \item{...}{All original attributes from the input `line` object are preserved and joined back via `row_id`.} +#' \item{geometry}{LINESTRING geometry. Each segment is at most `max_length` units long (in the CRS units). Segments shorter than `max_length` in the input are returned unchanged.} +#' } +#' The CRS of the output is set to the value specified by the `crs` parameter (default: EPSG:5070). #' @export subdivide_linestring = function(line, max_length, crs = 5070) { @@ -72,7 +78,14 @@ subdivide_linestring = function(line, max_length, crs = 5070) { #' #' @param .sf The spatial dataframe containing one or more polygons #' -#' @return A simple feature collection of linestrings derived from the inputted polygons; all attributes are retained, and two new attributes--`polygon_id` and `line_id`--are prepended to the output +#' @return An `sf` object (simple feature collection) with geometry type LINESTRING. The returned object contains: +#' \describe{ +#' \item{polygon_id}{Integer. The row index of the originating polygon from the input `.sf` object, enabling linkage back to the source polygon.} +#' \item{line_id}{Integer. A sequential identifier for each line segment within its originating polygon. Line segments are ordered according to the vertex sequence of the polygon boundary.} +#' \item{...}{All original attributes from the input `.sf` object are preserved and joined back via `polygon_id`.} +#' \item{geometry}{LINESTRING geometry. Each line segment represents one edge of the original polygon boundary.} +#' } +#' The CRS of the output matches the input `.sf` object (transformed to EPSG:5070 during processing). #' @export polygons_to_linestring = function(.sf) { sf1 = .sf %>% diff --git a/R/utilities.R b/R/utilities.R index a5d00c8..ea00ea2 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1,6 +1,7 @@ #' @title Get the user's username #' -#' @return The username of the user running the script +#' @return A character string containing the system username. Uses `Sys.info()["user"]` +#' which works reliably across Windows, Mac, and Linux. #' @export #' #' @examples @@ -8,14 +9,20 @@ #' get_system_username() #' } get_system_username = function() { - here::here() |> - stringr::str_match("Users/.*?/") |> - stringr::str_remove_all("Users|/") + username <- Sys.info()[["user"]] + if (is.null(username) || username == "") { + stop("Could not determine system username from Sys.info()") + } + + username } #' @title Get the path to the C&C Box folder #' -#' @return The filepath to the C&C Box folder +#' @return A character string containing the full file path to the Climate and Communities (C&C) Box folder. 
+#' On Windows, returns "C:/Users/{username}/Box/METRO Climate and Communities Practice Area/github-repository". +#' On Mac, checks for Box at "/Users/{username}/Box" or "/Users/{username}/Library/CloudStorage/Box-Box", +#' using whichever exists. Throws an error if the Box folder cannot be found. #' @export #' #' @examples @@ -23,10 +30,42 @@ get_system_username = function() { #' get_box_path() #' } get_box_path = function() { - username = get_system_username() - file.path( - "C:", "Users", username, "Box", "METRO Climate and Communities Practice Area", - "github-repository") + username <- get_system_username() + os <- Sys.info()[["sysname"]] + box_subfolder <- file.path("METRO Climate and Communities Practice Area", "github-repository") + + if (os == "Windows") { + box_root <- file.path("C:", "Users", username, "Box") + } else if (os == "Darwin") { + # Mac: Check common Box locations + box_locations <- c( + file.path("/Users", username, "Box"), + file.path("/Users", username, "Library", "CloudStorage", "Box-Box") + ) + box_root <- NULL + for (loc in box_locations) { + if (dir.exists(loc)) { + box_root <- loc + break + } + } + if (is.null(box_root)) { + stop( + "Could not find Box folder. Checked:\n", + paste(" -", box_locations, collapse = "\n") + ) + } + } else { + stop("Unsupported operating system: ", os, ". Only Windows and Mac are supported.") + } + + box_path <- file.path(box_root, box_subfolder) + + if (!dir.exists(box_path)) { + warning("Box path does not exist: ", box_path) + } + + box_path } @@ -34,7 +73,7 @@ get_box_path = function() { #' #' @param dataset The name of the dataset. One of c('nfip_policies', 'ihp_registrations'). #' -#' @return A vector of raw column names to be selected from the specified dataset +#' @return A character vector containing the raw column names (in camelCase format as they appear in the source data) to be selected when reading the specified dataset. The columns returned are curated subsets of the full dataset columns, excluding administrative/metadata fields. For "nfip_policies": 20 columns including location, policy details, and building characteristics. For "ihp_registrations": ~20 columns including disaster info, geographic identifiers, and assistance amounts. get_dataset_columns = function(dataset) { if (length(dataset) > 1 | !is.character(dataset)) { @@ -118,7 +157,7 @@ get_dataset_columns = function(dataset) { #' @param subsetted_columns The columns to include in the outputted parquet data. #' @param dataset NULL by default. Alternately, one of c("nfip_policies", "ihp_registrations"). If not null, this will be used to select the columns that are returned. #' -#' @returns Nothing. Parquet data are written to local path. +#' @return NULL (invisibly). This function is called for its side effect of writing a parquet file to disk at the specified `outpath` (or a path derived from `inpath` with a .parquet extension). The function reads the input file in chunks to handle large files efficiently, optionally subsets to specified columns, and writes the result in Apache Parquet format using `arrow::write_parquet()`. convert_delimited_to_parquet = function( inpath, outpath = NULL, @@ -170,7 +209,12 @@ convert_delimited_to_parquet = function( #' @param return_geometry Logical. Include the geometries of returned geographies? #' @param projection The EPSG code of the desired projection. Default is 5070 (Albers Equal Area). 
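To illustrate the cross-platform Box-path helper above, one hedged usage pattern (the subfolder and file name under the Box repository are hypothetical):

```r
# get_box_path() resolves the C&C Box root on Windows or Mac; the
# "sheldus/sheldus_raw.csv" location below is hypothetical.
sheldus_file <- file.path(get_box_path(), "sheldus", "sheldus_raw.csv")
sheldus <- get_sheldus(file_path = sheldus_file)
```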
#' -#' @returns A dataframe (optionally, an sf-dataframe) comprising Census geographies +#' @return A tibble (or `sf` object if `return_geometry = TRUE`) containing Census geographies that overlap with the input spatial data. The structure depends on the geographic extent: +#' \describe{ +#' \item{When multiple states overlap}{Returns state-level data with columns: `state_geoid` (2-digit FIPS), `geography` ("state").} +#' \item{When a single state overlaps}{Returns tract-level data with columns: `state_geoid` (2-digit FIPS), `county_geoid` (5-digit FIPS), `geography` ("tract").} +#' } +#' If `return_geometry = TRUE`, the geometry column is retained; otherwise it is dropped. #' @export get_spatial_extent_census = function(data, return_geometry = FALSE, projection = 5070) { warning("This leverages `sf::st_overlaps()` and does not provide the desired results consistently.") @@ -226,7 +270,8 @@ get_spatial_extent_census = function(data, return_geometry = FALSE, projection = #' @param file_names Optionally, a character vector of the same length as `urls` containing only the file names (not the full paths) with which the downloaded files should be named. If NULL (default), file names are extracted from `urls`. #' @param silent If TRUE (default), files are saved silently. If FALSE, downloaded files are read and returned as a list. #' -#' @returns Either nothing (silent == TRUE) or a list of dataframes from the specified URLs. +#' @return When `silent = TRUE` (default): Returns NULL invisibly. Files are downloaded and saved to `directory`. +#' When `silent = FALSE`: Returns a list of data frames, one per URL, containing the contents of each downloaded .xlsx file as read by `openxlsx::read.xlsx()`. List elements are in the same order as the input `urls`. #' @export read_xlsx_from_url = function(urls, directory, file_names = NULL, silent = TRUE) { @@ -264,7 +309,12 @@ read_xlsx_from_url = function(urls, directory, file_names = NULL, silent = TRUE) #' @param geography_type One of c("state", "county"). #' @param year The year for which to obtain state/county metadata. Cannot be greater than the most recent year supported by `library(tidycensus)` for the 5-year ACS. #' -#' @return A data frame containing metadata about the specified geography type and area. +#' @return A tibble containing geographic metadata. The structure varies by `geography_type`: +#' \describe{ +#' \item{For "county"}{Returns county-level data with columns: `state_code` (2-digit FIPS), `state_name`, `state_abbreviation` (2-letter USPS), `state_population`, `county_code` (5-digit FIPS), `county_name`, `county_population`.} +#' \item{For "state"}{Returns state-level data with columns: `state_abbreviation`, `state_code`, `state_name` (one row per state, no county information).} +#' } +#' Population data are sourced from the ACS 5-year estimates for the specified `year`. get_geography_metadata = function( geography_type = c("state", "county"), @@ -338,7 +388,7 @@ date_string_to_date = function(date_string) { #' @param base_year The year to use as the base for inflation adjustment. If NULL, defaults to the most recent year in the PCE index data. #' @param names_suffix A suffix to add to the names of the inflation-adjusted variables. If NULL, defaults to "_". If "", columns are renamed in place. #' -#' @return A dataframe with inflation-adjusted values +#' @return A tibble identical to the input `df` with additional inflation-adjusted columns. 
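The adjustment described in this return block reduces to a simple index ratio; a conceptual sketch with made-up PCE index values (not the package's internal code):

```r
# Convert a 2015 dollar amount into base-year (2023) dollars.
pce_2015 <- 100.0   # hypothetical index values
pce_2023 <- 125.0
amount_2015 <- 5000
amount_2023_dollars <- amount_2015 * (pce_2023 / pce_2015)   # 6250
```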
For each column specified in `dollar_variables`, a new column is created with the same name plus `names_suffix` (default: "_{base_year}"). The adjusted values are calculated by multiplying original values by an inflation factor derived from the PCE Price Index ratio between the base year and each observation's year. Original columns are preserved unchanged. #' @export #' #' @examples diff --git a/tests/testthat/test-cache_it.R b/tests/testthat/test-cache_it.R new file mode 100644 index 0000000..3d36c34 --- /dev/null +++ b/tests/testthat/test-cache_it.R @@ -0,0 +1,98 @@ +# Tests for cache_it.R + +test_that("cache_it validates input parameters", { + # Test that either object or file_name must be provided + expect_error( + cache_it(file_name = NULL, path = tempdir(), read = FALSE), + "Either 'object' or 'file_name' must be provided" + ) +}) + +test_that("cache_it handles path validation", { + test_data <- tibble::tibble(x = 1:5, y = letters[1:5]) + + # Test with non-existent path (non-interactive should error) + expect_error( + cache_it(test_data, path = "/nonexistent/path/12345", read = FALSE) + ) +}) + +test_that("cache_it creates correct filename format", { + # Create temporary directory for testing + temp_dir <- tempdir() + + # Create test data + test_data <- tibble::tibble(x = 1:5, y = letters[1:5]) + + # Cache the data + result <- cache_it(test_data, file_name = "test_cache", path = temp_dir, read = FALSE) + + # Check that file was created with correct naming pattern + files <- list.files(temp_dir, pattern = "^test_cache_\\d{4}_\\d{2}_\\d{2}\\.parquet$") + expect_true(length(files) >= 1) + + # Clean up + unlink(file.path(temp_dir, files)) +}) + +test_that("cache_it returns the cached object", { + temp_dir <- tempdir() + test_data <- tibble::tibble(x = 1:5, y = letters[1:5]) + + result <- cache_it(test_data, file_name = "test_return", path = temp_dir, read = FALSE) + + # Result should be the same as input + expect_equal(result$x, test_data$x) + expect_equal(result$y, test_data$y) + + # Clean up + files <- list.files(temp_dir, pattern = "^test_return") + unlink(file.path(temp_dir, files)) +}) + +test_that("cache_it read parameter works correctly", { + temp_dir <- tempdir() + test_data <- tibble::tibble(x = 1:5, y = letters[1:5]) + + # First, write the data + cache_it(test_data, file_name = "test_read", path = temp_dir, read = FALSE) + + # Then read it back (should find the cached file) + result <- cache_it(file_name = "test_read", path = temp_dir, read = TRUE) + + expect_equal(result$x, test_data$x) + expect_equal(result$y, test_data$y) + + # Clean up + files <- list.files(temp_dir, pattern = "^test_read") + unlink(file.path(temp_dir, files)) +}) + +test_that("cache_it handles specific file read", { + temp_dir <- tempdir() + test_data <- tibble::tibble(x = 1:5, y = letters[1:5]) + + # Write the data first + result1 <- cache_it(test_data, file_name = "test_specific", path = temp_dir, read = FALSE) + + # Get the created filename + files <- list.files(temp_dir, pattern = "^test_specific_\\d{4}_\\d{2}_\\d{2}\\.parquet$") + expect_true(length(files) >= 1) + + # Read specific file + result2 <- cache_it(file_name = "test_specific", path = temp_dir, read = files[1]) + + expect_equal(result2$x, test_data$x) + + # Clean up + unlink(file.path(temp_dir, files)) +}) + +test_that("cache_it errors when reading non-existent specific file", { + temp_dir <- tempdir() + + expect_error( + cache_it(file_name = "test", path = temp_dir, read = "nonexistent_file.parquet"), + "does not exist" + ) +}) diff --git 
a/tests/testthat/test-convert_table_text_to_dataframe.R b/tests/testthat/test-convert_table_text_to_dataframe.R new file mode 100644 index 0000000..0202b3e --- /dev/null +++ b/tests/testthat/test-convert_table_text_to_dataframe.R @@ -0,0 +1,60 @@ +# Tests for convert_table_text_to_dataframe.R + +test_that("convert_table_text_to_dataframe validates llm_company_name", { + expect_error( + convert_table_text_to_dataframe( + text = "test", + column_types = NULL, + llm_company_name = "invalid", + read_warning = TRUE, + short_document = TRUE + ), + "Only `openai` and `anthropic`" + ) +}) + +test_that("convert_table_text_to_dataframe requires read_warning acknowledgment", { + expect_error( + convert_table_text_to_dataframe( + text = "test", + column_types = NULL, + read_warning = FALSE + ), + "Read the function documentation" + ) +}) + +test_that("convert_table_text_to_dataframe validates text length", { + # Single-item text without short_document=TRUE should error + expect_error( + convert_table_text_to_dataframe( + text = "single page text", + column_types = NULL, + read_warning = TRUE, + short_document = FALSE + ), + "length of 1" + ) +}) + +test_that("convert_table_text_to_dataframe function signature is correct", { + expect_true(is.function(convert_table_text_to_dataframe)) + + # Check parameter names exist + params <- names(formals(convert_table_text_to_dataframe)) + expect_true("text" %in% params) + expect_true("column_types" %in% params) + expect_true("llm_company_name" %in% params) + expect_true("preprocess" %in% params) + expect_true("read_warning" %in% params) + expect_true("short_document" %in% params) + expect_true("required" %in% params) + + # Check defaults + f <- convert_table_text_to_dataframe + expect_equal(formals(f)$llm_company_name, "openai") + expect_true(formals(f)$preprocess) + expect_false(formals(f)$read_warning) + expect_false(formals(f)$short_document) + expect_false(formals(f)$required) +}) diff --git a/tests/testthat/test-estimate_units_per_parcel.R b/tests/testthat/test-estimate_units_per_parcel.R new file mode 100644 index 0000000..9e86a79 --- /dev/null +++ b/tests/testthat/test-estimate_units_per_parcel.R @@ -0,0 +1,22 @@ +# Tests for estimate_units_per_parcel.R + +test_that("estimate_units_per_parcel function signature is correct", { + expect_true(is.function(estimate_units_per_parcel)) + + # Check parameter names + params <- names(formals(estimate_units_per_parcel)) + expect_true("structures" %in% params) + expect_true("parcels" %in% params) + expect_true("zoning" %in% params) + expect_true("acs" %in% params) + + # Check default for acs + f <- estimate_units_per_parcel + expect_null(formals(f)$acs) +}) + +test_that("estimate_units_per_parcel requires specific input datasets", { + # This is a complex function requiring specific data structures + # The function will error without properly structured inputs + expect_true(is.function(estimate_units_per_parcel)) +}) diff --git a/tests/testthat/test-estimate_zoning_envelope.R b/tests/testthat/test-estimate_zoning_envelope.R new file mode 100644 index 0000000..65adcf0 --- /dev/null +++ b/tests/testthat/test-estimate_zoning_envelope.R @@ -0,0 +1,116 @@ +# Tests for estimate_zoning_envelope.R + +test_that("estimate_zoning_envelope validates required columns", { + # Create test data missing required columns + incomplete_df <- tibble::tibble( + parcel_id = 1:3, + parcel_area_sqft = c(1000, 2000, 3000) + ) + + expect_error( + estimate_zoning_envelope(incomplete_df), + "must contain the following columns" + ) +}) + 
+test_that("estimate_zoning_envelope validates parcel_dimensions consistency", { + # If one parcel dimension is provided, both must be provided + partial_df <- tibble::tibble( + parcel_id = 1:3, + parcel_area_sqft = c(1000, 2000, 3000), + setback_front = 10, + setback_rear = 10, + setback_side = 10, + parcel_area_sqft_minimum = 1000, + units_per_parcel_maximum = 10, + units_per_acre_maximum = NA, + parcel_coverage_percent_maximum = 70, + parcel_coverage_percent_maximum_building = 70, + open_space_ratio_minimum = 0.2, + floor_area_ratio_maximum = 2, + height_stories_maximum = 3, + height_feet_maximum = NA, + parking_stalls_per_parcel_minimum = 1, + parking_stalls_per_unit_minimum = 2, + parcel_depth = c(50, 60, 70) # Only depth provided, not width + ) + + expect_error( + estimate_zoning_envelope(partial_df), + "either both or neither" + ) +}) + +test_that("estimate_zoning_envelope function signature is correct", { + expect_true(is.function(estimate_zoning_envelope)) + + # Check parameter defaults + f <- estimate_zoning_envelope + expect_equal(formals(f)$development_size_maximum, 300) + expect_equal(formals(f)$standard_unit_sqft_multifamily, 1000) + expect_equal(formals(f)$standard_parking_stall_sqft, 325) + expect_equal(formals(f)$parking_model, "singlestory") +}) + +test_that("estimate_zoning_envelope returns expected structure with valid input", { + # Create valid test data + valid_df <- tibble::tibble( + parcel_id = 1:3, + parcel_area_sqft = c(5000, 10000, 15000), + setback_front = 10, + setback_rear = 10, + setback_side = 5, + parcel_area_sqft_minimum = 1000, + units_per_parcel_maximum = 10, + units_per_acre_maximum = NA, + parcel_coverage_percent_maximum = 70, + parcel_coverage_percent_maximum_building = 70, + open_space_ratio_minimum = 0.2, + floor_area_ratio_maximum = 2, + height_stories_maximum = 3, + height_feet_maximum = NA, + parking_stalls_per_parcel_minimum = 1, + parking_stalls_per_unit_minimum = 1 + ) + + result <- estimate_zoning_envelope(valid_df) + + # Check that result is a data frame + expect_true(is.data.frame(result)) + + # Check that maximum_development_capacity_zoned column exists + expect_true("maximum_development_capacity_zoned" %in% names(result)) + + # Check that all input rows are preserved + expect_equal(nrow(result), 3) + + # Check that capacity values are non-negative + expect_true(all(result$maximum_development_capacity_zoned >= 0)) +}) + +test_that("estimate_zoning_envelope respects parking_model parameter", { + valid_df <- tibble::tibble( + parcel_id = 1, + parcel_area_sqft = 10000, + setback_front = 10, + setback_rear = 10, + setback_side = 5, + parcel_area_sqft_minimum = 1000, + units_per_parcel_maximum = 50, + units_per_acre_maximum = NA, + parcel_coverage_percent_maximum = 70, + parcel_coverage_percent_maximum_building = 70, + open_space_ratio_minimum = 0.2, + floor_area_ratio_maximum = 5, + height_stories_maximum = 5, + height_feet_maximum = NA, + parking_stalls_per_parcel_minimum = 1, + parking_stalls_per_unit_minimum = 2 + ) + + result_single <- estimate_zoning_envelope(valid_df, parking_model = "singlestory") + result_multi <- estimate_zoning_envelope(valid_df, parking_model = "multistory") + + # Multistory parking should allow more units (parking distributed vertically) + expect_true(result_multi$maximum_development_capacity_zoned >= result_single$maximum_development_capacity_zoned) +}) diff --git a/tests/testthat/test-get_business_patterns.R b/tests/testthat/test-get_business_patterns.R index 544d90c..586a5e7 100644 --- 
a/tests/testthat/test-get_business_patterns.R +++ b/tests/testthat/test-get_business_patterns.R @@ -1,3 +1,4 @@ + testthat::test_that("naics_code_digits errors clearly when not in c(2,3)", { testthat::expect_error({get_business_patterns(year = 2022, naics_code_digits = 4)}) }) diff --git a/tests/testthat/test-get_current_fire_perimeters.R b/tests/testthat/test-get_current_fire_perimeters.R new file mode 100644 index 0000000..889995e --- /dev/null +++ b/tests/testthat/test-get_current_fire_perimeters.R @@ -0,0 +1,42 @@ +# Tests for get_current_fire_perimeters.R + +test_that("get_current_fire_perimeters validates geography parameter", { + # geography must be NULL + expect_error( + get_current_fire_perimeters(geography = "county"), + "must be NULL" + ) +}) + +test_that("get_current_fire_perimeters validates api parameter", { + # api must be TRUE + expect_error( + get_current_fire_perimeters(api = FALSE), + "must be queried from the API" + ) +}) + +test_that("get_current_fire_perimeters validates bbox parameter", { + # Invalid bbox should error (either with custom message or st_bbox error) + expect_error( + get_current_fire_perimeters(bbox = "invalid") + ) +}) + +test_that("get_current_fire_perimeters function signature is correct", { + expect_true(is.function(get_current_fire_perimeters)) + + # Check parameter names + params <- names(formals(get_current_fire_perimeters)) + expect_true("geography" %in% params) + expect_true("file_path" %in% params) + expect_true("bbox" %in% params) + expect_true("api" %in% params) + + # Check defaults + f <- get_current_fire_perimeters + expect_null(formals(f)$geography) + expect_null(formals(f)$file_path) + expect_null(formals(f)$bbox) + expect_true(formals(f)$api) +}) diff --git a/tests/testthat/test-get_emergency_management_performance.R b/tests/testthat/test-get_emergency_management_performance.R new file mode 100644 index 0000000..1d783ae --- /dev/null +++ b/tests/testthat/test-get_emergency_management_performance.R @@ -0,0 +1,24 @@ +# Tests for get_emergency_management_performance.R + +test_that("get_emergency_management_performance function signature is correct", { + expect_true(is.function(get_emergency_management_performance)) + + # Check parameter names + params <- names(formals(get_emergency_management_performance)) + expect_true("file_path" %in% params) + expect_true("api" %in% params) + + # Check defaults + f <- get_emergency_management_performance + expect_true(formals(f)$api) +}) + +test_that("get_emergency_management_performance validates file_path when api=FALSE", { + expect_error( + get_emergency_management_performance( + file_path = "/nonexistent/path/file.csv", + api = FALSE + ), + "valid `file_path`" + ) +}) diff --git a/tests/testthat/test-get_fema_disaster_declarations.R b/tests/testthat/test-get_fema_disaster_declarations.R new file mode 100644 index 0000000..7d748a0 --- /dev/null +++ b/tests/testthat/test-get_fema_disaster_declarations.R @@ -0,0 +1,27 @@ +# Tests for get_fema_disaster_declarations.R + +test_that("get_fema_disaster_declarations validates file_path when api=FALSE", { + # When api=FALSE, file_path must exist + expect_error( + get_fema_disaster_declarations( + file_path = "/nonexistent/path/file.csv", + api = FALSE + ), + "does not point to a valid file" + ) +}) + +test_that("get_fema_disaster_declarations function exists and has correct signature", { + expect_true(is.function(get_fema_disaster_declarations)) + + # Check default parameters + f <- get_fema_disaster_declarations + expect_true(formals(f)$api) +}) + 
+test_that("get_fema_disaster_declarations returns expected structure (mocked)", { + # This is a structural test - actual API calls require network access + # We're testing that the function signature and parameters are correct + expect_true("file_path" %in% names(formals(get_fema_disaster_declarations))) + expect_true("api" %in% names(formals(get_fema_disaster_declarations))) +}) diff --git a/tests/testthat/test-get_government_finances.R b/tests/testthat/test-get_government_finances.R new file mode 100644 index 0000000..e02824e --- /dev/null +++ b/tests/testthat/test-get_government_finances.R @@ -0,0 +1,18 @@ +# Tests for get_government_finances.R + +test_that("get_government_finances function signature is correct", { + expect_true(is.function(get_government_finances)) + + # Check parameter names + params <- names(formals(get_government_finances)) + expect_true("year" %in% params) + + # Check default year + f <- get_government_finances + expect_equal(formals(f)$year, 2022) +}) + +test_that("get_government_finances validates year parameter type", { + # Year should be numeric (function uses str_sub on it) + expect_true(is.function(get_government_finances)) +}) diff --git a/tests/testthat/test-get_hazard_mitigation_assistance.R b/tests/testthat/test-get_hazard_mitigation_assistance.R new file mode 100644 index 0000000..8df8d86 --- /dev/null +++ b/tests/testthat/test-get_hazard_mitigation_assistance.R @@ -0,0 +1,31 @@ +# Tests for get_hazard_mitigation_assistance.R + +test_that("get_hazard_mitigation_assistance validates state_abbreviations", { + expect_error( + get_hazard_mitigation_assistance(state_abbreviations = "XX"), + "Only the 50 states and DC" + ) +}) + +test_that("get_hazard_mitigation_assistance validates file paths", { + expect_error( + get_hazard_mitigation_assistance( + file_path_old_grant_system = "/nonexistent/path.parquet", + file_path_new_grant_system = "/also/nonexistent.parquet" + ), + "no file at the specified" + ) +}) + +test_that("get_hazard_mitigation_assistance function signature is correct", { + expect_true(is.function(get_hazard_mitigation_assistance)) + + # Check parameter names + params <- names(formals(get_hazard_mitigation_assistance)) + expect_true("file_path_old_grant_system" %in% params) + expect_true("file_path_new_grant_system" %in% params) + expect_true("state_abbreviations" %in% params) + + # Check default for state_abbreviations is NULL (meaning all states) + expect_null(formals(get_hazard_mitigation_assistance)$state_abbreviations) +}) diff --git a/tests/testthat/test-get_ihp_registrations.R b/tests/testthat/test-get_ihp_registrations.R new file mode 100644 index 0000000..579156d --- /dev/null +++ b/tests/testthat/test-get_ihp_registrations.R @@ -0,0 +1,108 @@ +# Tests for get_ihp_registrations.R +# +# NOTE: This function relies on large input data and is slow. +# Data is loaded once at the top of the test file to avoid repeated I/O. +# Validation/error tests call the function directly (not using cached data). 
+ +# --------------------------------------------------------------------------- +# Load data once for success tests (skip if Box path unavailable) +# --------------------------------------------------------------------------- +ihp_test_data <- NULL + +skip_if_no_box <- function() { + + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (is.null(box_path) || !dir.exists(box_path)) { + skip("Box path not available") + } +} + +# Attempt to load data once for all success tests +local({ + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (!is.null(box_path) && dir.exists(box_path)) { + ihp_test_data <<- tryCatch( + suppressWarnings(suppressMessages( + get_ihp_registrations(state_fips = "DC", api = FALSE) + )), + error = function(e) NULL + ) + } +}) + +# --------------------------------------------------------------------------- +# Validation tests (expected to fail - call function directly) +# --------------------------------------------------------------------------- +test_that("get_ihp_registrations validates file_name when api=FALSE", { + expect_error( + get_ihp_registrations( + file_name = "nonexistent_file.parquet", + api = FALSE + ), + "invalid" + ) +}) + +test_that("get_ihp_registrations function signature is correct", { + expect_true(is.function(get_ihp_registrations)) + + # Check parameter names + +params <- names(formals(get_ihp_registrations)) + expect_true("state_fips" %in% params) + expect_true("file_name" %in% params) + expect_true("api" %in% params) + expect_true("outpath" %in% params) + + # Check defaults + f <- get_ihp_registrations + expect_null(formals(f)$state_fips) + expect_false(formals(f)$api) + expect_null(formals(f)$outpath) +}) + +# --------------------------------------------------------------------------- +# Success tests (use pre-loaded data) +# --------------------------------------------------------------------------- +test_that("get_ihp_registrations returns expected columns", { + skip_if_no_box() + skip_if(is.null(ihp_test_data), "IHP test data not loaded") + + expected_cols <- c( + "unique_id", "allocation_factor_zcta_to_county", "geoid_county", + "zcta_code", "geoid_tract", "geoid_block_group", "disaster_number", + "amount_individual_housing_program", "amount_housing_assistance", + "amount_other_needs_assistance", "amount_rental_assistance", + "amount_repairs", "amount_replacement", "amount_personal_property", + "amount_flood_insurance_premium_paid_by_fema", + "state_name", "state_abbreviation", "state_code" + ) + + for (col in expected_cols) { + expect_true(col %in% names(ihp_test_data), info = paste("Missing column:", col)) + } +}) + +test_that("get_ihp_registrations returns a tibble", { + skip_if_no_box() + skip_if(is.null(ihp_test_data), "IHP test data not loaded") + + expect_s3_class(ihp_test_data, "tbl_df") +}) + +test_that("get_ihp_registrations allocation_factor is numeric between 0 and 1", { + skip_if_no_box() + skip_if(is.null(ihp_test_data), "IHP test data not loaded") + + alloc <- ihp_test_data$allocation_factor_zcta_to_county + expect_type(alloc, "double") + expect_true(all(alloc >= 0 & alloc <= 1, na.rm = TRUE)) +}) + +test_that("get_ihp_registrations geoid_county is 5 characters", { + skip_if_no_box() + skip_if(is.null(ihp_test_data), "IHP test data not loaded") + + geoids <- ihp_test_data$geoid_county[!is.na(ihp_test_data$geoid_county)] + expect_true(all(nchar(geoids) == 5)) +}) diff --git a/tests/testthat/test-get_lodes.R b/tests/testthat/test-get_lodes.R index 60cf041..97666d5 100644 --- 
a/tests/testthat/test-get_lodes.R +++ b/tests/testthat/test-get_lodes.R @@ -1,36 +1,20 @@ -testthat::test_that("states clearly errors when invalid state abbreviation is supplied", { +testthat::test_that("states clearly errors when invalid state abbreviation is supplied", { testthat::expect_error({get_lodes(lodes_type = "wac", year = 2022, states = "AB")}) - testthat::expect_error({get_lodes(lodes_type = "rac", year = 2022, states = "AB")}) - testthat::expect_error({get_lodes(lodes_type = "od", year = 2022, states = c("AL", "AM"))}) - - }) - testthat::test_that("warning generated when missing state and year combination is supplied", { - testthat::expect_warning({get_lodes(lodes_type = "od", year = 2022, states = c("AK", "MN"))}) - testthat::expect_warning({get_lodes(lodes_type = "wac", year = 2009, states = c("DC", "MN"))}) - - }) - testthat::test_that("error generated when invalid lodes_type is supplied", { - testthat::expect_error({get_lodes(lodes_type = "dc", year = 2022, states = c("AK", "MN"))}) - testthat::expect_error({get_lodes(lodes_type = "mac", year = 2009, states = c("DC", "MN"))}) - - }) - - testthat::test_that("variables have no negative values", { test <- get_lodes(lodes_type = "wac", year = 2022, states = "all") @@ -53,4 +37,3 @@ testthat::test_that("variables have no negative values", { NULL ) }) - diff --git a/tests/testthat/test-get_nfip_claims.R b/tests/testthat/test-get_nfip_claims.R new file mode 100644 index 0000000..17a1536 --- /dev/null +++ b/tests/testthat/test-get_nfip_claims.R @@ -0,0 +1,118 @@ +# Tests for get_nfip_claims.R +# +# NOTE: This function relies on large input data and is slow. +# Data is loaded once at the top of the test file to avoid repeated I/O. +# Validation/error tests call the function directly (not using cached data). 
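The skip_if_no_box()/local() preamble that follows repeats the pattern already used in test-get_ihp_registrations.R (and again in test-get_nfip_policies.R below). One possible consolidation, sketched only as an option: testthat sources files named helper-*.R in tests/testthat/ before the tests run, so the shared pieces could live in a single helper file. The load_box_data_once() name is hypothetical, not an existing package function.

# tests/testthat/helper-box.R (illustrative sketch)

skip_if_no_box <- function() {
  box_path <- tryCatch(get_box_path(), error = function(e) NULL)
  if (is.null(box_path) || !dir.exists(box_path)) {
    testthat::skip("Box path not available")
  }
}

load_box_data_once <- function(loader) {
  # Returns NULL instead of erroring when Box is unavailable or the load fails,
  # so success tests can bail out with skip_if(is.null(...)).
  box_path <- tryCatch(get_box_path(), error = function(e) NULL)
  if (is.null(box_path) || !dir.exists(box_path)) return(NULL)
  tryCatch(
    suppressWarnings(suppressMessages(loader())),
    error = function(e) NULL
  )
}

Each data-backed test file could then reduce its preamble to a single call, e.g. nfip_claims_test_data <- load_box_data_once(function() get_nfip_claims(county_geoids = "11001", api = FALSE)).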
+ +# --------------------------------------------------------------------------- +# Load data once for success tests (skip if Box path unavailable) +# --------------------------------------------------------------------------- +nfip_claims_test_data <- NULL + +skip_if_no_box <- function() { + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (is.null(box_path) || !dir.exists(box_path)) { + skip("Box path not available") + } +} + +# Attempt to load data once for all success tests (small county: DC = 11001) +local({ + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (!is.null(box_path) && dir.exists(box_path)) { + nfip_claims_test_data <<- tryCatch( + suppressWarnings(suppressMessages( + get_nfip_claims(county_geoids = "11001", api = FALSE) + )), + error = function(e) NULL + ) + } +}) + +# --------------------------------------------------------------------------- +# Validation tests (expected to fail - call function directly) +# --------------------------------------------------------------------------- +test_that("get_nfip_claims validates county_geoids when api=TRUE", { + expect_error( + get_nfip_claims(county_geoids = NULL, api = TRUE), + "must supply this argument" + ) +}) + +test_that("get_nfip_claims validates file_name when api=FALSE", { + expect_error( + get_nfip_claims(file_name = NA, api = FALSE), + "must provide a `file_name`" + ) +}) + +test_that("get_nfip_claims function signature is correct", { + expect_true(is.function(get_nfip_claims)) + + # Check parameter names + params <- names(formals(get_nfip_claims)) + expect_true("county_geoids" %in% params) + expect_true("file_name" %in% params) + expect_true("api" %in% params) + + # Check defaults + f <- get_nfip_claims + expect_null(formals(f)$county_geoids) + expect_false(formals(f)$api) +}) + +# --------------------------------------------------------------------------- +# Success tests (use pre-loaded data) +# --------------------------------------------------------------------------- +test_that("get_nfip_claims returns expected columns", { + skip_if_no_box() + skip_if(is.null(nfip_claims_test_data), "NFIP claims test data not loaded") + + expected_cols <- c( + "state_fips", "county_geoid", "county_name", "occupancy_type", + "year_loss", "year_construction", "count_units_insured", + "deductible_building", "deductible_contents", + "value_building", "value_contents", + "replacement_cost_building", "replacement_cost_contents", + "insurance_coverage_building", "insurance_coverage_contents", + "damage_building", "damage_contents", + "net_payment_building", "net_payment_contents", "net_payment_icc" + ) + + for (col in expected_cols) { + expect_true(col %in% names(nfip_claims_test_data), info = paste("Missing column:", col)) + } +}) + +test_that("get_nfip_claims returns a data frame", { + skip_if_no_box() + skip_if(is.null(nfip_claims_test_data), "NFIP claims test data not loaded") + + expect_true(is.data.frame(nfip_claims_test_data)) +}) + +test_that("get_nfip_claims county_geoid is 5 characters", { + skip_if_no_box() + skip_if(is.null(nfip_claims_test_data), "NFIP claims test data not loaded") + + geoids <- nfip_claims_test_data$county_geoid[!is.na(nfip_claims_test_data$county_geoid)] + if (length(geoids) > 0) { + expect_true(all(nchar(geoids) == 5)) + } +}) + +test_that("get_nfip_claims occupancy_type has expected categories", { + skip_if_no_box() + skip_if(is.null(nfip_claims_test_data), "NFIP claims test data not loaded") + + valid_types <- c("single family", "multi-family", 
"mobile/manufactured home", "non-residential", NA) + occupancy <- nfip_claims_test_data$occupancy_type + expect_true(all(occupancy %in% valid_types)) +}) + +test_that("get_nfip_claims year_loss is numeric", { + skip_if_no_box() + skip_if(is.null(nfip_claims_test_data), "NFIP claims test data not loaded") + + expect_type(nfip_claims_test_data$year_loss, "double") +}) diff --git a/tests/testthat/test-get_nfip_policies.R b/tests/testthat/test-get_nfip_policies.R new file mode 100644 index 0000000..a84596f --- /dev/null +++ b/tests/testthat/test-get_nfip_policies.R @@ -0,0 +1,105 @@ +# Tests for get_nfip_policies.R +# +# NOTE: This function relies on large input data and is slow. +# Data is loaded once at the top of the test file to avoid repeated I/O. +# Validation/error tests call the function directly (not using cached data). + +# --------------------------------------------------------------------------- +# Load data once for success tests (skip if Box path unavailable) +# --------------------------------------------------------------------------- +nfip_policies_test_data <- NULL + +skip_if_no_box <- function() { + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (is.null(box_path) || !dir.exists(box_path)) { + skip("Box path not available") + } +} + +# Attempt to load data once for all success tests (small state: DC) +local({ + box_path <- tryCatch(get_box_path(), error = function(e) NULL) + if (!is.null(box_path) && dir.exists(box_path)) { + nfip_policies_test_data <<- tryCatch( + suppressWarnings(suppressMessages( + get_nfip_policies(state_abbreviation = "DC", api = FALSE) + )), + error = function(e) NULL + ) + } +}) + +# --------------------------------------------------------------------------- +# Validation tests (expected to fail - call function directly) +# --------------------------------------------------------------------------- +test_that("get_nfip_policies validates file_name when api=FALSE", { + expect_error( + get_nfip_policies( + state_abbreviation = "TX", + file_name = "nonexistent_file.parquet", + api = FALSE + ), + "invalid" + ) +}) + +test_that("get_nfip_policies function signature is correct", { + expect_true(is.function(get_nfip_policies)) + + # Check parameter names + params <- names(formals(get_nfip_policies)) + expect_true("state_abbreviation" %in% params) + expect_true("county_geoids" %in% params) + expect_true("file_name" %in% params) + expect_true("api" %in% params) + + # Check defaults + f <- get_nfip_policies + expect_null(formals(f)$county_geoids) + expect_false(formals(f)$api) +}) + +# --------------------------------------------------------------------------- +# Success tests (use pre-loaded data) +# --------------------------------------------------------------------------- +test_that("get_nfip_policies returns expected columns", { + skip_if_no_box() + skip_if(is.null(nfip_policies_test_data), "NFIP policies test data not loaded") + + expected_cols <- c( + "state_fips", "state_abbreviation", "county_geoid", "county_name", + "census_tract", "policy_cost", "policy_count", "policy_rated_flood_zone", + "policy_premium_total_cost", "policy_date_termination", "policy_date_effective", + "building_occupancy_type", "building_replacement_cost" + ) + + for (col in expected_cols) { + expect_true(col %in% names(nfip_policies_test_data), info = paste("Missing column:", col)) + } +}) + +test_that("get_nfip_policies returns a data frame", { + skip_if_no_box() + skip_if(is.null(nfip_policies_test_data), "NFIP policies test data not loaded") + + 
expect_true(is.data.frame(nfip_policies_test_data)) +}) + +test_that("get_nfip_policies county_geoid is 5 characters", { + skip_if_no_box() + skip_if(is.null(nfip_policies_test_data), "NFIP policies test data not loaded") + + geoids <- nfip_policies_test_data$county_geoid[!is.na(nfip_policies_test_data$county_geoid)] + if (length(geoids) > 0) { + expect_true(all(nchar(geoids) == 5)) + } +}) + +test_that("get_nfip_policies building_occupancy_type has expected categories", { + skip_if_no_box() + skip_if(is.null(nfip_policies_test_data), "NFIP policies test data not loaded") + + valid_types <- c("single family", "multi-family", "mobile/manufactured home", "non-residential", NA) + occupancy <- nfip_policies_test_data$building_occupancy_type + expect_true(all(occupancy %in% valid_types)) +}) diff --git a/tests/testthat/test-get_preliminary_damage_assessments.R b/tests/testthat/test-get_preliminary_damage_assessments.R new file mode 100644 index 0000000..7e5a7c1 --- /dev/null +++ b/tests/testthat/test-get_preliminary_damage_assessments.R @@ -0,0 +1,15 @@ +# Tests for get_preliminary_damage_assessments.R + +test_that("get_preliminary_damage_assessments function signature is correct", { + expect_true(is.function(get_preliminary_damage_assessments)) + + # Check parameter names + params <- names(formals(get_preliminary_damage_assessments)) + expect_true("file_path" %in% params) + expect_true("directory_path" %in% params) + expect_true("use_cache" %in% params) + + # Check defaults + f <- get_preliminary_damage_assessments + expect_true(formals(f)$use_cache) +}) diff --git a/tests/testthat/test-get_public_assistance.R b/tests/testthat/test-get_public_assistance.R new file mode 100644 index 0000000..fb29429 --- /dev/null +++ b/tests/testthat/test-get_public_assistance.R @@ -0,0 +1,20 @@ +# Tests for get_public_assistance.R + +test_that("get_public_assistance validates state_abbreviations", { + expect_error( + get_public_assistance(state_abbreviations = "XX"), + "Only the 50 states and DC" + ) +}) + +test_that("get_public_assistance function signature is correct", { + expect_true(is.function(get_public_assistance)) + + # Check parameter names + params <- names(formals(get_public_assistance)) + expect_true("file_path" %in% params) + expect_true("state_abbreviations" %in% params) + + # Check default for state_abbreviations is NULL (meaning all states) + expect_null(formals(get_public_assistance)$state_abbreviations) +}) diff --git a/tests/testthat/test-get_sba_loans.R b/tests/testthat/test-get_sba_loans.R new file mode 100644 index 0000000..e837eb1 --- /dev/null +++ b/tests/testthat/test-get_sba_loans.R @@ -0,0 +1,15 @@ +# Tests for get_sba_loans.R + +test_that("get_sba_loans function signature is correct", { + expect_true(is.function(get_sba_loans)) + + # Function has no parameters + params <- names(formals(get_sba_loans)) + expect_equal(length(params), 0) +}) + +test_that("get_sba_loans validates data path internally", { + # The function checks if the path exists + # Since it uses get_box_path(), it will error if Box is not set up + expect_true(is.function(get_sba_loans)) +}) diff --git a/tests/testthat/test-get_sheldus.R b/tests/testthat/test-get_sheldus.R new file mode 100644 index 0000000..3f0ae08 --- /dev/null +++ b/tests/testthat/test-get_sheldus.R @@ -0,0 +1,16 @@ +# Tests for get_sheldus.R + +test_that("get_sheldus validates file_path", { + expect_error( + get_sheldus(file_path = "/nonexistent/path/file.csv"), + "does not point to a valid file" + ) +}) + +test_that("get_sheldus function signature 
is correct", { + expect_true(is.function(get_sheldus)) + + # Check parameter name exists + params <- names(formals(get_sheldus)) + expect_true("file_path" %in% params) +}) diff --git a/tests/testthat/test-get_structures.R b/tests/testthat/test-get_structures.R new file mode 100644 index 0000000..42716b6 --- /dev/null +++ b/tests/testthat/test-get_structures.R @@ -0,0 +1,36 @@ +# Tests for get_structures.R + +test_that("get_structures validates geography parameter", { + expect_error( + get_structures( + boundaries = sf::st_sfc(sf::st_point(c(0, 0)), crs = 4326), + geography = "invalid" + ), + "must be one of" + ) +}) + +test_that("get_structures validates boundaries CRS", { + # Create boundary without CRS + boundary_no_crs <- sf::st_sfc(sf::st_point(c(0, 0))) + + expect_error( + get_structures(boundaries = boundary_no_crs), + "must specify a spatial vector object" + ) +}) + +test_that("get_structures function signature is correct", { + expect_true(is.function(get_structures)) + + # Check parameter names + params <- names(formals(get_structures)) + expect_true("boundaries" %in% params) + expect_true("geography" %in% params) + expect_true("keep_structures" %in% params) + + # Check defaults + f <- get_structures + expect_equal(formals(f)$geography, "county") + expect_false(formals(f)$keep_structures) +}) diff --git a/tests/testthat/test-interpolate_demographics.R b/tests/testthat/test-interpolate_demographics.R new file mode 100644 index 0000000..1974881 --- /dev/null +++ b/tests/testthat/test-interpolate_demographics.R @@ -0,0 +1,30 @@ +# Tests for interpolate_demographics.R + +test_that("interpolate_demographics validates weights parameter", { + # Create a minimal sf object for testing + simple_poly <- sf::st_sfc( + sf::st_polygon(list(matrix(c(0, 0, 1, 0, 1, 1, 0, 1, 0, 0), ncol = 2, byrow = TRUE))), + crs = 5070 + ) + zones_sf <- sf::st_as_sf(data.frame(zone_id = "A"), geometry = simple_poly) + + # weights must be one of "population" or "housing" + # The function doesn't explicitly validate this but tidycensus will error + expect_true(is.function(interpolate_demographics)) +}) + +test_that("interpolate_demographics function signature is correct", { + expect_true(is.function(interpolate_demographics)) + + # Check parameter names + params <- names(formals(interpolate_demographics)) + expect_true("zones_sf" %in% params) + expect_true("sociodemographic_tracts_sf" %in% params) + expect_true("id_column" %in% params) + expect_true("weights" %in% params) + + # Check defaults + f <- interpolate_demographics + expect_null(formals(f)$sociodemographic_tracts_sf) + expect_equal(formals(f)$weights, "population") +}) diff --git a/tests/testthat/test-qualtrics_analysis.R b/tests/testthat/test-qualtrics_analysis.R new file mode 100644 index 0000000..e20c872 --- /dev/null +++ b/tests/testthat/test-qualtrics_analysis.R @@ -0,0 +1,182 @@ +# Tests for qualtrics_analysis.R functions + +test_that("qualtrics_format_metadata validates input parameters", { + # Create mock metadata + mock_metadata <- data.frame( + qname = c("Q1", "Q2", "Q3"), + main = c("Question 1 text", "Question 2 text", "Question 3 text"), + sub = c("Sub 1", "Sub 2", "Sub 3"), + stringsAsFactors = FALSE + ) + + # Test that function runs without error with valid inputs + expect_no_error(qualtrics_format_metadata(mock_metadata)) + + # Test with sections parameter + sections <- c("Section A" = 2, "Section B" = 3) + expect_no_error(qualtrics_format_metadata(mock_metadata, sections = sections)) +}) + +test_that("qualtrics_format_metadata returns 
expected structure", { + mock_metadata <- data.frame( + qname = c("Q1", "Q2", "Q3", "Q4"), + main = c("Main 1", "Main 2", "Main 3", "Main 4"), + sub = c("Sub 1", "Sub 2", "Sub 3", "Sub 4"), + stringsAsFactors = FALSE + ) + + result <- qualtrics_format_metadata(mock_metadata) + + # Check that result is a data frame/tibble + expect_true(is.data.frame(result)) + + # Check expected columns exist + expect_true("question_number" %in% names(result)) + expect_true("question_name" %in% names(result)) + expect_true("text_main" %in% names(result)) + expect_true("text_sub" %in% names(result)) + + # Check row count matches input +expect_equal(nrow(result), 4) + + # Check question_number is sequential + expect_equal(result$question_number, 1:4) +}) + +test_that("qualtrics_format_metadata handles sections correctly", { + mock_metadata <- data.frame( + qname = c("Q1", "Q2", "Q3", "Q4"), + main = c("Main 1", "Main 2", "Main 3", "Main 4"), + sub = c("Sub 1", "Sub 2", "Sub 3", "Sub 4"), + stringsAsFactors = FALSE + ) + + sections <- c("Section A" = 2, "Section B" = 4) + result <- qualtrics_format_metadata(mock_metadata, sections = sections) + + # Check survey_section column exists + expect_true("survey_section" %in% names(result)) + + # Check sections are filled correctly (upward fill) + expect_equal(result$survey_section[1:2], c("Section A", "Section A")) + expect_equal(result$survey_section[3:4], c("Section B", "Section B")) +}) + +test_that("qualtrics_get_metadata validates input parameters", { + mock_metadata <- data.frame( + question_number = 1:3, + question_name = c("Q1", "Q2", "Q3"), + text_main = c("Main 1", "Main 2", "Main 3"), + text_sub = c("Sub 1", "Sub 2", "Sub 3"), + survey_section = c("A", "A", "B"), + stringsAsFactors = FALSE + ) + + # Test that function errors when neither question_name nor survey_section provided + expect_error( + qualtrics_get_metadata(mock_metadata), + "One of `survey_section` and `question_name` must be supplied" + ) + + # Test that function runs with question_name + expect_no_error(qualtrics_get_metadata(mock_metadata, question_name = "Q1")) + + # Test that function runs with survey_section + expect_no_error(qualtrics_get_metadata(mock_metadata, survey_section = "A")) +}) + +test_that("qualtrics_get_metadata returns correct values", { + mock_metadata <- data.frame( + question_number = 1:3, + question_name = c("Q1", "Q2", "Q3"), + text_main = c("Main 1", "Main 2", "Main 3"), + text_sub = c("Sub 1", "Sub 2", "Sub 3"), + survey_section = c("A", "A", "B"), + stringsAsFactors = FALSE + ) + + # Test filtering by question_name + result <- qualtrics_get_metadata(mock_metadata, question_name = "Q1") + expect_equal(result, "Sub 1") + + # Test filtering by survey_section + result <- qualtrics_get_metadata(mock_metadata, survey_section = "A") + expect_equal(length(result), 2) + + # Test custom return_values + result <- qualtrics_get_metadata(mock_metadata, question_name = "Q2", return_values = "text_main") + expect_equal(result, "Main 2") +}) + +test_that("qualtrics_define_missing validates input parameters", { + mock_df <- data.frame( + Q1_a = c("Yes", NA, "Yes"), + Q1_b = c(NA, "No", "Yes"), + Q2 = c("Yes", "No", NA), + stringsAsFactors = FALSE + ) + + # Test that default_values must be a list of length 3 + expect_error( + qualtrics_define_missing(mock_df, question_code_include = "Q1", default_values = list("No")), + "`default_values` must be a list of length 3" + ) + + expect_error( + qualtrics_define_missing(mock_df, question_code_include = "Q1", default_values = "No"), + 
"`default_values` must be a list of length 3" + ) + + # Test that function runs with valid inputs + expect_no_error( + qualtrics_define_missing(mock_df, question_code_include = "Q1") + ) +}) + +test_that("qualtrics_define_missing returns expected structure", { + mock_df <- data.frame( + Q1_a = c("Yes", NA, "Yes"), + Q1_b = c(NA, "No", "Yes"), + Q2 = c("Yes", "No", NA), + stringsAsFactors = FALSE + ) + + result <- qualtrics_define_missing(mock_df, question_code_include = "Q1") + + # Check that result is a data frame + expect_true(is.data.frame(result)) + + # Check that only Q1 columns are returned (not Q2) + expect_true("Q1_a" %in% names(result)) + expect_true("Q1_b" %in% names(result)) + expect_false("Q2" %in% names(result)) +}) + +test_that("qualtrics_define_missing handles predicate question validation", { + mock_df <- data.frame( + predicate = c("Yes", "No", NA), + Q1_a = c("A", NA, "C"), + Q1_b = c(NA, "B", "D"), + stringsAsFactors = FALSE + ) + + # Test that predicate_question must exist in df + expect_error( + qualtrics_define_missing( + mock_df, + question_code_include = "Q1", + predicate_question = "nonexistent" + ), + "Predicate question not found" + ) + + # Test that predicate_question_negative_value must be provided with predicate_question + expect_error( + qualtrics_define_missing( + mock_df, + question_code_include = "Q1", + predicate_question = "predicate" + ), + "negative value must also be provided" + ) +}) diff --git a/tests/testthat/test-read_ipums_cached.R b/tests/testthat/test-read_ipums_cached.R new file mode 100644 index 0000000..bd342f5 --- /dev/null +++ b/tests/testthat/test-read_ipums_cached.R @@ -0,0 +1,60 @@ +# Tests for read_ipums_cached.R + +test_that("read_ipums_cached validates filename parameter", { + expect_error( + read_ipums_cached( + filename = 123, + download_directory = tempdir(), + extract_definition = NULL + ), + "must be a character string" + ) +}) + +test_that("read_ipums_cached validates download_directory parameter", { + expect_error( + read_ipums_cached( + filename = "test", + download_directory = 123, + extract_definition = NULL + ), + "must be a character string" + ) +}) + +test_that("read_ipums_cached validates refresh parameter", { + expect_error( + read_ipums_cached( + filename = "test", + download_directory = tempdir(), + extract_definition = NULL, + refresh = "yes" + ), + "must be either" + ) +}) + +test_that("read_ipums_cached validates download_directory exists", { + expect_error( + read_ipums_cached( + filename = "test", + download_directory = "/nonexistent/path", + extract_definition = NULL + ), + "does not exist" + ) +}) + +test_that("read_ipums_cached function signature is correct", { + expect_true(is.function(read_ipums_cached)) + + # Check parameter names + params <- names(formals(read_ipums_cached)) + expect_true("filename" %in% params) + expect_true("download_directory" %in% params) + expect_true("extract_definition" %in% params) + expect_true("refresh" %in% params) + + # Check default for refresh + expect_false(formals(read_ipums_cached)$refresh) +}) diff --git a/tests/testthat/test-spatial_analysis.R b/tests/testthat/test-spatial_analysis.R new file mode 100644 index 0000000..4d18ed8 --- /dev/null +++ b/tests/testthat/test-spatial_analysis.R @@ -0,0 +1,142 @@ +# Tests for spatial_analysis.R functions + +test_that("subdivide_linestring runs without error with valid inputs", { + # Create a linestring in projected CRS (5070 uses meters) + # Line from (0,0) to (1000,0) in meters + + coords <- matrix(c(0, 0, 1000, 0), ncol = 2, 
byrow = TRUE) + line <- sf::st_sfc(sf::st_linestring(coords), crs = 5070) + line_sf <- sf::st_as_sf(data.frame(id = 1), geometry = line) + + # Test that function runs without error with valid inputs + expect_no_error(subdivide_linestring(line_sf, max_length = 500)) +}) + +test_that("subdivide_linestring applies crs parameter correctly", { + # Create test data in projected CRS + coords <- matrix(c(0, 0, 1000, 0), ncol = 2, byrow = TRUE) + line <- sf::st_sfc(sf::st_linestring(coords), crs = 5070) + line_sf <- sf::st_as_sf(data.frame(id = 1), geometry = line) + + # Test with default crs (5070) + result <- subdivide_linestring(line_sf, max_length = 500) + expect_equal(sf::st_crs(result)$epsg, 5070) + + # Test with explicit different crs + result2 <- subdivide_linestring(line_sf, max_length = 500, crs = 3857) + expect_equal(sf::st_crs(result2)$epsg, 3857) +}) + +test_that("subdivide_linestring returns expected structure", { + # Create test data - line of 1000 meters + coords <- matrix(c(0, 0, 1000, 0), ncol = 2, byrow = TRUE) + line <- sf::st_sfc(sf::st_linestring(coords), crs = 5070) + line_sf <- sf::st_as_sf(data.frame(attr1 = "test", attr2 = 123), geometry = line) + + result <- subdivide_linestring(line_sf, max_length = 500, crs = 5070) + + # Check that result is an sf object + expect_s3_class(result, "sf") + + # Check that geometry type is LINESTRING + expect_true(all(sf::st_geometry_type(result) == "LINESTRING")) + + # Check that original attributes are preserved + expect_true("attr1" %in% names(result)) + expect_true("attr2" %in% names(result)) + + # Check that row_id column exists + expect_true("row_id" %in% names(result)) +}) + +test_that("subdivide_linestring subdivides long lines", { + # Create a line that's 1000 meters long + coords <- matrix(c(0, 0, 1000, 0), ncol = 2, byrow = TRUE) + line <- sf::st_sfc(sf::st_linestring(coords), crs = 5070) + line_sf <- sf::st_as_sf(data.frame(id = 1), geometry = line) + + # Subdivide into segments of max 400 meters - should create 3 segments + result <- subdivide_linestring(line_sf, max_length = 400, crs = 5070) + + # Should have more than 1 row (line was subdivided) + expect_gt(nrow(result), 1) +}) + +test_that("subdivide_linestring preserves short lines unchanged", { + # Create a line that's only 100 meters long + coords <- matrix(c(0, 0, 100, 0), ncol = 2, byrow = TRUE) + line <- sf::st_sfc(sf::st_linestring(coords), crs = 5070) + line_sf <- sf::st_as_sf(data.frame(id = 1), geometry = line) + + # With max_length = 500, line should not be subdivided + result <- subdivide_linestring(line_sf, max_length = 500, crs = 5070) + + # Should still have 1 row + expect_equal(nrow(result), 1) +}) + +test_that("polygons_to_linestring runs without error with valid inputs", { + + # Create a MULTIPOLYGON for testing (function uses L3 from st_coordinates which + # requires MULTIPOLYGON geometry) + coords <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1, 0, 0), ncol = 2, byrow = TRUE) + poly <- sf::st_polygon(list(coords)) + mpoly <- sf::st_sfc(sf::st_multipolygon(list(list(coords))), crs = 5070) + poly_sf <- sf::st_as_sf(data.frame(id = 1), geometry = mpoly) + + # Test that function runs without error with valid inputs + expect_no_error(polygons_to_linestring(poly_sf)) +}) + +test_that("polygons_to_linestring returns expected structure", { + # Create MULTIPOLYGON test data + coords <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1, 0, 0), ncol = 2, byrow = TRUE) + mpoly <- sf::st_sfc(sf::st_multipolygon(list(list(coords))), crs = 5070) + poly_sf <- sf::st_as_sf(data.frame(attr1 = "test"), 
geometry = mpoly) + + result <- polygons_to_linestring(poly_sf) + + # Check that result is an sf object + expect_s3_class(result, "sf") + + # Check that geometry type is LINESTRING + expect_true(all(sf::st_geometry_type(result) == "LINESTRING")) + + # Check that polygon_id column exists + expect_true("polygon_id" %in% names(result)) + + # Check that line_id column exists + expect_true("line_id" %in% names(result)) + + # Check that original attributes are preserved + expect_true("attr1" %in% names(result)) + + # A square polygon should produce 4 line segments (one per edge) + expect_equal(nrow(result), 4) +}) + +test_that("polygons_to_linestring preserves original attributes", { + # Create MULTIPOLYGON test data with multiple attributes + coords <- matrix(c(0, 0, 1, 0, 1, 1, 0, 1, 0, 0), ncol = 2, byrow = TRUE) + mpoly <- sf::st_sfc(sf::st_multipolygon(list(list(coords))), crs = 5070) + poly_sf <- sf::st_as_sf( + data.frame( + name = "test_polygon", + value = 42, + category = "A" + ), + geometry = mpoly + ) + + result <- polygons_to_linestring(poly_sf) + + # Check all original attributes are present + expect_true("name" %in% names(result)) + expect_true("value" %in% names(result)) + expect_true("category" %in% names(result)) + + # Check that attribute values are correct + expect_equal(unique(result$name), "test_polygon") + expect_equal(unique(result$value), 42) + expect_equal(unique(result$category), "A") +}) diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R new file mode 100644 index 0000000..bc8c097 --- /dev/null +++ b/tests/testthat/test-utilities.R @@ -0,0 +1,118 @@ +# Tests for utilities.R functions + +test_that("get_system_username returns a character string", { + result <- get_system_username() + expect_type(result, "character") + expect_true(nchar(result) > 0) +}) + +test_that("get_box_path returns a valid path string", { + result <- get_box_path() + expect_type(result, "character") + expect_true(stringr::str_detect(result, "Box")) + expect_true(stringr::str_detect(result, "METRO Climate and Communities")) +}) + +test_that("get_dataset_columns validates input parameters", { + # Test that dataset must be character + expect_error(get_dataset_columns(123)) + + # Test that dataset must be length one + expect_error(get_dataset_columns(c("nfip_policies", "ihp_registrations"))) + + # Test that dataset must be a valid option + expect_error( + get_dataset_columns("invalid_dataset"), + "must be one of" + ) + + # Test valid inputs work + expect_no_error(get_dataset_columns("nfip_policies")) + expect_no_error(get_dataset_columns("ihp_registrations")) +}) + +test_that("get_dataset_columns returns expected structure", { + # Test nfip_policies + result <- get_dataset_columns("nfip_policies") + expect_type(result, "character") + expect_true(length(result) > 0) + expect_true("censusTract" %in% result) + expect_true("policyCost" %in% result) + + # Test ihp_registrations + result <- get_dataset_columns("ihp_registrations") + expect_type(result, "character") + expect_true(length(result) > 0) + expect_true("disasterNumber" %in% result) +}) + +test_that("get_geography_metadata validates input parameters", { + # Test that geography_type must be valid + expect_error( + get_geography_metadata(geography_type = "invalid") + ) + + # Test valid inputs work (note: these require API access, so we just check they don't error on parameter validation) + expect_true(is.function(get_geography_metadata)) +}) + +test_that("get_spatial_extent_census validates input parameters", { + # Function 
exists and is callable + expect_true(is.function(get_spatial_extent_census)) +}) + +test_that("read_xlsx_from_url validates input parameters", { + # Test that urls and file_names must be same length + expect_error( + read_xlsx_from_url( + urls = c("http://example.com/a.xlsx", "http://example.com/b.xlsx"), + directory = tempdir(), + file_names = c("file1.xlsx") + ), + "same length" + ) + + # Test that directory cannot be a file path + expect_error( + read_xlsx_from_url( + urls = "http://example.com/test.xlsx", + directory = "path/to/file.xlsx" + ), + "must point to a directory" + ) +}) + +test_that("inflation_adjust validates and processes correctly", { + # Create test data + test_df <- tibble::tibble( + year = c(2020, 2021, 2022), + amount = c(100, 100, 100) + ) + + # Function exists + expect_true(is.function(inflation_adjust)) +}) + +test_that("convert_delimited_to_parquet validates input parameters", { + # Test that outpath check works + temp_file <- tempfile(fileext = ".parquet") + file.create(temp_file) + + expect_error( + convert_delimited_to_parquet( + inpath = "test.csv", + outpath = temp_file + ), + "file already exists" + ) + + unlink(temp_file) + + # Test that dataset validation works + expect_error( + convert_delimited_to_parquet( + inpath = "test.csv", + dataset = "invalid_dataset" + ) + ) +})
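A usage note on running this suite: when the Box share is not configured, the data-backed tests above skip themselves and the validation and signature tests still run. For quick iteration on a single file, standard testthat/devtools calls suffice (paths assume the package root as the working directory):

# Run one test file directly, or filter the suite by file-name fragment.
testthat::test_file("tests/testthat/test-get_nfip_claims.R")
devtools::test(filter = "utilities")   # matches test-utilities.R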