diff --git a/.Rbuildignore b/.Rbuildignore index e283198..c174047 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,9 +1,11 @@ -^renv$ -^renv\.lock$ -^.*\.Rproj$ -^\.Rproj\.user$ -^LICENSE\.md$ -^_pkgdown\.yml$ -^docs$ -^pkgdown$ -^\.github$ +^renv$ +^renv\.lock$ +^.*\.Rproj$ +^\.Rproj\.user$ +^LICENSE\.md$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^\.github$ +^doc$ +^Meta$ diff --git a/.beads/.gitignore b/.beads/.gitignore deleted file mode 100644 index d27a1db..0000000 --- a/.beads/.gitignore +++ /dev/null @@ -1,44 +0,0 @@ -# SQLite databases -*.db -*.db?* -*.db-journal -*.db-wal -*.db-shm - -# Daemon runtime files -daemon.lock -daemon.log -daemon.pid -bd.sock -sync-state.json -last-touched - -# Local version tracking (prevents upgrade notification spam after git ops) -.local_version - -# Legacy database files -db.sqlite -bd.db - -# Worktree redirect file (contains relative path to main repo's .beads/) -# Must not be committed as paths would be wrong in other clones -redirect - -# Merge artifacts (temporary files from 3-way merge) -beads.base.jsonl -beads.base.meta.json -beads.left.jsonl -beads.left.meta.json -beads.right.jsonl -beads.right.meta.json - -# Sync state (local-only, per-machine) -# These files are machine-specific and should not be shared across clones -.sync.lock -sync_base.jsonl - -# NOTE: Do NOT add negation patterns (e.g., !issues.jsonl) here. -# They would override fork protection in .git/info/exclude, allowing -# contributors to accidentally commit upstream issue databases. -# The JSONL files (issues.jsonl, interactions.jsonl) and config files -# are tracked by git by default since no pattern above ignores them. diff --git a/.beads/README.md b/.beads/README.md deleted file mode 100644 index 50f281f..0000000 --- a/.beads/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# Beads - AI-Native Issue Tracking - -Welcome to Beads! 
This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your codebase alongside your code. - -## What is Beads? - -Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git. - -**Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads) - -## Quick Start - -### Essential Commands - -```bash -# Create new issues -bd create "Add user authentication" - -# View all issues -bd list - -# View issue details -bd show - -# Update issue status -bd update --status in_progress -bd update --status done - -# Sync with git remote -bd sync -``` - -### Working with Issues - -Issues in Beads are: -- **Git-native**: Stored in `.beads/issues.jsonl` and synced like code -- **AI-friendly**: CLI-first design works perfectly with AI coding agents -- **Branch-aware**: Issues can follow your branch workflow -- **Always in sync**: Auto-syncs with your commits - -## Why Beads? 
- -✨ **AI-Native Design** -- Built specifically for AI-assisted development workflows -- CLI-first interface works seamlessly with AI coding agents -- No context switching to web UIs - -🚀 **Developer Focused** -- Issues live in your repo, right next to your code -- Works offline, syncs when you push -- Fast, lightweight, and stays out of your way - -🔧 **Git Integration** -- Automatic sync with git commits -- Branch-aware issue tracking -- Intelligent JSONL merge resolution - -## Get Started with Beads - -Try Beads in your own projects: - -```bash -# Install Beads -curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash - -# Initialize in your repo -bd init - -# Create your first issue -bd create "Try out Beads" -``` - -## Learn More - -- **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs) -- **Quick Start Guide**: Run `bd quickstart` -- **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples) - ---- - -*Beads: Issue tracking that moves at the speed of thought* ⚡ diff --git a/.beads/config.yaml b/.beads/config.yaml deleted file mode 100644 index f242785..0000000 --- a/.beads/config.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Beads Configuration File -# This file configures default behavior for all bd commands in this repository -# All settings can also be set via environment variables (BD_* prefix) -# or overridden with command-line flags - -# Issue prefix for this repository (used by bd init) -# If not set, bd init will auto-detect from directory name -# Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc. 
-# issue-prefix: "" - -# Use no-db mode: load from JSONL, no SQLite, write back after each command -# When true, bd will use .beads/issues.jsonl as the source of truth -# instead of SQLite database -# no-db: false - -# Disable daemon for RPC communication (forces direct database access) -# no-daemon: false - -# Disable auto-flush of database to JSONL after mutations -# no-auto-flush: false - -# Disable auto-import from JSONL when it's newer than database -# no-auto-import: false - -# Enable JSON output by default -# json: false - -# Default actor for audit trails (overridden by BD_ACTOR or --actor) -# actor: "" - -# Path to database (overridden by BEADS_DB or --db) -# db: "" - -# Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON) -# auto-start-daemon: true - -# Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE) -# flush-debounce: "5s" - -# Git branch for beads commits (bd sync will commit to this branch) -# IMPORTANT: Set this for team projects so all clones use the same sync branch. -# This setting persists across clones (unlike database config which is gitignored). -# Can also use BEADS_SYNC_BRANCH env var for local override. -# If not set, bd sync will require you to run 'bd config set sync.branch '. -# sync-branch: "beads-sync" - -# Multi-repo configuration (experimental - bd-307) -# Allows hydrating from multiple repositories and routing writes to the correct JSONL -# repos: -# primary: "." 
# Primary repo (where this database lives) -# additional: # Additional repos to hydrate from (read-only) -# - ~/beads-planning # Personal planning repo -# - ~/work-planning # Work planning repo - -# Integration settings (access with 'bd config get/set') -# These are stored in the database, not in this file: -# - jira.url -# - jira.project -# - linear.url -# - linear.api-key -# - github.org -# - github.repo diff --git a/.beads/interactions.jsonl b/.beads/interactions.jsonl deleted file mode 100644 index e69de29..0000000 diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl deleted file mode 100644 index e69de29..0000000 diff --git a/.beads/metadata.json b/.beads/metadata.json deleted file mode 100644 index c787975..0000000 --- a/.beads/metadata.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "database": "beads.db", - "jsonl_export": "issues.jsonl" -} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6ea6533..97e56b2 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,7 @@ config.yml docs CLAUDE.md settings.local.json +/doc/ +/Meta/ + +/.quarto/ diff --git a/AGENTS.md b/AGENTS.md index df7a4af..e58b193 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,6 +35,6 @@ bd sync # Sync with git **CRITICAL RULES:** - Work is NOT complete until `git push` succeeds - NEVER stop before pushing - that leaves work stranded locally -- NEVER say "ready to push when you are" - YOU must push +- NEVER say "ready to push when you are" - YOU (Claude) must push - If push fails, resolve and retry until it succeeds diff --git a/R/crosswalk_data.R b/R/crosswalk_data.R index f909391..441d69c 100644 --- a/R/crosswalk_data.R +++ b/R/crosswalk_data.R @@ -4,8 +4,9 @@ #' Interpolate data using a crosswalk(s) #' #' Applies geographic crosswalk weights to transform data from a source geography -#' to a target geography. Accepts the output from `get_crosswalk()` and automatically -#' applies all crosswalk steps sequentially for multi-step transformations. +#' to a target geography. 
Can either accept a pre-fetched crosswalk from +#' `get_crosswalk()` or fetch the crosswalk automatically using the provided +#' geography and year parameters. #' #' @param data A data frame or tibble containing the data to crosswalk. #' @param crosswalk The output from `get_crosswalk()` - a list containing: @@ -15,9 +16,24 @@ #' \item{message}{Description of the crosswalk chain} #' } #' Alternatively, a single crosswalk tibble can be provided for backwards -#' compatibility. +#' compatibility. If NULL, the crosswalk will be fetched using `source_geography` +#' and `target_geography` parameters. +#' @param source_geography Character or NULL. Source geography name. Required if +#' `crosswalk` is NULL. One of c("block", "block group", "tract", "place", +#' "county", "urban_area", "zcta", "puma", "cd118", "cd119", +#' "core_based_statistical_area"). +#' @param target_geography Character or NULL. Target geography name. Required if +#' `crosswalk` is NULL. Same options as `source_geography`. +#' @param source_year Numeric or NULL. Year of the source geography. If NULL and +#' crosswalk is being fetched, uses same-year crosswalk via Geocorr. +#' @param target_year Numeric or NULL. Year of the target geography. If NULL and +#' crosswalk is being fetched, uses same-year crosswalk via Geocorr. +#' @param weight Character. Weighting variable for Geocorr crosswalks when fetching. +#' One of c("population", "housing", "land"). Default is "population". +#' @param cache Directory path or NULL. Where to cache fetched crosswalks. If NULL +#' (default), crosswalk is fetched but not saved to disk. #' @param geoid_column Character. The name of the column in `data` containing -#' the source geography identifiers (GEOIDs). Default is "geoid". +#' the source geography identifiers (GEOIDs). Default is "source_geoid". #' @param count_columns Character vector or NULL. Column names in `data` that represent #' count variables. These will be summed after multiplying by the allocation factor. 
#' If NULL (default), automatically detects columns with the prefix "count_". @@ -47,6 +63,15 @@ #' underlying crosswalk (access via `attr(result, "crosswalk_metadata")`). #' #' @details +#' **Two usage patterns**: +#' +#' 1. **Pre-fetched crosswalk**: Pass the output of `get_crosswalk()` to the +#' `crosswalk` parameter. Useful when you want to inspect or reuse the crosswalk. +#' +#' 2. **Direct crosswalking**: Pass `source_geography` and `target_geography` +#' (and optionally `source_year`, `target_year`, `weight`, `cache`) and the +#' crosswalk will be fetched automatically. Useful for one-off transformations. +#' #' **Count variables** (specified in `count_columns`) are interpolated by summing #' the product of the value and the allocation factor across all source geographies #' that overlap with each target geography. @@ -72,7 +97,7 @@ #' @export #' @examples #' \dontrun{ -#' # Single-step crosswalk +#' # Option 1: Pre-fetched crosswalk #' crosswalk <- get_crosswalk( #' source_geography = "tract", #' target_geography = "zcta", @@ -84,22 +109,34 @@ #' geoid_column = "tract_geoid", #' count_columns = c("count_population", "count_housing_units")) #' -#' # Multi-step crosswalk (geography + year change) -#' crosswalk <- get_crosswalk( +#' # Option 2: Direct crosswalking (crosswalk fetched automatically) +#' result <- crosswalk_data( +#' data = my_tract_data, #' source_geography = "tract", #' target_geography = "zcta", -#' source_year = 2010, -#' target_year = 2020, -#' weight = "population") +#' weight = "population", +#' geoid_column = "tract_geoid", +#' count_columns = c("count_population", "count_housing_units")) #' -#' # Automatically applies both steps +#' # Direct crosswalking with year change #' result <- crosswalk_data( #' data = my_data, -#' crosswalk = crosswalk, +#' source_geography = "tract", +#' target_geography = "zcta", +#' source_year = 2010, +#' target_year = 2020, +#' weight = "population", #' geoid_column = "tract_geoid", #' count_columns = 
"count_population") #' -#' # To get intermediate results +#' # Pre-fetched crosswalk with intermediate results +#' crosswalk <- get_crosswalk( +#' source_geography = "tract", +#' target_geography = "zcta", +#' source_year = 2010, +#' target_year = 2020, +#' weight = "population") +#' #' result <- crosswalk_data( #' data = my_data, #' crosswalk = crosswalk, @@ -114,13 +151,47 @@ crosswalk_data <- function( data, - crosswalk, - geoid_column = "geoid", + crosswalk = NULL, + source_geography = NULL, + target_geography = NULL, + source_year = NULL, + target_year = NULL, + weight = "population", + cache = NULL, + geoid_column = "source_geoid", count_columns = NULL, non_count_columns = NULL, return_intermediate = FALSE, show_join_quality = TRUE) { + # Determine if we need to fetch the crosswalk + crosswalk_provided <- !is.null(crosswalk) + geography_provided <- !is.null(source_geography) && !is.null(target_geography) + + if (!crosswalk_provided && !geography_provided) { + stop( + "Either provide a crosswalk via the 'crosswalk' parameter, or provide ", + "'source_geography' and 'target_geography' to fetch a crosswalk automatically.") + } + + if (crosswalk_provided && geography_provided) { + warning( + "Both 'crosswalk' and geography parameters provided. ", + "Using the provided 'crosswalk' and ignoring geography parameters.") + } + + # Fetch crosswalk if not provided + if (!crosswalk_provided) { + message("Fetching crosswalk from ", source_geography, " to ", target_geography, "...") + crosswalk <- get_crosswalk( + source_geography = source_geography, + target_geography = target_geography, + source_year = source_year, + target_year = target_year, + weight = weight, + cache = cache) + } + # Determine if crosswalk is a list (from get_crosswalk) or a single tibble crosswalk_list <- extract_crosswalk_list(crosswalk) @@ -236,7 +307,8 @@ extract_crosswalk_list <- function(crosswalk) { stop( "Invalid crosswalk input. Expected either:\n", " 1. 
Output from get_crosswalk() (a list with $crosswalks element), or\n", - " 2. A single crosswalk tibble with columns: source_geoid, target_geoid, allocation_factor_source_to_target") + " 2. A single crosswalk tibble with columns: source_geoid, target_geoid, allocation_factor_source_to_target\n", + "Alternatively, leave 'crosswalk' NULL and provide 'source_geography' and 'target_geography' to fetch automatically.") } @@ -617,8 +689,13 @@ apply_single_crosswalk <- function( # Identify "other" columns (not geoid, count, or non-count columns) # These will be aggregated by taking the first non-missing value + # Include both original crosswalk column names AND their renamed versions + # (e.g., "geography_name" which comes from "target_geography_name" after renaming) + # to prevent duplicates in multi-step crosswalks crosswalk_cols <- c("source_geoid", "target_geoid", "allocation_factor_source_to_target", - "target_geography_name", "weighting_factor", "source_year", "target_year", + "source_geography_name", "target_geography_name", + "geography_name", "geoid", + "weighting_factor", "source_year", "target_year", "population_2020", "housing_2020", "land_area_sqmi") other_cols <- setdiff( names(data), diff --git a/R/get_crosswalk.R b/R/get_crosswalk.R index 5f8b0f3..0cab106 100644 --- a/R/get_crosswalk.R +++ b/R/get_crosswalk.R @@ -203,9 +203,10 @@ get_crosswalk_single <- function( use_geocorr <- is.null(source_year) || is.null(target_year) || (!is.null(source_year) && !is.null(target_year) && isTRUE(source_year == target_year)) - # Use CTData for 2020 to 2022 (Connecticut planning region changes) + # Use CTData for 2020 <-> 2022 (Connecticut planning region changes) use_ctdata <- !is.null(source_year_chr) && !is.null(target_year_chr) && - source_year_chr == "2020" && target_year_chr == "2022" + ((source_year_chr == "2020" && target_year_chr == "2022") || + (source_year_chr == "2022" && target_year_chr == "2020")) if (use_geocorr) { crosswalk_source <- "geocorr" @@ -219,6 
+220,8 @@ get_crosswalk_single <- function( if (crosswalk_source == "ctdata_2020_2022") { result <- get_crosswalk_2020_2022( geography = source_geography, + source_year = source_year, + target_year = target_year, cache = cache) } else if (crosswalk_source == "nhgis") { @@ -317,22 +320,25 @@ get_crosswalk_single <- function( } -#' Get 2020 to 2022 Crosswalk (National) +#' Get 2020 <-> 2022 Crosswalk (National) #' -#' Internal function that handles the special case of 2020 to 2022 crosswalks. +#' Internal function that handles the special case of 2020 to 2022 crosswalks +#' (and the reverse direction for identity crosswalks). #' Returns a nationally comprehensive crosswalk with Connecticut data from #' CT Data Collaborative (handling the planning region changes) and identity #' mappings for all other states (where no changes occurred). #' #' @param geography Character. Geography type: one of "block", "block_group", -#' "tract", or "county". +#' "tract", or "county" (county only for 2020 -> 2022 direction). +#' @param source_year Numeric. Year of the source geography, either 2020 or 2022. +#' @param target_year Numeric. Year of the target geography, either 2020 or 2022. #' @param cache Directory path for caching component crosswalks. #' -#' @return A tibble containing the national 2020-2022 crosswalk with Connecticut +#' @return A tibble containing the national crosswalk with Connecticut #' from CTData and identity mappings for other states. 
#' @keywords internal #' @noRd -get_crosswalk_2020_2022 <- function(geography, cache = NULL) { +get_crosswalk_2020_2022 <- function(geography, source_year = 2020, target_year = 2022, cache = NULL) { geography_standardized <- geography |> stringr::str_to_lower() |> @@ -348,12 +354,14 @@ get_crosswalk_2020_2022 <- function(geography, cache = NULL) { if (is.na(geography_standardized)) { stop( -"2020 to 2022 crosswalks are only available for blocks, block groups, tracts, +"2020 <-> 2022 crosswalks are only available for blocks, block groups, tracts, and counties. The provided geography '", geography, "' is not supported.")} # get_ctdata_crosswalk() now returns nationally comprehensive data result <- get_ctdata_crosswalk( geography = geography_standardized, + source_year = source_year, + target_year = target_year, cache = cache) return(result) diff --git a/R/get_ctdata_crosswalk.R b/R/get_ctdata_crosswalk.R index 2dbc355..ebbc705 100644 --- a/R/get_ctdata_crosswalk.R +++ b/R/get_ctdata_crosswalk.R @@ -15,26 +15,55 @@ #' Since no geographic changes occurred between 2020 and 2022 outside Connecticut, #' source_geoid equals target_geoid with allocation_factor = 1. #' +#' **Reverse direction (2022 -> 2020)**: For block, block_group, and tract geographies, +#' the crosswalk can be reversed by swapping source/target columns since these are +#' identity mappings (same boundaries, different FIPS codes). County 2022 -> 2020 is +#' NOT supported because Connecticut's county boundaries changed (9 planning regions +#' to 8 counties requires different allocation factors). +#' #' @param geography Character. Geography type: one of "block", "block_group", "tract", -#' or "county". +#' or "county" (county only for 2020 -> 2022 direction). +#' @param source_year Numeric. Year of the source geography, either 2020 or 2022. +#' Default is 2020. +#' @param target_year Numeric. Year of the target geography, either 2020 or 2022. +#' Default is 2022. #' @param cache Directory path. 
Where to download the crosswalk to. If NULL (default), #' crosswalk is returned but not saved to disk. #' -#' @return A tibble containing the national 2020-2022 crosswalk with columns: +#' @return A tibble containing the national crosswalk with columns: #' \describe{ -#' \item{source_geoid}{The 2020 FIPS code} -#' \item{target_geoid}{The 2022 FIPS code} +#' \item{source_geoid}{The source year FIPS code} +#' \item{target_geoid}{The target year FIPS code} #' \item{source_geography_name}{The geography type} #' \item{target_geography_name}{The geography type} -#' \item{source_year}{2020} -#' \item{target_year}{2022} +#' \item{source_year}{The source year (2020 or 2022)} +#' \item{target_year}{The target year (2020 or 2022)} #' \item{allocation_factor_source_to_target}{1 for all records (identity or CT FIPS change)} #' \item{weighting_factor}{"identity" for non-CT, varies for CT county} #' \item{state_fips}{Two-digit state FIPS code} #' } #' @keywords internal #' @noRd -get_ctdata_crosswalk <- function(geography, cache = NULL) { +get_ctdata_crosswalk <- function(geography, source_year = 2020, target_year = 2022, cache = NULL) { + + # Validate year parameters + source_year <- as.numeric(source_year) + target_year <- as.numeric(target_year) + + valid_combinations <- list( + c(2020, 2022), + c(2022, 2020)) + + year_combo <- c(source_year, target_year) + is_valid_combo <- any(sapply(valid_combinations, function(x) identical(x, year_combo))) + + if (!is_valid_combo) { + stop( +"CTData crosswalks are only available for 2020 <-> 2022. +The provided years (", source_year, " -> ", target_year, ") are not supported.")} + + # Determine direction + is_reverse <- source_year == 2022 && target_year == 2020 geography_standardized <- geography |> stringr::str_to_lower() |> @@ -53,6 +82,14 @@ get_ctdata_crosswalk <- function(geography, cache = NULL) { "2020-2022 crosswalks are only available for blocks, block groups, tracts, and counties. 
The provided geography '", geography, "' is not supported.")} + # County 2022 -> 2020 is not supported (requires different allocation factors) + if (is_reverse && geography_standardized == "county") { + stop( +"County crosswalks from 2022 to 2020 are not supported. +Connecticut's county boundaries changed (9 planning regions -> 8 counties), +requiring population-weighted disaggregation which is not implemented. +Only block, block_group, and tract geographies support the 2022 -> 2020 direction.")} + if (is.null(cache)) { cache_path <- tempdir() } else { @@ -61,7 +98,7 @@ The provided geography '", geography, "' is not supported.")} csv_path <- file.path( cache_path, - stringr::str_c("crosswalk_national_2020_to_2022_", geography_standardized, ".csv")) + stringr::str_c("crosswalk_national_", source_year, "_to_", target_year, "_", geography_standardized, ".csv")) ctdata_urls <- list( block = "https://raw.githubusercontent.com/CT-Data-Collaborative/2022-block-crosswalk/main/2022blockcrosswalk.csv", @@ -76,7 +113,7 @@ The provided geography '", geography, "' is not supported.")} # Check cache for full national crosswalk if (file.exists(csv_path) & !is.null(cache)) { - message("Reading national 2020-2022 crosswalk from cache.") + message(stringr::str_c("Reading national ", source_year, "-", target_year, " crosswalk from cache.")) result <- readr::read_csv( csv_path, col_types = readr::cols(.default = readr::col_character(), @@ -84,11 +121,7 @@ The provided geography '", geography, "' is not supported.")} show_col_types = FALSE) # Weighting note for metadata - weighting_note <- if (geography_standardized == "county") { - "CT county crosswalk uses population-weighted allocation factors from ACS 2021." - } else { - "All records have allocation_factor = 1 (identity mapping or CT FIPS code change)." - } + weighting_note <- "All records have allocation_factor = 1 (identity mapping or CT FIPS code change)." 
attr(result, "crosswalk_metadata") <- list( data_source = "ctdata_nhgis_combined", @@ -96,14 +129,10 @@ The provided geography '", geography, "' is not supported.")} ctdata_download_url = ctdata_download_url, ctdata_github_repository = "https://github.com/CT-Data-Collaborative", ctdata_documentation_url = "https://github.com/CT-Data-Collaborative/2022-tract-crosswalk", - nhgis_crosswalk_used = if (geography_standardized != "county") { - stringr::str_c(geography_standardized, "2010_", geography_standardized, "2020") - } else { - "N/A (county GEOIDs from tidycensus)" - }, + nhgis_crosswalk_used = stringr::str_c(geography_standardized, "2010_", geography_standardized, "2020"), nhgis_citation_url = "https://www.nhgis.org/citation-and-use-nhgis-data", - source_year = "2020", - target_year = "2022", + source_year = as.character(source_year), + target_year = as.character(target_year), source_geography = geography, source_geography_standardized = geography_standardized, target_geography = geography, @@ -303,6 +332,28 @@ The provided geography '", geography, "' is not supported.")} format(nrow(non_ct_crosswalk), big.mark = ","), " non-CT records = ", format(nrow(result), big.mark = ","), " total records.")) + # =========================================================================== + # STEP 4b: Reverse direction if needed (2022 -> 2020) + # =========================================================================== + + if (is_reverse) { + message("Reversing crosswalk direction to 2022 -> 2020...") + + # For identity crosswalks (block, block_group, tract), simply swap columns + # Note: County is not supported for reverse direction (checked earlier) + result <- result |> + dplyr::mutate( + # Swap source and target geoids + temp_geoid = source_geoid, + source_geoid = target_geoid, + target_geoid = temp_geoid, + # Update years + source_year = "2022", + target_year = "2020") |> + dplyr::select(-temp_geoid) |> + dplyr::arrange(source_geoid) + } + # 
=========================================================================== # STEP 5: Cache and return # =========================================================================== @@ -315,17 +366,13 @@ The provided geography '", geography, "' is not supported.")} message(stringr::str_c("Cached to: ", csv_path)) } - message( -"National 2020-2022 crosswalk constructed: + message(stringr::str_c( +"National ", source_year, "-", target_year, " crosswalk constructed: - Connecticut: CT Data Collaborative (https://github.com/CT-Data-Collaborative) -- Other states: Identity mapping derived from NHGIS 2010-2020 crosswalk") +- Other states: Identity mapping derived from NHGIS 2010-2020 crosswalk")) # Attach metadata to result - weighting_note <- if (geography_standardized == "county") { - "CT county crosswalk uses population-weighted allocation factors from ACS 2021." - } else { - "All records have allocation_factor = 1 (identity mapping or CT FIPS code change)." - } + weighting_note <- "All records have allocation_factor = 1 (identity mapping or CT FIPS code change)." 
attr(result, "crosswalk_metadata") <- list( data_source = "ctdata_nhgis_combined", @@ -333,14 +380,10 @@ The provided geography '", geography, "' is not supported.")} ctdata_download_url = ctdata_download_url, ctdata_github_repository = "https://github.com/CT-Data-Collaborative", ctdata_documentation_url = "https://github.com/CT-Data-Collaborative/2022-tract-crosswalk", - nhgis_crosswalk_used = if (geography_standardized != "county") { - stringr::str_c(geography_standardized, "2010_", geography_standardized, "2020") - } else { - "N/A (county GEOIDs from tidycensus)" - }, + nhgis_crosswalk_used = stringr::str_c(geography_standardized, "2010_", geography_standardized, "2020"), nhgis_citation_url = "https://www.nhgis.org/citation-and-use-nhgis-data", - source_year = "2020", - target_year = "2022", + source_year = as.character(source_year), + target_year = as.character(target_year), source_geography = geography, source_geography_standardized = geography_standardized, target_geography = geography, @@ -363,5 +406,5 @@ utils::globalVariables(c( "block_fips_2020", "block_fips_2022", "ce_fips_2022", "county_fips_2020", "county_fips_2022", "geoid_2020", "population_2020", "population_2020_total", "source_geography_name", "source_geoid", "source_year", "state_fips", - "target_geography_name", "target_geoid", "target_year", "tract_fips_2020", - "tract_fips_2022", "weighting_factor")) \ No newline at end of file + "target_geography_name", "target_geoid", "target_year", "temp_geoid", + "tract_fips_2020", "tract_fips_2022", "weighting_factor")) \ No newline at end of file diff --git a/R/get_geocorr_crosswalk.R b/R/get_geocorr_crosswalk.R index 0dbcf20..acbbe73 100644 --- a/R/get_geocorr_crosswalk.R +++ b/R/get_geocorr_crosswalk.R @@ -300,8 +300,8 @@ get_geocorr_crosswalk <- function( .fn = ~ stringr::str_replace_all(.x, c("state" = "state_fips", "stab" = "state_abbreviation"))) |> dplyr::select( dplyr::matches("state"), - source_geoid = source_geography, - target_geoid = 
target_geography, + source_geoid = !!rlang::sym(source_geography), + target_geoid = !!rlang::sym(target_geography), source_geography_name = !!stringr::str_c(source_geography, "_name"), target_geography_name = !!stringr::str_c(target_geography, "_name"), allocation_factor_source_to_target = afact, diff --git a/README.Rmd b/README.Rmd index 3be4410..563c2b3 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,8 +8,12 @@ knitr::opts_chunk$set( comment = "#>", fig.path = "man/figures/README-", out.width = "100%", - eval = FALSE -) + eval = TRUE, + echo = TRUE, + message = FALSE, + warning = FALSE) + +devtools::load_all() ``` # crosswalk @@ -62,13 +66,6 @@ source_data = tidycensus::get_acs( source_geoid = GEOID, count_below_poverty_level = below_poverty_levelE) -get_crosswalk( - source_geography = "block", - target_geography = "puma", - source_year = 2010, - target_year = 2020, - weight = "population") - # Get a crosswalk from ZCTAs to PUMAs (same year, uses Geocorr (2022)) zcta_puma_crosswalk <- get_crosswalk( source_geography = "zcta", @@ -77,9 +74,15 @@ zcta_puma_crosswalk <- get_crosswalk( # Apply the crosswalk to your data crosswalked_data <- crosswalk_data( - geoid_column = "source_geoid", data = source_data, crosswalk = zcta_puma_crosswalk) + +## Or in a single step +crosswalked_data = crosswalk_data( + data = source_data, + source_geography = "zcta", + target_geography = "puma22", + weight = "population") ``` What does the crosswalk(s) reflect and how was it sourced? diff --git a/README.html b/README.html deleted file mode 100644 index 575b5f1..0000000 --- a/README.html +++ /dev/null @@ -1,976 +0,0 @@ - - - - - - - - - - - - - - - - - - - -

crosswalk

-

An R interface to inter-geography and inter-temporal crosswalks.

-

Overview

-

This package provides a consistent API and standardized versions of -crosswalks to enable consistent approaches that work across different -geography and year combinations. The package also facilitates -interpolation—that is, adjusting source geography/year values by their -crosswalk weights and translating these values to the desired target -geography/year—including diagnostics of the joins between source data -and crosswalks.

-

The package sources crosswalks from:

-
    -
  • Geocorr 2022 (Missouri Census Data Center) - for -same-year crosswalks between geographies
  • -
  • IPUMS NHGIS - for inter-temporal crosswalks (across -different census years)
  • -
  • CT Data Collaborative - for Connecticut 2020→2022 -crosswalks (planning region changes)
  • -
-

Why Use crosswalk?

-
    -
  • Programmatic access: No more manual downloads from -web interfaces
  • -
  • Standardized output: Consistent column names across -all crosswalk sources
  • -
  • Metadata tracking: Full provenance stored as -attributes
  • -
  • Multi-step handling: Automatic chaining when both -geography and year change
  • -
  • Local caching: Reproducible workflows with cached -crosswalks
  • -
-

Installation

-
# Install from GitHub
-renv::install("UI-Research/crosswalk")
-

Overview

-

First we obtain a crosswalk and apply it to our data:

-
library(crosswalk)
-library(dplyr)
-library(stringr)
-library(sf)
-
-source_data = tidycensus::get_acs(
-    year = 2023,
-    geography = "zcta",
-    output = "wide",
-    variables = c(below_poverty_level = "B17001_002")) %>%
-  dplyr::select(
-    source_geoid = GEOID,
-    count_below_poverty_level = below_poverty_levelE)
-
-get_crosswalk(
-  source_geography = "block",
-  target_geography = "puma",
-  source_year = 2010,
-  target_year = 2020,
-  weight = "population")
-
-# Get a crosswalk from ZCTAs to PUMAs (same year, uses Geocorr (2022))
-zcta_puma_crosswalk <- get_crosswalk(
-  source_geography = "zcta",
-  target_geography = "puma22",
-  weight = "population")
-
-# Apply the crosswalk to your data
-crosswalked_data <- crosswalk_data(
-  geoid_column = "source_geoid",
-  data = source_data,
-  crosswalk = zcta_puma_crosswalk)
-

What does the crosswalk(s) reflect and how was it sourced?

-
attr(crosswalked_data, "crosswalk_metadata")
-

How well did the crosswalk join to our source data?

-
## look at all the characteristics of the join(s) between the source data
-## and the crosswalks
-join_quality = attr(crosswalked_data, "join_quality")
-
-## what share of records in the source data do not join to a crosswalk and
-## thus are dropped during the crosswalking process?
-join_quality$pct_data_unmatched
-
-## zctas aren't nested within states, otherwise join_quality$state_analysis_data 
-## would help us to ID whether non-joining source data were clustered within one
-## or a few states. instead we can join to spatial data to diagnose further:
-zctas_sf = tigris::zctas(year = 2023)
-states_sf = tigris::states(year = 2023, cb = TRUE)
-
-## apart from DC, which has a disproportionate number of non-joining ZCTAs--
-## seemingly corresponding to federal areas and buildings--the distribution of
-## non-joining ZCTAs appears proportionate to state-level populations and is 
-## distributed across many states:
-zctas_sf %>% 
-  dplyr::filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>%
-  sf::st_intersection(states_sf %>% select(NAME)) %>%
-  sf::st_drop_geometry() %>%
-  dplyr::count(NAME, sort = TRUE)
-

And how accurate was the crosswalking process?

-
comparison_data = tidycensus::get_acs(
-    year = 2023,
-    geography = "puma",
-    output = "wide",
-    variables = c(
-      below_poverty_level = "B17001_002")) %>%
-  dplyr::select(
-    source_geoid = GEOID,
-    count_below_poverty_level_acs = below_poverty_levelE)
-
-combined_data = dplyr::left_join(
-  comparison_data,
-  crosswalked_data,
-  by = c("source_geoid" = "geoid")) 
-  
-combined_data %>%
-  dplyr::select(source_geoid, dplyr::matches("count")) %>%
-  dplyr::mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>%
-  ggplot2::ggplot() +
-    ggplot2::geom_histogram(ggplot2::aes(x = difference_percent)) +
-    ggplot2::theme_minimal() +
-    ggplot2::theme(panel.grid = ggplot2::element_blank()) +
-    ggplot2::scale_x_continuous(labels = scales::percent) +
-    ggplot2::labs(
-      title = "Crosswalked data approximates observed values",
-      subtitle = "Block group-level source data would produce more accurate crosswalked values",
-      y = "",
-      x = "Percent difference between observed and crosswalked values")
-

Core Functions

-

The package has two main functions:

- - - - - - - - - - - - - - - - - -
FunctionPurpose
get_crosswalk()Fetch crosswalk(s)
crosswalk_data()Apply crosswalk(s) to interpolate data to the target -geography-year
-

Understanding -get_crosswalk() Output

-

get_crosswalk() always returns a list -structured as follows:

-
result <- get_crosswalk(
-  source_geography = "tract",
-  target_geography = "zcta",
-  source_year = 2010,
-  target_year = 2020,
-  weight = "population"
-)
-
-names(result)
-#> [1] "crosswalks" "plan" "message"
-

The list contains three elements:

- - - - - - - - - - - - - - - - - - - - - -
ElementDescription
crosswalksA named list of crosswalks (step_1, -step_2, etc.) of length one or greater
planDetails about what crosswalks are being fetched
messageA human-readable description of the crosswalk chain
-

Single-Step vs. Multi-Step -Crosswalks

-

Single-step crosswalks (same year, different -geography OR same geography, different year):

-
# Same year, different geography (Geocorr)
-result <- get_crosswalk(
-  source_geography = "tract",
-  target_geography = "zcta",
-  weight = "population"
-)
-# result$crosswalks$step_1 contains one crosswalk
-
-# Same geography, different year (NHGIS)
-result <- get_crosswalk(
-  source_geography = "tract",
-  target_geography = "tract",
-  source_year = 2010,
-  target_year = 2020
-)
-# result$crosswalks$step_1 contains one crosswalk
-

Multi-step crosswalks (different geography AND -different year):

-

When both geography and year change, no single crosswalk source -provides this directly. The package automatically plans and fetches a -two-step chain:

-
    -
  1. Step 1 (NHGIS): Change year, keep geography -constant
  2. -
  3. Step 2 (Geocorr): Change geography at target -year
  4. -
-
result <- get_crosswalk(
-  source_geography = "tract",
-  target_geography = "zcta",
-  source_year = 2010,
-  target_year = 2020,
-  weight = "population"
-)
-
-# Two crosswalks are returned
-names(result$crosswalks)
-#> [1] "step_1" "step_2"
-
-# Step 1: 2010 tracts -> 2020 tracts (NHGIS)
-# Step 2: 2020 tracts -> 2020 ZCTAs (Geocorr)
-

Crosswalk Structure

-

Each crosswalk contains standardized columns:

- - - - - - - - - - - - - - - - - - - - - - - - - -
ColumnDescription
source_geoidIdentifier for source geography
target_geoidIdentifier for target geography
allocation_factor_source_to_targetWeight for interpolating values
weighting_factorWhat attribute was used (population, housing, land)
-

Additional columns may include source_year, -target_year, population_2020, -housing_2020, and land_area_sqmi depending on -the source.

-

Accessing Metadata

-

Each crosswalk tibble has a crosswalk_metadata attribute -that documents what the crosswalk represents and how it was created:

-
metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata")
-names(metadata)
-#> [1] "call_parameters" "data_source" "data_source_full_name" "download_url" ...
-

Using -crosswalk_data() to Interpolate Data

-

crosswalk_data() applies crosswalk weights to transform -your data. It automatically handles multi-step crosswalks.

-

Column Naming Convention

-

The function auto-detects columns based on prefixes:

- - - - - - - - - - - - - - - - - -
PrefixTreatment
count_Summed after weighting (for counts like population, housing -units)
mean_, median_, percent_, -ratio_Weighted mean (for rates, percentages, averages)
-

You can also specify columns explicitly via -count_columns and non_count_columns. All -non-count variables are interpolated using weighted means, weighting by -the allocation factor from the crosswalk.

-

Supported Geography -and Year Combinations

-

Inter-Geography Crosswalks -(Geocorr)

-

2022-vintage crosswalks between any of these geographies:

-
    -
  • block, block group, tract, county
  • -
  • place, zcta, puma22
  • -
  • cd118, cd119, urban_area, core_based_statistical_area
  • -
-

Inter-Temporal Crosswalks -(NHGIS)

-

NHGIS provides cross-decade crosswalks with the following -structure:

-

Source geographies: block, block_group, tract

-

Target geographies: - From blocks (decennial years -only): block, block_group, tract, county, place, zcta, puma, urban_area, -cbsa - From block_group or tract: block_group, tract, county

- - - - - - - - - - - - - - - - - - - - - -
Source YearsTarget Years
1990, 20002010, 2014, 2015, 2020, 2022
2010, 2011, 2012, 2014, 20151990, 2000, 2020, 2022
2020, 20221990, 2000, 2010, 2014, 2015
-

Notes: - Within-decade crosswalks (e.g., 2010→2014) -are not available from NHGIS - Block→ZCTA, Block→PUMA, etc. are only -available for decennial years (1990, 2000, 2010, 2020) - The package -automatically uses direct NHGIS crosswalks when available (e.g., -get_crosswalk(source_geography = "block", target_geography = "zcta", source_year = 2010, target_year = 2020) -returns a single-step NHGIS crosswalk)

-

2020→2022 Crosswalks -(CTData)

-

For 2020 to 2022 transformations, the package uses CT Data -Collaborative crosswalks for Connecticut (where planning regions -replaced counties) and identity mappings for other states (where no -changes occurred).

-

API Keys

-

NHGIS crosswalks require an IPUMS API key. Get one at https://account.ipums.org/api_keys -and add to your .Renviron:

-
usethis::edit_r_environ()
-# Add: IPUMS_API_KEY=your_key_here
-

Caching

-

Use the cache parameter to save crosswalks locally for -ease:

-
result <- get_crosswalk(
-  source_geography = "tract",
-  target_geography = "zcta",
-  weight = "population",
-  cache = here::here("crosswalks-cache"))
-

Citations

-

The intellectual credit for the underlying crosswalks belongs to the -original developers.

-

For NHGIS, see citation requirements at: https://www.nhgis.org/citation-and-use-nhgis-data

-

For Geocorr, a suggested citation:

-
-

Missouri Census Data Center, University of Missouri. (2022). Geocorr -2022: Geographic Correspondence Engine. Retrieved from: https://mcdc.missouri.edu/applications/geocorr2022.html

-
- - - diff --git a/README.md b/README.md index 7d6b80c..c419584 100644 --- a/README.md +++ b/README.md @@ -1,349 +1,505 @@ - -# crosswalk - -An R interface to inter-geography and inter-temporal crosswalks. - -## Overview - -This package provides a consistent API and standardized versions of -crosswalks to enable consistent approaches that work across different -geography and year combinations. The package also facilitates -interpolation–that is, adjusting source geography/year values by their -crosswalk weights and translating these values to the desired target -geography/year–including diagnostics of the joins between source data -and crosswalks. - -The package sources crosswalks from: - -- **Geocorr 2022** (Missouri Census Data Center) - for same-year - crosswalks between geographies -- **IPUMS NHGIS** - for inter-temporal crosswalks (across different - census years) -- **CT Data Collaborative** - for Connecticut 2020→2022 crosswalks - (planning region changes) - -## Why Use `crosswalk`? 
- -- **Programmatic access**: No more manual downloads from web interfaces -- **Standardized output**: Consistent column names across all crosswalk - sources -- **Metadata tracking**: Full provenance stored as attributes -- **Multi-step handling**: Automatic chaining when both geography and - year change -- **Local caching**: Reproducible workflows with cached crosswalks - -## Installation - -``` r -# Install from GitHub -renv::install("UI-Research/crosswalk") -``` - -## Overview - -First we obtain a crosswalk and apply it to our data: - -``` r -library(crosswalk) -library(dplyr) -library(stringr) -library(sf) - -source_data = tidycensus::get_acs( - year = 2023, - geography = "zcta", - output = "wide", - variables = c(below_poverty_level = "B17001_002")) %>% - dplyr::select( - source_geoid = GEOID, - count_below_poverty_level = below_poverty_levelE) - -get_crosswalk( - source_geography = "block", - target_geography = "puma", - source_year = 2010, - target_year = 2020, - weight = "population") - -# Get a crosswalk from ZCTAs to PUMAs (same year, uses Geocorr (2022)) -zcta_puma_crosswalk <- get_crosswalk( - source_geography = "zcta", - target_geography = "puma22", - weight = "population") - -# Apply the crosswalk to your data -crosswalked_data <- crosswalk_data( - geoid_column = "source_geoid", - data = source_data, - crosswalk = zcta_puma_crosswalk) -``` - -What does the crosswalk(s) reflect and how was it sourced? - -``` r -attr(crosswalked_data, "crosswalk_metadata") -``` - -How well did the crosswalk join to our source data? - -``` r -## look at all the characteristics of the join(s) between the source data -## and the crosswalks -join_quality = attr(crosswalked_data, "join_quality") - -## what share of records in the source data do not join to a crosswalk and -## thus are dropped during the crosswalking process? 
-join_quality$pct_data_unmatched - -## zctas aren't nested within states, otherwise join_quality$state_analysis_data -## would help us to ID whether non-joining source data were clustered within one -## or a few states. instead we can join to spatial data to diagnose further: -zctas_sf = tigris::zctas(year = 2023) -states_sf = tigris::states(year = 2023, cb = TRUE) - -## apart from DC, which has a disproportionate number of non-joining ZCTAs-- -## seemingly corresponding to federal areas and buildings--the distribution of -## non-joining ZCTAs appears proportionate to state-level populations and is -## distributed across many states: -zctas_sf %>% - dplyr::filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% - sf::st_intersection(states_sf %>% select(NAME)) %>% - sf::st_drop_geometry() %>% - dplyr::count(NAME, sort = TRUE) -``` - -And how accurate was the crosswalking process? - -``` r -comparison_data = tidycensus::get_acs( - year = 2023, - geography = "puma", - output = "wide", - variables = c( - below_poverty_level = "B17001_002")) %>% - dplyr::select( - source_geoid = GEOID, - count_below_poverty_level_acs = below_poverty_levelE) - -combined_data = dplyr::left_join( - comparison_data, - crosswalked_data, - by = c("source_geoid" = "geoid")) - -combined_data %>% - dplyr::select(source_geoid, dplyr::matches("count")) %>% - dplyr::mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% - ggplot2::ggplot() + - ggplot2::geom_histogram(ggplot2::aes(x = difference_percent)) + - ggplot2::theme_minimal() + - ggplot2::theme(panel.grid = ggplot2::element_blank()) + - ggplot2::scale_x_continuous(labels = scales::percent) + - ggplot2::labs( - title = "Crosswalked data approximates observed values", - subtitle = "Block group-level source data would produce more accurate crosswalked values", - y = "", - x = "Percent difference between observed and crosswalked values") -``` - -## Core Functions - -The 
package has two main functions: - -| Function | Purpose | -|--------------------|---------------------------------------------------------------------| -| `get_crosswalk()` | Fetch crosswalk(s) | -| `crosswalk_data()` | Apply crosswalk(s) to interpolate data to the target geography-year | - -## Understanding `get_crosswalk()` Output - -`get_crosswalk()` **always returns a list** structured as follows: - -``` r -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - source_year = 2010, - target_year = 2020, - weight = "population" -) - -names(result) -#> [1] "crosswalks" "plan" "message" -``` - -The list contains three elements: - -| Element | Description | -|--------------|--------------------------------------------------------------------------------| -| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) of length one or greater | -| `plan` | Details about what crosswalks are being fetched | -| `message` | A human-readable description of the crosswalk chain | - -### Single-Step vs. Multi-Step Crosswalks - -**Single-step crosswalks** (same year, different geography OR same -geography, different year): - -``` r -# Same year, different geography (Geocorr) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - weight = "population" -) -# result$crosswalks$step_1 contains one crosswalk - -# Same geography, different year (NHGIS) -result <- get_crosswalk( - source_geography = "tract", - target_geography = "tract", - source_year = 2010, - target_year = 2020 -) -# result$crosswalks$step_1 contains one crosswalk -``` - -**Multi-step crosswalks** (different geography AND different year): - -When both geography and year change, no single crosswalk source provides -this directly. The package automatically plans and fetches a two-step -chain: - -1. **Step 1 (NHGIS)**: Change year, keep geography constant -2. 
**Step 2 (Geocorr)**: Change geography at target year - -``` r -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - source_year = 2010, - target_year = 2020, - weight = "population" -) - -# Two crosswalks are returned -names(result$crosswalks) -#> [1] "step_1" "step_2" - -# Step 1: 2010 tracts -> 2020 tracts (NHGIS) -# Step 2: 2020 tracts -> 2020 ZCTAs (Geocorr) -``` - -### Crosswalk Structure - -Each crosswalk contains standardized columns: - -| Column | Description | -|--------------------------------------|-----------------------------------------------------| -| `source_geoid` | Identifier for source geography | -| `target_geoid` | Identifier for target geography | -| `allocation_factor_source_to_target` | Weight for interpolating values | -| `weighting_factor` | What attribute was used (population, housing, land) | - -Additional columns may include `source_year`, `target_year`, -`population_2020`, `housing_2020`, and `land_area_sqmi` depending on the -source. - -### Accessing Metadata - -Each crosswalk tibble has a `crosswalk_metadata` attribute that -documents what the crosswalk represents and how it was created: - -``` r -metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata") -names(metadata) -#> [1] "call_parameters" "data_source" "data_source_full_name" "download_url" ... -``` - -## Using `crosswalk_data()` to Interpolate Data - -`crosswalk_data()` applies crosswalk weights to transform your data. It -automatically handles multi-step crosswalks. 
- -### Column Naming Convention - -The function auto-detects columns based on prefixes: - -| Prefix | Treatment | -|------------------------------------------|--------------------------------------------------------------------| -| `count_` | Summed after weighting (for counts like population, housing units) | -| `mean_`, `median_`, `percent_`, `ratio_` | Weighted mean (for rates, percentages, averages) | - -You can also specify columns explicitly via `count_columns` and -`non_count_columns`. All non-count variables are interpolated using -weighted means, weighting by the allocation factor from the crosswalk. - -## Supported Geography and Year Combinations - -### Inter-Geography Crosswalks (Geocorr) - -2022-vintage crosswalks between any of these geographies: - -- block, block group, tract, county -- place, zcta, puma22 -- cd118, cd119, urban_area, core_based_statistical_area - -### Inter-Temporal Crosswalks (NHGIS) - -NHGIS provides cross-decade crosswalks with the following structure: - -**Source geographies:** block, block_group, tract - -**Target geographies:** - From blocks (decennial years only): block, -block_group, tract, county, place, zcta, puma, urban_area, cbsa - From -block_group or tract: block_group, tract, county - -| Source Years | Target Years | -|------------------------------|------------------------------| -| 1990, 2000 | 2010, 2014, 2015, 2020, 2022 | -| 2010, 2011, 2012, 2014, 2015 | 1990, 2000, 2020, 2022 | -| 2020, 2022 | 1990, 2000, 2010, 2014, 2015 | - -**Notes:** - Within-decade crosswalks (e.g., 2010→2014) are not -available from NHGIS - Block→ZCTA, Block→PUMA, etc. 
are only available -for decennial years (1990, 2000, 2010, 2020) - The package automatically -uses direct NHGIS crosswalks when available (e.g., -`get_crosswalk(source_geography = "block", target_geography = "zcta", source_year = 2010, target_year = 2020)` -returns a single-step NHGIS crosswalk) - -### 2020→2022 Crosswalks (CTData) - -For 2020 to 2022 transformations, the package uses CT Data Collaborative -crosswalks for Connecticut (where planning regions replaced counties) -and identity mappings for other states (where no changes occurred). - -## API Keys - -NHGIS crosswalks require an IPUMS API key. Get one at - and add to your `.Renviron`: - -``` r -usethis::edit_r_environ() -# Add: IPUMS_API_KEY=your_key_here -``` - -## Caching - -Use the `cache` parameter to save crosswalks locally for ease: - -``` r -result <- get_crosswalk( - source_geography = "tract", - target_geography = "zcta", - weight = "population", - cache = here::here("crosswalks-cache")) -``` - -## Citations - -The intellectual credit for the underlying crosswalks belongs to the -original developers. - -**For NHGIS**, see citation requirements at: - - -**For Geocorr**, a suggested citation: - -> Missouri Census Data Center, University of Missouri. (2022). Geocorr -> 2022: Geographic Correspondence Engine. Retrieved from: -> + +# crosswalk + +An R interface to inter-geography and inter-temporal crosswalks. + +## Overview + +This package provides a consistent API and standardized versions of +crosswalks to enable consistent approaches that work across different +geography and year combinations. The package also facilitates +interpolation–that is, adjusting source geography/year values by their +crosswalk weights and translating these values to the desired target +geography/year–including diagnostics of the joins between source data +and crosswalks. 
+ +The package sources crosswalks from: + +- **Geocorr 2022** (Missouri Census Data Center) - for same-year + crosswalks between geographies +- **IPUMS NHGIS** - for inter-temporal crosswalks (across different + census years) +- **CT Data Collaborative** - for Connecticut 2020→2022 crosswalks + (planning region changes) + +## Why Use `crosswalk`? + +- **Programmatic access**: No more manual downloads from web interfaces +- **Standardized output**: Consistent column names across all crosswalk + sources +- **Metadata tracking**: Full provenance stored as attributes +- **Multi-step handling**: Automatic chaining when both geography and + year change +- **Local caching**: Reproducible workflows with cached crosswalks + +## Installation + +``` r +# Install from GitHub +renv::install("UI-Research/crosswalk") +#> The following package(s) will be installed: +#> - crosswalk [UI-Research/crosswalk] +#> These packages will be installed into "C:/Users/wcurrangroome/AppData/Local/Temp/RtmpSkgo68/temp_libpathd7e02418a38". +#> +#> # Installing packages -------------------------------------------------------- +#> - Installing crosswalk 0.0.0.9001 ... OK [copied from cache in 0.24s] +#> Successfully installed 1 package in 0.26 seconds. 
+``` + +## Quick Start + +First we obtain a crosswalk and apply it to our data: + +``` r +library(crosswalk) +library(dplyr) +library(stringr) +library(sf) + +source_data = tidycensus::get_acs( + year = 2023, + geography = "zcta", + output = "wide", + variables = c(below_poverty_level = "B17001_002")) %>% + dplyr::select( + source_geoid = GEOID, + count_below_poverty_level = below_poverty_levelE) + +# Get a crosswalk from ZCTAs to PUMAs (same year, uses Geocorr (2022)) +zcta_puma_crosswalk <- get_crosswalk( + source_geography = "zcta", + target_geography = "puma22", + weight = "population") + +# Apply the crosswalk to your data +crosswalked_data <- crosswalk_data( + data = source_data, + crosswalk = zcta_puma_crosswalk) + +## Or in a single step +crosswalked_data = crosswalk_data( + data = source_data, + source_geography = "zcta", + target_geography = "puma22", + weight = "population") +``` + +What do the crosswalk(s) reflect, and how were they sourced? + +``` r +attr(crosswalked_data, "crosswalk_metadata") +#> $call_parameters +#> $call_parameters$source_geography +#> [1] "zcta" +#> +#> $call_parameters$target_geography +#> [1] "puma22" +#> +#> $call_parameters$source_year +#> NULL +#> +#> $call_parameters$target_year +#> NULL +#> +#> $call_parameters$weight +#> [1] "population" +#> +#> $call_parameters$cache +#> NULL +#> +#> +#> $data_source +#> [1] "geocorr" +#> +#> $data_source_full_name +#> [1] "Geocorr 2022 (Missouri Census Data Center)" +#> +#> $download_url +#> NULL +#> +#> $api_endpoint +#> [1] "https://mcdc.missouri.edu/cgi-bin/broker" +#> +#> $documentation_url +#> [1] "https://mcdc.missouri.edu/applications/geocorr2022.html" +#> +#> $citation_url +#> NULL +#> +#> $github_repository +#> NULL +#> +#> $source_geography +#> [1] "zcta" +#> +#> $source_geography_standardized +#> NULL +#> +#> $target_geography +#> [1] "puma22" +#> +#> $target_geography_standardized +#> NULL +#> +#> $source_year +#> NULL +#> +#> $target_year +#> NULL +#> +#> $reference_year +#> [1] 
"2022" +#> +#> $weighting_variable +#> [1] "population" +#> +#> $state_coverage +#> NULL +#> +#> $notes +#> NULL +#> +#> $retrieved_at +#> [1] "2026-02-01 00:09:21 EST" +#> +#> $cached +#> [1] FALSE +#> +#> $cache_path +#> NULL +#> +#> $read_from_cache +#> NULL +#> +#> $is_multi_step +#> [1] FALSE +#> +#> $crosswalk_package_version +#> [1] "0.0.0.9001" +``` + +How well did the crosswalk join to our source data? + +``` r +## look at all the characteristics of the join(s) between the source data +## and the crosswalks +join_quality = attr(crosswalked_data, "join_quality") + +## what share of records in the source data do not join to a crosswalk and +## thus are dropped during the crosswalking process? +join_quality$pct_data_unmatched +#> [1] 0.4234277 + +## zctas aren't nested within states, otherwise join_quality$state_analysis_data +## would help us to ID whether non-joining source data were clustered within one +## or a few states. instead we can join to spatial data to diagnose further: +zctas_sf = tigris::zctas(year = 2023) +#> | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 
28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | 
|======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== 
| 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================= | 94% | |================================================================== | 94% | 
|================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100% +states_sf = tigris::states(year = 2023, cb = TRUE) +#> | | | 0% | |= | 1% | |= | 2% | |== | 3% | |== | 4% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | 
|=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | 
|================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |=================================================== | 74% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | 
|================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100% + +## apart from DC, which has a disproportionate number of non-joining ZCTAs-- +## seemingly corresponding to federal areas and buildings--the distribution of +## non-joining ZCTAs appears proportionate to state-level populations and is +## distributed across many states: +zctas_sf %>% + dplyr::filter(GEOID20 %in% join_quality$data_geoids_unmatched) %>% + sf::st_intersection(states_sf %>% select(NAME)) %>% + sf::st_drop_geometry() %>% + dplyr::count(NAME, sort = TRUE) +#> NAME n +#> 1 District of Columbia 19 +#> 2 New York 15 +#> 3 Texas 9 +#> 4 California 8 +#> 5 Colorado 6 +#> 6 Utah 6 +#> 7 Florida 5 +#> 8 Pennsylvania 5 +#> 9 Tennessee 5 +#> 10 Virginia 5 +#> 11 Alabama 4 +#> 12 Arizona 4 +#> 13 Kentucky 4 +#> 14 Maryland 4 +#> 15 Ohio 4 +#> 16 Washington 4 +#> 17 Georgia 3 +#> 18 Louisiana 3 +#> 19 Michigan 3 +#> 20 North Carolina 3 +#> 21 Alaska 2 +#> 22 Massachusetts 2 +#> 23 Mississippi 2 +#> 24 North Dakota 2 +#> 25 Arkansas 1 +#> 26 Hawaii 1 +#> 27 Idaho 1 +#> 28 Illinois 1 +#> 
29 Indiana 1 +#> 30 Iowa 1 +#> 31 Kansas 1 +#> 32 Maine 1 +#> 33 Minnesota 1 +#> 34 Missouri 1 +#> 35 Montana 1 +#> 36 Nebraska 1 +#> 37 Nevada 1 +#> 38 New Mexico 1 +#> 39 Oregon 1 +#> 40 South Carolina 1 +#> 41 Vermont 1 +#> 42 Wisconsin 1 +``` + +And how accurate was the crosswalking process? + +``` r +comparison_data = tidycensus::get_acs( + year = 2023, + geography = "puma", + output = "wide", + variables = c( + below_poverty_level = "B17001_002")) %>% + dplyr::select( + source_geoid = GEOID, + count_below_poverty_level_acs = below_poverty_levelE) + +combined_data = dplyr::left_join( + comparison_data, + crosswalked_data, + by = c("source_geoid" = "geoid")) + +combined_data %>% + dplyr::select(source_geoid, dplyr::matches("count")) %>% + dplyr::mutate(difference_percent = (count_below_poverty_level_acs - count_below_poverty_level) / count_below_poverty_level_acs) %>% + ggplot2::ggplot() + + ggplot2::geom_histogram(ggplot2::aes(x = difference_percent)) + + ggplot2::theme_minimal() + + ggplot2::theme(panel.grid = ggplot2::element_blank()) + + ggplot2::scale_x_continuous(labels = scales::percent) + + ggplot2::labs( + title = "Crosswalked data approximates observed values", + subtitle = "Block group-level source data would produce more accurate crosswalked values", + y = "", + x = "Percent difference between observed and crosswalked values") +``` + + + +## Core Functions + +The package has two main functions: + +| Function | Purpose | +|----|----| +| `get_crosswalk()` | Fetch crosswalk(s) | +| `crosswalk_data()` | Apply crosswalk(s) to interpolate data to the target geography-year | + +## Understanding `get_crosswalk()` Output + +`get_crosswalk()` **always returns a list** structured as follows: + +``` r +result <- get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + source_year = 2010, + target_year = 2020, + weight = "population" +) + +names(result) +#> [1] "crosswalks" "plan" "message" +#> [1] "crosswalks" "plan" "message" +``` + +The 
list contains three elements: + +| Element | Description | +|----|----| +| `crosswalks` | A named list of crosswalks (`step_1`, `step_2`, etc.) of length one or greater | +| `plan` | Details about what crosswalks are being fetched | +| `message` | A human-readable description of the crosswalk chain | + +### Single-Step vs. Multi-Step Crosswalks + +**Single-step crosswalks** (same year, different geography OR same +geography, different year): + +``` r +# Same year, different geography (Geocorr) +result <- get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + weight = "population" +) +# result$crosswalks$step_1 contains one crosswalk + +# Same geography, different year (NHGIS) +result <- get_crosswalk( + source_geography = "tract", + target_geography = "tract", + source_year = 2010, + target_year = 2020 +) +# result$crosswalks$step_1 contains one crosswalk +``` + +**Multi-step crosswalks** (different geography AND different year): + +When both geography and year change, no single crosswalk source provides +this directly. The package automatically plans and fetches a two-step +chain: + +1. **Step 1 (NHGIS)**: Change year, keep geography constant +2. 
**Step 2 (Geocorr)**: Change geography at target year + +``` r +result <- get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + source_year = 2010, + target_year = 2020, + weight = "population" +) + +# Two crosswalks are returned +names(result$crosswalks) +#> [1] "step_1" "step_2" +#> [1] "step_1" "step_2" + +# Step 1: 2010 tracts -> 2020 tracts (NHGIS) +# Step 2: 2020 tracts -> 2020 ZCTAs (Geocorr) +``` + +### Crosswalk Structure + +Each crosswalk contains standardized columns: + +| Column | Description | +|----|----| +| `source_geoid` | Identifier for source geography | +| `target_geoid` | Identifier for target geography | +| `allocation_factor_source_to_target` | Weight for interpolating values | +| `weighting_factor` | What attribute was used (population, housing, land) | + +Additional columns may include `source_year`, `target_year`, +`population_2020`, `housing_2020`, and `land_area_sqmi` depending on the +source. + +### Accessing Metadata + +Each crosswalk tibble has a `crosswalk_metadata` attribute that +documents what the crosswalk represents and how it was created: + +``` r +metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata") +names(metadata) +#> [1] "call_parameters" "data_source" +#> [3] "data_source_full_name" "download_url" +#> [5] "api_endpoint" "documentation_url" +#> [7] "citation_url" "github_repository" +#> [9] "source_geography" "source_geography_standardized" +#> [11] "target_geography" "target_geography_standardized" +#> [13] "source_year" "target_year" +#> [15] "reference_year" "weighting_variable" +#> [17] "state_coverage" "notes" +#> [19] "retrieved_at" "cached" +#> [21] "cache_path" "read_from_cache" +#> [23] "is_multi_step" "crosswalk_package_version" +#> [1] "call_parameters" "data_source" "data_source_full_name" "download_url" ... +``` + +## Using `crosswalk_data()` to Interpolate Data + +`crosswalk_data()` applies crosswalk weights to transform your data. 
It +automatically handles multi-step crosswalks. + +### Column Naming Convention + +The function auto-detects columns based on prefixes: + +| Prefix | Treatment | +|----|----| +| `count_` | Summed after weighting (for counts like population, housing units) | +| `mean_`, `median_`, `percent_`, `ratio_` | Weighted mean (for rates, percentages, averages) | + +You can also specify columns explicitly via `count_columns` and +`non_count_columns`. All non-count variables are interpolated using +weighted means, weighting by the allocation factor from the crosswalk. + +## Supported Geography and Year Combinations + +### Inter-Geography Crosswalks (Geocorr) + +2022-vintage crosswalks between any of these geographies: + +- block, block group, tract, county +- place, zcta, puma22 +- cd118, cd119, urban_area, core_based_statistical_area + +### Inter-Temporal Crosswalks (NHGIS) + +NHGIS provides cross-decade crosswalks with the following structure: + +**Source geographies:** block, block_group, tract + +**Target geographies:** from blocks (decennial years only): block, +block_group, tract, county, place, zcta, puma, urban_area, cbsa; from +block_group or tract: block_group, tract, county + +| Source Years | Target Years | +|------------------------------|------------------------------| +| 1990, 2000 | 2010, 2014, 2015, 2020, 2022 | +| 2010, 2011, 2012, 2014, 2015 | 1990, 2000, 2020, 2022 | +| 2020, 2022 | 1990, 2000, 2010, 2014, 2015 | + +**Notes:** Within-decade crosswalks (e.g., 2010→2014) are not +available from NHGIS. Block→ZCTA, Block→PUMA, etc. 
are only available +for decennial years (1990, 2000, 2010, 2020) - The package automatically +uses direct NHGIS crosswalks when available (e.g., +`get_crosswalk(source_geography = "block", target_geography = "zcta", source_year = 2010, target_year = 2020)` +returns a single-step NHGIS crosswalk) + +### 2020→2022 Crosswalks (CTData) + +For 2020 to 2022 transformations, the package uses CT Data Collaborative +crosswalks for Connecticut (where planning regions replaced counties) +and identity mappings for other states (where no changes occurred). + +## API Keys + +NHGIS crosswalks require an IPUMS API key. Get one at + and add to your `.Renviron`: + +``` r +usethis::edit_r_environ() +# Add: IPUMS_API_KEY=your_key_here +``` + +## Caching + +Use the `cache` parameter to save crosswalks locally for ease: + +``` r +result <- get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + weight = "population", + cache = here::here("crosswalks-cache")) +``` + +## Citations + +The intellectual credit for the underlying crosswalks belongs to the +original developers. + +**For NHGIS**, see citation requirements at: + + +**For Geocorr**, a suggested citation: + +> Missouri Census Data Center, University of Missouri. (2022). Geocorr +> 2022: Geographic Correspondence Engine. 
Retrieved from: +> diff --git a/man/crosswalk_data.Rd b/man/crosswalk_data.Rd index d6d0e62..cd65ea0 100644 --- a/man/crosswalk_data.Rd +++ b/man/crosswalk_data.Rd @@ -6,8 +6,14 @@ \usage{ crosswalk_data( data, - crosswalk, - geoid_column = "geoid", + crosswalk = NULL, + source_geography = NULL, + target_geography = NULL, + source_year = NULL, + target_year = NULL, + weight = "population", + cache = NULL, + geoid_column = "source_geoid", count_columns = NULL, non_count_columns = NULL, return_intermediate = FALSE, @@ -24,10 +30,31 @@ crosswalk_data( \item{message}{Description of the crosswalk chain} } Alternatively, a single crosswalk tibble can be provided for backwards -compatibility.} +compatibility. If NULL, the crosswalk will be fetched using \code{source_geography} +and \code{target_geography} parameters.} + +\item{source_geography}{Character or NULL. Source geography name. Required if +\code{crosswalk} is NULL. One of c("block", "block group", "tract", "place", +"county", "urban_area", "zcta", "puma", "cd118", "cd119", +"core_based_statistical_area").} + +\item{target_geography}{Character or NULL. Target geography name. Required if +\code{crosswalk} is NULL. Same options as \code{source_geography}.} + +\item{source_year}{Numeric or NULL. Year of the source geography. If NULL and +crosswalk is being fetched, uses same-year crosswalk via Geocorr.} + +\item{target_year}{Numeric or NULL. Year of the target geography. If NULL and +crosswalk is being fetched, uses same-year crosswalk via Geocorr.} + +\item{weight}{Character. Weighting variable for Geocorr crosswalks when fetching. +One of c("population", "housing", "land"). Default is "population".} + +\item{cache}{Directory path or NULL. Where to cache fetched crosswalks. If NULL +(default), crosswalk is fetched but not saved to disk.} \item{geoid_column}{Character. The name of the column in \code{data} containing -the source geography identifiers (GEOIDs). 
Default is "geoid".} +the source geography identifiers (GEOIDs). Default is "source_geoid".} \item{count_columns}{Character vector or NULL. Column names in \code{data} that represent count variables. These will be summed after multiplying by the allocation factor. @@ -63,10 +90,20 @@ underlying crosswalk (access via \code{attr(result, "crosswalk_metadata")}). } \description{ Applies geographic crosswalk weights to transform data from a source geography -to a target geography. Accepts the output from \code{get_crosswalk()} and automatically -applies all crosswalk steps sequentially for multi-step transformations. +to a target geography. Can either accept a pre-fetched crosswalk from +\code{get_crosswalk()} or fetch the crosswalk automatically using the provided +geography and year parameters. } \details{ +\strong{Two usage patterns}: +\enumerate{ +\item \strong{Pre-fetched crosswalk}: Pass the output of \code{get_crosswalk()} to the +\code{crosswalk} parameter. Useful when you want to inspect or reuse the crosswalk. +\item \strong{Direct crosswalking}: Pass \code{source_geography} and \code{target_geography} +(and optionally \code{source_year}, \code{target_year}, \code{weight}, \code{cache}) and the +crosswalk will be fetched automatically. Useful for one-off transformations. +} + \strong{Count variables} (specified in \code{count_columns}) are interpolated by summing the product of the value and the allocation factor across all source geographies that overlap with each target geography. @@ -93,7 +130,7 @@ automatically applies them in sequence. 
} \examples{ \dontrun{ -# Single-step crosswalk +# Option 1: Pre-fetched crosswalk crosswalk <- get_crosswalk( source_geography = "tract", target_geography = "zcta", @@ -105,22 +142,34 @@ result <- crosswalk_data( geoid_column = "tract_geoid", count_columns = c("count_population", "count_housing_units")) -# Multi-step crosswalk (geography + year change) -crosswalk <- get_crosswalk( +# Option 2: Direct crosswalking (crosswalk fetched automatically) +result <- crosswalk_data( + data = my_tract_data, source_geography = "tract", target_geography = "zcta", - source_year = 2010, - target_year = 2020, - weight = "population") + weight = "population", + geoid_column = "tract_geoid", + count_columns = c("count_population", "count_housing_units")) -# Automatically applies both steps +# Direct crosswalking with year change result <- crosswalk_data( data = my_data, - crosswalk = crosswalk, + source_geography = "tract", + target_geography = "zcta", + source_year = 2010, + target_year = 2020, + weight = "population", geoid_column = "tract_geoid", count_columns = "count_population") -# To get intermediate results +# Pre-fetched crosswalk with intermediate results +crosswalk <- get_crosswalk( + source_geography = "tract", + target_geography = "zcta", + source_year = 2010, + target_year = 2020, + weight = "population") + result <- crosswalk_data( data = my_data, crosswalk = crosswalk, diff --git a/man/figures/README-unnamed-chunk-6-1.png b/man/figures/README-unnamed-chunk-6-1.png new file mode 100644 index 0000000..c9ec9ef Binary files /dev/null and b/man/figures/README-unnamed-chunk-6-1.png differ diff --git a/renv.lock b/renv.lock index 45cd8e6..ce6a285 100644 --- a/renv.lock +++ b/renv.lock @@ -207,7 +207,7 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.5.2", + "Version": "0.5.3", "Source": "Repository", "Title": "A C++11 Interface for R's C Interface", "Authors@R": "c( person(\"Davis\", \"Vaughan\", email = \"davis@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID 
= \"0000-0003-4777-038X\")), person(\"Jim\",\"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Romain\", \"François\", role = \"aut\", comment = c(ORCID = \"0000-0002-2444-4226\")), person(\"Benjamin\", \"Kietzman\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", @@ -248,7 +248,7 @@ "Encoding": "UTF-8", "RoxygenNote": "7.3.2", "NeedsCompilation": "no", - "Author": "Davis Vaughan [aut, cre] (), Jim Hester [aut] (), Romain François [aut] (), Benjamin Kietzman [ctb], Posit Software, PBC [cph, fnd]", + "Author": "Davis Vaughan [aut, cre] (ORCID: ), Jim Hester [aut] (ORCID: ), Romain François [aut] (ORCID: ), Benjamin Kietzman [ctb], Posit Software, PBC [cph, fnd]", "Maintainer": "Davis Vaughan ", "Repository": "CRAN" }, @@ -284,7 +284,7 @@ }, "curl": { "Package": "curl", - "Version": "6.4.0", + "Version": "7.0.0", "Source": "Repository", "Type": "Package", "Title": "A Modern and Flexible Web Client for R", @@ -308,7 +308,7 @@ "Depends": [ "R (>= 3.0.0)" ], - "RoxygenNote": "7.3.2.9000", + "RoxygenNote": "7.3.2", "Encoding": "UTF-8", "Language": "en-US", "NeedsCompilation": "yes", @@ -318,11 +318,11 @@ }, "data.table": { "Package": "data.table", - "Version": "1.17.8", + "Version": "1.18.2.1", "Source": "Repository", "Title": "Extension of `data.frame`", "Depends": [ - "R (>= 3.3.0)" + "R (>= 3.4.0)" ], "Imports": [ "methods" @@ -330,7 +330,7 @@ "Suggests": [ "bit64 (>= 4.0.0)", "bit (>= 4.0.4)", - "R.utils", + "R.utils (>= 2.13.0)", "xts", "zoo (>= 1.8-1)", "yaml", @@ -344,9 +344,9 @@ "VignetteBuilder": "knitr", "Encoding": "UTF-8", "ByteCompile": "TRUE", - "Authors@R": "c( person(\"Tyson\",\"Barrett\", role=c(\"aut\",\"cre\"), email=\"t.barrett88@gmail.com\", comment = c(ORCID=\"0000-0002-2137-1391\")), person(\"Matt\",\"Dowle\", role=\"aut\", email=\"mattjdowle@gmail.com\"), person(\"Arun\",\"Srinivasan\", role=\"aut\", email=\"asrini@pm.me\"), person(\"Jan\",\"Gorecki\", role=\"aut\"), 
person(\"Michael\",\"Chirico\", role=\"aut\", comment = c(ORCID=\"0000-0003-0787-087X\")), person(\"Toby\",\"Hocking\", role=\"aut\", comment = c(ORCID=\"0000-0002-3146-0865\")), person(\"Benjamin\",\"Schwendinger\",role=\"aut\", comment = c(ORCID=\"0000-0003-3315-8114\")), person(\"Ivan\", \"Krylov\", role=\"aut\", email=\"ikrylov@disroot.org\", comment = c(ORCID=\"0000-0002-0172-3812\")), person(\"Pasha\",\"Stetsenko\", role=\"ctb\"), person(\"Tom\",\"Short\", role=\"ctb\"), person(\"Steve\",\"Lianoglou\", role=\"ctb\"), person(\"Eduard\",\"Antonyan\", role=\"ctb\"), person(\"Markus\",\"Bonsch\", role=\"ctb\"), person(\"Hugh\",\"Parsonage\", role=\"ctb\"), person(\"Scott\",\"Ritchie\", role=\"ctb\"), person(\"Kun\",\"Ren\", role=\"ctb\"), person(\"Xianying\",\"Tan\", role=\"ctb\"), person(\"Rick\",\"Saporta\", role=\"ctb\"), person(\"Otto\",\"Seiskari\", role=\"ctb\"), person(\"Xianghui\",\"Dong\", role=\"ctb\"), person(\"Michel\",\"Lang\", role=\"ctb\"), person(\"Watal\",\"Iwasaki\", role=\"ctb\"), person(\"Seth\",\"Wenchel\", role=\"ctb\"), person(\"Karl\",\"Broman\", role=\"ctb\"), person(\"Tobias\",\"Schmidt\", role=\"ctb\"), person(\"David\",\"Arenburg\", role=\"ctb\"), person(\"Ethan\",\"Smith\", role=\"ctb\"), person(\"Francois\",\"Cocquemas\", role=\"ctb\"), person(\"Matthieu\",\"Gomez\", role=\"ctb\"), person(\"Philippe\",\"Chataignon\", role=\"ctb\"), person(\"Nello\",\"Blaser\", role=\"ctb\"), person(\"Dmitry\",\"Selivanov\", role=\"ctb\"), person(\"Andrey\",\"Riabushenko\", role=\"ctb\"), person(\"Cheng\",\"Lee\", role=\"ctb\"), person(\"Declan\",\"Groves\", role=\"ctb\"), person(\"Daniel\",\"Possenriede\", role=\"ctb\"), person(\"Felipe\",\"Parages\", role=\"ctb\"), person(\"Denes\",\"Toth\", role=\"ctb\"), person(\"Mus\",\"Yaramaz-David\", role=\"ctb\"), person(\"Ayappan\",\"Perumal\", role=\"ctb\"), person(\"James\",\"Sams\", role=\"ctb\"), person(\"Martin\",\"Morgan\", role=\"ctb\"), person(\"Michael\",\"Quinn\", role=\"ctb\"), 
person(\"@javrucebo\",\"\", role=\"ctb\"), person(\"@marc-outins\",\"\", role=\"ctb\"), person(\"Roy\",\"Storey\", role=\"ctb\"), person(\"Manish\",\"Saraswat\", role=\"ctb\"), person(\"Morgan\",\"Jacob\", role=\"ctb\"), person(\"Michael\",\"Schubmehl\", role=\"ctb\"), person(\"Davis\",\"Vaughan\", role=\"ctb\"), person(\"Leonardo\",\"Silvestri\", role=\"ctb\"), person(\"Jim\",\"Hester\", role=\"ctb\"), person(\"Anthony\",\"Damico\", role=\"ctb\"), person(\"Sebastian\",\"Freundt\", role=\"ctb\"), person(\"David\",\"Simons\", role=\"ctb\"), person(\"Elliott\",\"Sales de Andrade\", role=\"ctb\"), person(\"Cole\",\"Miller\", role=\"ctb\"), person(\"Jens Peder\",\"Meldgaard\", role=\"ctb\"), person(\"Vaclav\",\"Tlapak\", role=\"ctb\"), person(\"Kevin\",\"Ushey\", role=\"ctb\"), person(\"Dirk\",\"Eddelbuettel\", role=\"ctb\"), person(\"Tony\",\"Fischetti\", role=\"ctb\"), person(\"Ofek\",\"Shilon\", role=\"ctb\"), person(\"Vadim\",\"Khotilovich\", role=\"ctb\"), person(\"Hadley\",\"Wickham\", role=\"ctb\"), person(\"Bennet\",\"Becker\", role=\"ctb\"), person(\"Kyle\",\"Haynes\", role=\"ctb\"), person(\"Boniface Christian\",\"Kamgang\", role=\"ctb\"), person(\"Olivier\",\"Delmarcell\", role=\"ctb\"), person(\"Josh\",\"O'Brien\", role=\"ctb\"), person(\"Dereck\",\"de Mezquita\", role=\"ctb\"), person(\"Michael\",\"Czekanski\", role=\"ctb\"), person(\"Dmitry\", \"Shemetov\", role=\"ctb\"), person(\"Nitish\", \"Jha\", role=\"ctb\"), person(\"Joshua\", \"Wu\", role=\"ctb\"), person(\"Iago\", \"Giné-Vázquez\", role=\"ctb\"), person(\"Anirban\", \"Chetia\", role=\"ctb\"), person(\"Doris\", \"Amoakohene\", role=\"ctb\"), person(\"Angel\", \"Feliz\", role=\"ctb\"), person(\"Michael\",\"Young\", role=\"ctb\"), person(\"Mark\", \"Seeto\", role=\"ctb\"), person(\"Philippe\", \"Grosjean\", role=\"ctb\"), person(\"Vincent\", \"Runge\", role=\"ctb\"), person(\"Christian\", \"Wia\", role=\"ctb\"), person(\"Elise\", \"Maigné\", role=\"ctb\"), person(\"Vincent\", \"Rocher\", 
role=\"ctb\"), person(\"Vijay\", \"Lulla\", role=\"ctb\"), person(\"Aljaž\", \"Sluga\", role=\"ctb\"), person(\"Bill\", \"Evans\", role=\"ctb\") )", + "Authors@R": "c( person(\"Tyson\",\"Barrett\", role=c(\"aut\",\"cre\"), email=\"t.barrett88@gmail.com\", comment = c(ORCID=\"0000-0002-2137-1391\")), person(\"Matt\",\"Dowle\", role=\"aut\", email=\"mattjdowle@gmail.com\"), person(\"Arun\",\"Srinivasan\", role=\"aut\", email=\"asrini@pm.me\"), person(\"Jan\",\"Gorecki\", role=\"aut\", email=\"j.gorecki@wit.edu.pl\"), person(\"Michael\",\"Chirico\", role=\"aut\", email=\"michaelchirico4@gmail.com\", comment = c(ORCID=\"0000-0003-0787-087X\")), person(\"Toby\",\"Hocking\", role=\"aut\", email=\"toby.hocking@r-project.org\", comment = c(ORCID=\"0000-0002-3146-0865\")), person(\"Benjamin\",\"Schwendinger\",role=\"aut\", comment = c(ORCID=\"0000-0003-3315-8114\")), person(\"Ivan\", \"Krylov\", role=\"aut\", email=\"ikrylov@disroot.org\", comment = c(ORCID=\"0000-0002-0172-3812\")), person(\"Pasha\",\"Stetsenko\", role=\"ctb\"), person(\"Tom\",\"Short\", role=\"ctb\"), person(\"Steve\",\"Lianoglou\", role=\"ctb\"), person(\"Eduard\",\"Antonyan\", role=\"ctb\"), person(\"Markus\",\"Bonsch\", role=\"ctb\"), person(\"Hugh\",\"Parsonage\", role=\"ctb\"), person(\"Scott\",\"Ritchie\", role=\"ctb\"), person(\"Kun\",\"Ren\", role=\"ctb\"), person(\"Xianying\",\"Tan\", role=\"ctb\"), person(\"Rick\",\"Saporta\", role=\"ctb\"), person(\"Otto\",\"Seiskari\", role=\"ctb\"), person(\"Xianghui\",\"Dong\", role=\"ctb\"), person(\"Michel\",\"Lang\", role=\"ctb\"), person(\"Watal\",\"Iwasaki\", role=\"ctb\"), person(\"Seth\",\"Wenchel\", role=\"ctb\"), person(\"Karl\",\"Broman\", role=\"ctb\"), person(\"Tobias\",\"Schmidt\", role=\"ctb\"), person(\"David\",\"Arenburg\", role=\"ctb\"), person(\"Ethan\",\"Smith\", role=\"ctb\"), person(\"Francois\",\"Cocquemas\", role=\"ctb\"), person(\"Matthieu\",\"Gomez\", role=\"ctb\"), person(\"Philippe\",\"Chataignon\", role=\"ctb\"), 
person(\"Nello\",\"Blaser\", role=\"ctb\"), person(\"Dmitry\",\"Selivanov\", role=\"ctb\"), person(\"Andrey\",\"Riabushenko\", role=\"ctb\"), person(\"Cheng\",\"Lee\", role=\"ctb\"), person(\"Declan\",\"Groves\", role=\"ctb\"), person(\"Daniel\",\"Possenriede\", role=\"ctb\"), person(\"Felipe\",\"Parages\", role=\"ctb\"), person(\"Denes\",\"Toth\", role=\"ctb\"), person(\"Mus\",\"Yaramaz-David\", role=\"ctb\"), person(\"Ayappan\",\"Perumal\", role=\"ctb\"), person(\"James\",\"Sams\", role=\"ctb\"), person(\"Martin\",\"Morgan\", role=\"ctb\"), person(\"Michael\",\"Quinn\", role=\"ctb\"), person(given=\"@javrucebo\", role=\"ctb\", comment=\"GitHub user\"), person(\"Marc\",\"Halperin\", role=\"ctb\"), person(\"Roy\",\"Storey\", role=\"ctb\"), person(\"Manish\",\"Saraswat\", role=\"ctb\"), person(\"Morgan\",\"Jacob\", role=\"ctb\"), person(\"Michael\",\"Schubmehl\", role=\"ctb\"), person(\"Davis\",\"Vaughan\", role=\"ctb\"), person(\"Leonardo\",\"Silvestri\", role=\"ctb\"), person(\"Jim\",\"Hester\", role=\"ctb\"), person(\"Anthony\",\"Damico\", role=\"ctb\"), person(\"Sebastian\",\"Freundt\", role=\"ctb\"), person(\"David\",\"Simons\", role=\"ctb\"), person(\"Elliott\",\"Sales de Andrade\", role=\"ctb\"), person(\"Cole\",\"Miller\", role=\"ctb\"), person(\"Jens Peder\",\"Meldgaard\", role=\"ctb\"), person(\"Vaclav\",\"Tlapak\", role=\"ctb\"), person(\"Kevin\",\"Ushey\", role=\"ctb\"), person(\"Dirk\",\"Eddelbuettel\", role=\"ctb\"), person(\"Tony\",\"Fischetti\", role=\"ctb\"), person(\"Ofek\",\"Shilon\", role=\"ctb\"), person(\"Vadim\",\"Khotilovich\", role=\"ctb\"), person(\"Hadley\",\"Wickham\", role=\"ctb\"), person(\"Bennet\",\"Becker\", role=\"ctb\"), person(\"Kyle\",\"Haynes\", role=\"ctb\"), person(\"Boniface Christian\",\"Kamgang\", role=\"ctb\"), person(\"Olivier\",\"Delmarcell\", role=\"ctb\"), person(\"Josh\",\"O'Brien\", role=\"ctb\"), person(\"Dereck\",\"de Mezquita\", role=\"ctb\"), person(\"Michael\",\"Czekanski\", role=\"ctb\"), person(\"Dmitry\", 
\"Shemetov\", role=\"ctb\"), person(\"Nitish\", \"Jha\", role=\"ctb\"), person(\"Joshua\", \"Wu\", role=\"ctb\"), person(\"Iago\", \"Giné-Vázquez\", role=\"ctb\"), person(\"Anirban\", \"Chetia\", role=\"ctb\"), person(\"Doris\", \"Amoakohene\", role=\"ctb\"), person(\"Angel\", \"Feliz\", role=\"ctb\"), person(\"Michael\",\"Young\", role=\"ctb\"), person(\"Mark\", \"Seeto\", role=\"ctb\"), person(\"Philippe\", \"Grosjean\", role=\"ctb\"), person(\"Vincent\", \"Runge\", role=\"ctb\"), person(\"Christian\", \"Wia\", role=\"ctb\"), person(\"Elise\", \"Maigné\", role=\"ctb\"), person(\"Vincent\", \"Rocher\", role=\"ctb\"), person(\"Vijay\", \"Lulla\", role=\"ctb\"), person(\"Aljaž\", \"Sluga\", role=\"ctb\"), person(\"Bill\", \"Evans\", role=\"ctb\"), person(\"Reino\", \"Bruner\", role=\"ctb\"), person(given=\"@badasahog\", role=\"ctb\", comment=\"GitHub user\"), person(\"Vinit\", \"Thakur\", role=\"ctb\"), person(\"Mukul\", \"Kumar\", role=\"ctb\"), person(\"Ildikó\", \"Czeller\", role=\"ctb\"), person(\"Manmita\", \"Das\", role=\"ctb\") )", "NeedsCompilation": "yes", - "Author": "Tyson Barrett [aut, cre] (ORCID: ), Matt Dowle [aut], Arun Srinivasan [aut], Jan Gorecki [aut], Michael Chirico [aut] (ORCID: ), Toby Hocking [aut] (ORCID: ), Benjamin Schwendinger [aut] (ORCID: ), Ivan Krylov [aut] (ORCID: ), Pasha Stetsenko [ctb], Tom Short [ctb], Steve Lianoglou [ctb], Eduard Antonyan [ctb], Markus Bonsch [ctb], Hugh Parsonage [ctb], Scott Ritchie [ctb], Kun Ren [ctb], Xianying Tan [ctb], Rick Saporta [ctb], Otto Seiskari [ctb], Xianghui Dong [ctb], Michel Lang [ctb], Watal Iwasaki [ctb], Seth Wenchel [ctb], Karl Broman [ctb], Tobias Schmidt [ctb], David Arenburg [ctb], Ethan Smith [ctb], Francois Cocquemas [ctb], Matthieu Gomez [ctb], Philippe Chataignon [ctb], Nello Blaser [ctb], Dmitry Selivanov [ctb], Andrey Riabushenko [ctb], Cheng Lee [ctb], Declan Groves [ctb], Daniel Possenriede [ctb], Felipe Parages [ctb], Denes Toth [ctb], Mus Yaramaz-David [ctb], Ayappan Perumal 
[ctb], James Sams [ctb], Martin Morgan [ctb], Michael Quinn [ctb], @javrucebo [ctb], @marc-outins [ctb], Roy Storey [ctb], Manish Saraswat [ctb], Morgan Jacob [ctb], Michael Schubmehl [ctb], Davis Vaughan [ctb], Leonardo Silvestri [ctb], Jim Hester [ctb], Anthony Damico [ctb], Sebastian Freundt [ctb], David Simons [ctb], Elliott Sales de Andrade [ctb], Cole Miller [ctb], Jens Peder Meldgaard [ctb], Vaclav Tlapak [ctb], Kevin Ushey [ctb], Dirk Eddelbuettel [ctb], Tony Fischetti [ctb], Ofek Shilon [ctb], Vadim Khotilovich [ctb], Hadley Wickham [ctb], Bennet Becker [ctb], Kyle Haynes [ctb], Boniface Christian Kamgang [ctb], Olivier Delmarcell [ctb], Josh O'Brien [ctb], Dereck de Mezquita [ctb], Michael Czekanski [ctb], Dmitry Shemetov [ctb], Nitish Jha [ctb], Joshua Wu [ctb], Iago Giné-Vázquez [ctb], Anirban Chetia [ctb], Doris Amoakohene [ctb], Angel Feliz [ctb], Michael Young [ctb], Mark Seeto [ctb], Philippe Grosjean [ctb], Vincent Runge [ctb], Christian Wia [ctb], Elise Maigné [ctb], Vincent Rocher [ctb], Vijay Lulla [ctb], Aljaž Sluga [ctb], Bill Evans [ctb]", + "Author": "Tyson Barrett [aut, cre] (ORCID: ), Matt Dowle [aut], Arun Srinivasan [aut], Jan Gorecki [aut], Michael Chirico [aut] (ORCID: ), Toby Hocking [aut] (ORCID: ), Benjamin Schwendinger [aut] (ORCID: ), Ivan Krylov [aut] (ORCID: ), Pasha Stetsenko [ctb], Tom Short [ctb], Steve Lianoglou [ctb], Eduard Antonyan [ctb], Markus Bonsch [ctb], Hugh Parsonage [ctb], Scott Ritchie [ctb], Kun Ren [ctb], Xianying Tan [ctb], Rick Saporta [ctb], Otto Seiskari [ctb], Xianghui Dong [ctb], Michel Lang [ctb], Watal Iwasaki [ctb], Seth Wenchel [ctb], Karl Broman [ctb], Tobias Schmidt [ctb], David Arenburg [ctb], Ethan Smith [ctb], Francois Cocquemas [ctb], Matthieu Gomez [ctb], Philippe Chataignon [ctb], Nello Blaser [ctb], Dmitry Selivanov [ctb], Andrey Riabushenko [ctb], Cheng Lee [ctb], Declan Groves [ctb], Daniel Possenriede [ctb], Felipe Parages [ctb], Denes Toth [ctb], Mus Yaramaz-David [ctb], Ayappan Perumal 
[ctb], James Sams [ctb], Martin Morgan [ctb], Michael Quinn [ctb], @javrucebo [ctb] (GitHub user), Marc Halperin [ctb], Roy Storey [ctb], Manish Saraswat [ctb], Morgan Jacob [ctb], Michael Schubmehl [ctb], Davis Vaughan [ctb], Leonardo Silvestri [ctb], Jim Hester [ctb], Anthony Damico [ctb], Sebastian Freundt [ctb], David Simons [ctb], Elliott Sales de Andrade [ctb], Cole Miller [ctb], Jens Peder Meldgaard [ctb], Vaclav Tlapak [ctb], Kevin Ushey [ctb], Dirk Eddelbuettel [ctb], Tony Fischetti [ctb], Ofek Shilon [ctb], Vadim Khotilovich [ctb], Hadley Wickham [ctb], Bennet Becker [ctb], Kyle Haynes [ctb], Boniface Christian Kamgang [ctb], Olivier Delmarcell [ctb], Josh O'Brien [ctb], Dereck de Mezquita [ctb], Michael Czekanski [ctb], Dmitry Shemetov [ctb], Nitish Jha [ctb], Joshua Wu [ctb], Iago Giné-Vázquez [ctb], Anirban Chetia [ctb], Doris Amoakohene [ctb], Angel Feliz [ctb], Michael Young [ctb], Mark Seeto [ctb], Philippe Grosjean [ctb], Vincent Runge [ctb], Christian Wia [ctb], Elise Maigné [ctb], Vincent Rocher [ctb], Vijay Lulla [ctb], Aljaž Sluga [ctb], Bill Evans [ctb], Reino Bruner [ctb], @badasahog [ctb] (GitHub user), Vinit Thakur [ctb], Mukul Kumar [ctb], Ildikó Czeller [ctb], Manmita Das [ctb]", "Maintainer": "Tyson Barrett ", "Repository": "CRAN" }, @@ -488,13 +488,17 @@ }, "hms": { "Package": "hms", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", "Title": "Pretty Time of Day", - "Date": "2023-03-21", - "Authors@R": "c( person(\"Kirill\", \"Müller\", role = c(\"aut\", \"cre\"), email = \"kirill@cynkra.com\", comment = c(ORCID = \"0000-0002-1416-3412\")), person(\"R Consortium\", role = \"fnd\"), person(\"RStudio\", role = \"fnd\") )", + "Date": "2025-10-11", + "Authors@R": "c( person(\"Kirill\", \"Müller\", , \"kirill@cynkra.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-1416-3412\")), person(\"R Consortium\", role = \"fnd\"), person(\"Posit Software, PBC\", role = \"fnd\", comment = c(ROR = \"03wc8by49\")) )", 
"Description": "Implements an S3 class for storing and formatting time-of-day values, based on the 'difftime' class.", + "License": "MIT + file LICENSE", + "URL": "https://hms.tidyverse.org/, https://github.com/tidyverse/hms", + "BugReports": "https://github.com/tidyverse/hms/issues", "Imports": [ + "cli", "lifecycle", "methods", "pkgconfig", @@ -507,17 +511,12 @@ "pillar (>= 1.1.0)", "testthat (>= 3.0.0)" ], - "License": "MIT + file LICENSE", - "Encoding": "UTF-8", - "URL": "https://hms.tidyverse.org/, https://github.com/tidyverse/hms", - "BugReports": "https://github.com/tidyverse/hms/issues", - "RoxygenNote": "7.2.3", - "Config/testthat/edition": "3", - "Config/autostyle/scope": "line_breaks", - "Config/autostyle/strict": "false", "Config/Needs/website": "tidyverse/tidytemplate", + "Config/testthat/edition": "3", + "Encoding": "UTF-8", + "RoxygenNote": "7.3.3.9000", "NeedsCompilation": "no", - "Author": "Kirill Müller [aut, cre] (), R Consortium [fnd], RStudio [fnd]", + "Author": "Kirill Müller [aut, cre] (ORCID: ), R Consortium [fnd], Posit Software, PBC [fnd] (ROR: )", "Maintainer": "Kirill Müller ", "Repository": "CRAN" }, @@ -563,7 +562,7 @@ }, "httr2": { "Package": "httr2", - "Version": "1.2.1", + "Version": "1.2.2", "Source": "Repository", "Title": "Perform HTTP Requests and Process the Responses", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(\"Maximilian\", \"Girlich\", role = \"ctb\") )", @@ -599,7 +598,9 @@ "knitr", "later (>= 1.4.0)", "nanonext", - "paws.common", + "otel (>= 0.2.0)", + "otelsdk (>= 0.2.0)", + "paws.common (>= 0.8.0)", "promises", "rmarkdown", "testthat (>= 3.1.8)", @@ -613,7 +614,7 @@ "Config/testthat/parallel": "true", "Config/testthat/start-first": "resp-stream, req-perform", "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", + "RoxygenNote": "7.3.3", "NeedsCompilation": "no", "Author": "Hadley Wickham [aut, cre], 
Posit Software, PBC [cph, fnd], Maximilian Girlich [ctb]", "Maintainer": "Hadley Wickham ", @@ -697,7 +698,7 @@ }, "lifecycle": { "Package": "lifecycle", - "Version": "1.0.4", + "Version": "1.0.5", "Source": "Repository", "Title": "Manage the Life Cycle of your Package Functions", "Authors@R": "c( person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = c(\"aut\", \"cre\")), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", @@ -710,29 +711,28 @@ ], "Imports": [ "cli (>= 3.4.0)", - "glue", "rlang (>= 1.1.0)" ], "Suggests": [ "covr", - "crayon", "knitr", - "lintr", + "lintr (>= 3.1.0)", "rmarkdown", "testthat (>= 3.0.1)", "tibble", "tidyverse", "tools", "vctrs", - "withr" + "withr", + "xml2" ], "VignetteBuilder": "knitr", "Config/Needs/website": "tidyverse/tidytemplate, usethis", "Config/testthat/edition": "3", "Encoding": "UTF-8", - "RoxygenNote": "7.2.1", + "RoxygenNote": "7.3.3", "NeedsCompilation": "no", - "Author": "Lionel Henry [aut, cre], Hadley Wickham [aut] (), Posit Software, PBC [cph, fnd]", + "Author": "Lionel Henry [aut, cre], Hadley Wickham [aut] (ORCID: ), Posit Software, PBC [cph, fnd]", "Maintainer": "Lionel Henry ", "Repository": "CRAN" }, @@ -784,11 +784,11 @@ }, "magrittr": { "Package": "magrittr", - "Version": "2.0.3", + "Version": "2.0.4", "Source": "Repository", "Type": "Package", "Title": "A Forward-Pipe Operator for R", - "Authors@R": "c( person(\"Stefan Milton\", \"Bache\", , \"stefan@stefanbache.dk\", role = c(\"aut\", \"cph\"), comment = \"Original author and creator of magrittr\"), person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@rstudio.com\", role = \"cre\"), person(\"RStudio\", role = c(\"cph\", \"fnd\")) )", + "Authors@R": "c( person(\"Stefan Milton\", \"Bache\", , \"stefan@stefanbache.dk\", role = c(\"aut\", \"cph\"), comment = 
\"Original author and creator of magrittr\"), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"cre\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", "Description": "Provides a mechanism for chaining commands with a new forward-pipe operator, %>%. This operator will forward a value, or the result of an expression, into the next function call/expression. There is flexible support for the type of right-hand side expressions. For more information, see package vignette. To quote Rene Magritte, \"Ceci n'est pas un pipe.\"", "License": "MIT + file LICENSE", "URL": "https://magrittr.tidyverse.org, https://github.com/tidyverse/magrittr", @@ -807,10 +807,10 @@ "ByteCompile": "Yes", "Config/Needs/website": "tidyverse/tidytemplate", "Encoding": "UTF-8", - "RoxygenNote": "7.1.2", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", - "Author": "Stefan Milton Bache [aut, cph] (Original author and creator of magrittr), Hadley Wickham [aut], Lionel Henry [cre], RStudio [cph, fnd]", - "Maintainer": "Lionel Henry ", + "Author": "Stefan Milton Bache [aut, cph] (Original author and creator of magrittr), Hadley Wickham [aut], Lionel Henry [cre], Posit Software, PBC [cph, fnd] (ROR: )", + "Maintainer": "Lionel Henry ", "Repository": "CRAN" }, "mime": { @@ -836,7 +836,7 @@ }, "openssl": { "Package": "openssl", - "Version": "2.3.3", + "Version": "2.3.4", "Source": "Repository", "Type": "Package", "Title": "Toolkit for Encryption, Signatures and Certificates Based on OpenSSL", @@ -869,7 +869,7 @@ }, "pillar": { "Package": "pillar", - "Version": "1.11.0", + "Version": "1.11.1", "Source": "Repository", "Title": "Coloured Formatting for Columns", "Authors@R": "c(person(given = \"Kirill\", family = \"M\\u00fcller\", role = c(\"aut\", \"cre\"), email = \"kirill@cynkra.com\", comment = c(ORCID = \"0000-0002-1416-3412\")), person(given = \"Hadley\", family = 
\"Wickham\", role = \"aut\"), person(given = \"RStudio\", role = \"cph\"))", @@ -911,7 +911,7 @@ ], "VignetteBuilder": "knitr", "Encoding": "UTF-8", - "RoxygenNote": "7.3.2.9000", + "RoxygenNote": "7.3.3.9000", "Config/testthat/edition": "3", "Config/testthat/parallel": "true", "Config/testthat/start-first": "format_multi_fuzz, format_multi_fuzz_2, format_multi, ctl_colonnade, ctl_colonnade_1, ctl_colonnade_2", @@ -1008,7 +1008,7 @@ }, "purrr": { "Package": "purrr", - "Version": "1.1.0", + "Version": "1.2.1", "Source": "Repository", "Title": "Functional Programming Tools", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"aut\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"https://ror.org/03wc8by49\")) )", @@ -1027,13 +1027,13 @@ "vctrs (>= 0.6.3)" ], "Suggests": [ - "carrier (>= 0.2.0)", + "carrier (>= 0.3.0)", "covr", "dplyr (>= 0.7.8)", "httr", "knitr", "lubridate", - "mirai (>= 2.4.0)", + "mirai (>= 2.5.1)", "rmarkdown", "testthat (>= 3.0.0)", "tibble", @@ -1049,7 +1049,7 @@ "Config/testthat/edition": "3", "Config/testthat/parallel": "TRUE", "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", "Author": "Hadley Wickham [aut, cre] (ORCID: ), Lionel Henry [aut], Posit Software, PBC [cph, fnd] (ROR: )", "Maintainer": "Hadley Wickham ", @@ -1057,36 +1057,38 @@ }, "rappdirs": { "Package": "rappdirs", - "Version": "0.3.3", + "Version": "0.3.4", "Source": "Repository", "Type": "Package", "Title": "Application Directories: Determine Where to Save Data, Caches, and Logs", - "Authors@R": "c(person(given = \"Hadley\", family = \"Wickham\", role = c(\"trl\", \"cre\", \"cph\"), email = \"hadley@rstudio.com\"), person(given = \"RStudio\", role = \"cph\"), person(given = \"Sridhar\", family = \"Ratnakumar\", role = \"aut\"), person(given 
= \"Trent\", family = \"Mick\", role = \"aut\"), person(given = \"ActiveState\", role = \"cph\", comment = \"R/appdir.r, R/cache.r, R/data.r, R/log.r translated from appdirs\"), person(given = \"Eddy\", family = \"Petrisor\", role = \"ctb\"), person(given = \"Trevor\", family = \"Davis\", role = c(\"trl\", \"aut\")), person(given = \"Gabor\", family = \"Csardi\", role = \"ctb\"), person(given = \"Gregory\", family = \"Jefferis\", role = \"ctb\"))", + "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"trl\", \"cre\", \"cph\")), person(\"Sridhar\", \"Ratnakumar\", role = \"aut\"), person(\"Trent\", \"Mick\", role = \"aut\"), person(\"ActiveState\", role = \"cph\", comment = \"R/appdir.r, R/cache.r, R/data.r, R/log.r translated from appdirs\"), person(\"Eddy\", \"Petrisor\", role = \"ctb\"), person(\"Trevor\", \"Davis\", role = c(\"trl\", \"aut\"), comment = c(ORCID = \"0000-0001-6341-4639\")), person(\"Gabor\", \"Csardi\", role = \"ctb\"), person(\"Gregory\", \"Jefferis\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", "Description": "An easy way to determine which directories on the users computer you should use to save data, caches and logs. A port of Python's 'Appdirs' () to R.", "License": "MIT + file LICENSE", "URL": "https://rappdirs.r-lib.org, https://github.com/r-lib/rappdirs", "BugReports": "https://github.com/r-lib/rappdirs/issues", "Depends": [ - "R (>= 3.2)" + "R (>= 4.1)" ], "Suggests": [ - "roxygen2", - "testthat (>= 3.0.0)", "covr", + "roxygen2", + "testthat (>= 3.2.0)", "withr" ], - "Copyright": "Original python appdirs module copyright (c) 2010 ActiveState Software Inc. R port copyright Hadley Wickham, RStudio. 
See file LICENSE for details.", - "Encoding": "UTF-8", - "RoxygenNote": "7.1.1", + "Config/Needs/website": "tidyverse/tidytemplate", "Config/testthat/edition": "3", + "Config/usethis/last-upkeep": "2025-05-05", + "Copyright": "Original python appdirs module copyright (c) 2010 ActiveState Software Inc. R port copyright Hadley Wickham, Posit, PBC. See file LICENSE for details.", + "Encoding": "UTF-8", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", - "Author": "Hadley Wickham [trl, cre, cph], RStudio [cph], Sridhar Ratnakumar [aut], Trent Mick [aut], ActiveState [cph] (R/appdir.r, R/cache.r, R/data.r, R/log.r translated from appdirs), Eddy Petrisor [ctb], Trevor Davis [trl, aut], Gabor Csardi [ctb], Gregory Jefferis [ctb]", - "Maintainer": "Hadley Wickham ", + "Author": "Hadley Wickham [trl, cre, cph], Sridhar Ratnakumar [aut], Trent Mick [aut], ActiveState [cph] (R/appdir.r, R/cache.r, R/data.r, R/log.r translated from appdirs), Eddy Petrisor [ctb], Trevor Davis [trl, aut] (ORCID: ), Gabor Csardi [ctb], Gregory Jefferis [ctb], Posit Software, PBC [cph, fnd] (ROR: )", + "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, "readr": { "Package": "readr", - "Version": "2.1.5", + "Version": "2.1.6", "Source": "Repository", "Title": "Read Rectangular Text Data", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Jim\", \"Hester\", role = \"aut\"), person(\"Romain\", \"Francois\", role = \"ctb\"), person(\"Jennifer\", \"Bryan\", , \"jenny@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6983-2759\")), person(\"Shelby\", \"Bearrows\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(\"https://github.com/mandreyel/\", role = \"cph\", comment = \"mio library\"), person(\"Jukka\", \"Jylänki\", role = c(\"ctb\", \"cph\"), comment = \"grisu3 implementation\"), person(\"Mikkel\", \"Jørgensen\", role = c(\"ctb\", \"cph\"), comment = \"grisu3 implementation\") )", @@ 
-1134,15 +1136,15 @@ "Config/testthat/parallel": "false", "Encoding": "UTF-8", "Language": "en-US", - "RoxygenNote": "7.2.3", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", - "Author": "Hadley Wickham [aut], Jim Hester [aut], Romain Francois [ctb], Jennifer Bryan [aut, cre] (), Shelby Bearrows [ctb], Posit Software, PBC [cph, fnd], https://github.com/mandreyel/ [cph] (mio library), Jukka Jylänki [ctb, cph] (grisu3 implementation), Mikkel Jørgensen [ctb, cph] (grisu3 implementation)", + "Author": "Hadley Wickham [aut], Jim Hester [aut], Romain Francois [ctb], Jennifer Bryan [aut, cre] (ORCID: ), Shelby Bearrows [ctb], Posit Software, PBC [cph, fnd], https://github.com/mandreyel/ [cph] (mio library), Jukka Jylänki [ctb, cph] (grisu3 implementation), Mikkel Jørgensen [ctb, cph] (grisu3 implementation)", "Maintainer": "Jennifer Bryan ", - "Repository": "RSPM" + "Repository": "CRAN" }, "renv": { "Package": "renv", - "Version": "1.1.4", + "Version": "1.1.7", "Source": "Repository", "Type": "Package", "Title": "Project Environments", @@ -1160,7 +1162,9 @@ "compiler", "covr", "cpp11", + "curl", "devtools", + "generics", "gitcreds", "jsonlite", "jsonvalidate", @@ -1182,20 +1186,20 @@ "webfakes" ], "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", + "RoxygenNote": "7.3.3", "VignetteBuilder": "knitr", "Config/Needs/website": "tidyverse/tidytemplate", "Config/testthat/edition": "3", "Config/testthat/parallel": "true", "Config/testthat/start-first": "bioconductor,python,install,restore,snapshot,retrieve,remotes", "NeedsCompilation": "no", - "Author": "Kevin Ushey [aut, cre] (), Hadley Wickham [aut] (), Posit Software, PBC [cph, fnd]", + "Author": "Kevin Ushey [aut, cre] (ORCID: ), Hadley Wickham [aut] (ORCID: ), Posit Software, PBC [cph, fnd]", "Maintainer": "Kevin Ushey ", "Repository": "CRAN" }, "rlang": { "Package": "rlang", - "Version": "1.1.6", + "Version": "1.1.7", "Source": "Repository", "Title": "Functions for Base Types and Core R and 'Tidyverse' Features", 
"Description": "A toolbox for working with base types, core R features like the condition system, and core 'Tidyverse' features like tidy evaluation.", @@ -1204,7 +1208,7 @@ "ByteCompile": "true", "Biarch": "true", "Depends": [ - "R (>= 3.5.0)" + "R (>= 4.0.0)" ], "Imports": [ "utils" @@ -1233,7 +1237,7 @@ "winch" ], "Encoding": "UTF-8", - "RoxygenNote": "7.3.2", + "RoxygenNote": "7.3.3", "URL": "https://rlang.r-lib.org, https://github.com/r-lib/rlang", "BugReports": "https://github.com/r-lib/rlang/issues", "Config/build/compilation-database": "true", @@ -1246,16 +1250,16 @@ }, "rvest": { "Package": "rvest", - "Version": "1.0.4", + "Version": "1.0.5", "Source": "Repository", "Title": "Easily Harvest (Scrape) Web Pages", - "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", + "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", "Description": "Wrappers around the 'xml2' and 'httr' packages to make it easy to download, then manipulate, HTML and XML.", "License": "MIT + file LICENSE", "URL": "https://rvest.tidyverse.org/, https://github.com/tidyverse/rvest", "BugReports": "https://github.com/tidyverse/rvest/issues", "Depends": [ - "R (>= 3.6)" + "R (>= 4.1)" ], "Imports": [ "cli", @@ -1266,12 +1270,13 @@ "rlang (>= 1.1.0)", "selectr", "tibble", - "xml2 (>= 1.3)" + "xml2 (>= 1.4.0)" ], "Suggests": [ "chromote", "covr", "knitr", + "purrr", "R6", "readr", "repurrrsive", @@ -1279,6 +1284,7 @@ "spelling", "stringi (>= 0.3.1)", "testthat (>= 3.0.2)", + "tidyr", "webfakes" ], "VignetteBuilder": "knitr", @@ -1287,19 +1293,18 @@ "Config/testthat/parallel": "true", "Encoding": "UTF-8", "Language": "en-US", - "RoxygenNote": "7.3.1", + "RoxygenNote": "7.3.2", "NeedsCompilation": "no", - "Author": "Hadley Wickham [aut, cre], 
Posit Software, PBC [cph, fnd]", + "Author": "Hadley Wickham [aut, cre], Posit Software, PBC [cph, fnd] (ROR: )", "Maintainer": "Hadley Wickham ", "Repository": "CRAN" }, "selectr": { "Package": "selectr", - "Version": "0.4-2", + "Version": "0.5-1", "Source": "Repository", "Type": "Package", "Title": "Translate CSS Selectors to XPath Expressions", - "Date": "2019-11-20", "Authors@R": "c(person(\"Simon\", \"Potter\", role = c(\"aut\", \"trl\", \"cre\"), email = \"simon@sjp.co.nz\"), person(\"Simon\", \"Sapin\", role = \"aut\"), person(\"Ian\", \"Bicking\", role = \"aut\"))", "License": "BSD_3_clause + file LICENCE", "Depends": [ @@ -1315,9 +1320,9 @@ "XML", "xml2" ], - "URL": "https://sjp.co.nz/projects/selectr", + "URL": "https://sjp.co.nz/projects/selectr/", "BugReports": "https://github.com/sjp/selectr/issues", - "Description": "Translates a CSS3 selector into an equivalent XPath expression. This allows us to use CSS selectors when working with the XML package as it can only evaluate XPath expressions. Also provided are convenience functions useful for using CSS selectors on XML nodes. This package is a port of the Python package 'cssselect' ().", + "Description": "Translates a CSS selector into an equivalent XPath expression. This allows us to use CSS selectors when working with the XML package as it can only evaluate XPath expressions. Also provided are convenience functions useful for using CSS selectors on XML nodes. 
This package is a port of the Python package 'cssselect' ().", "NeedsCompilation": "no", "Author": "Simon Potter [aut, trl, cre], Simon Sapin [aut], Ian Bicking [aut]", "Maintainer": "Simon Potter ", @@ -1390,7 +1395,7 @@ }, "stringr": { "Package": "stringr", - "Version": "1.5.1", + "Version": "1.6.0", "Source": "Repository", "Title": "Simple, Consistent Wrappers for Common String Operations", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\", \"cph\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", @@ -1399,7 +1404,7 @@ "URL": "https://stringr.tidyverse.org, https://github.com/tidyverse/stringr", "BugReports": "https://github.com/tidyverse/stringr/issues", "Depends": [ - "R (>= 3.6)" + "R (>= 4.1.0)" ], "Imports": [ "cli", @@ -1423,10 +1428,11 @@ ], "VignetteBuilder": "knitr", "Config/Needs/website": "tidyverse/tidytemplate", + "Config/potools/style": "explicit", "Config/testthat/edition": "3", "Encoding": "UTF-8", "LazyData": "true", - "RoxygenNote": "7.2.3", + "RoxygenNote": "7.3.3", "NeedsCompilation": "no", "Author": "Hadley Wickham [aut, cre, cph], Posit Software, PBC [cph, fnd]", "Maintainer": "Hadley Wickham ", @@ -1458,10 +1464,10 @@ }, "tibble": { "Package": "tibble", - "Version": "3.3.0", + "Version": "3.3.1", "Source": "Repository", "Title": "Simple Data Frames", - "Authors@R": "c(person(given = \"Kirill\", family = \"M\\u00fcller\", role = c(\"aut\", \"cre\"), email = \"kirill@cynkra.com\", comment = c(ORCID = \"0000-0002-1416-3412\")), person(given = \"Hadley\", family = \"Wickham\", role = \"aut\", email = \"hadley@rstudio.com\"), person(given = \"Romain\", family = \"Francois\", role = \"ctb\", email = \"romain@r-enthusiasts.com\"), person(given = \"Jennifer\", family = \"Bryan\", role = \"ctb\", email = \"jenny@rstudio.com\"), person(given = \"RStudio\", role = c(\"cph\", \"fnd\")))", + "Authors@R": "c( person(\"Kirill\", \"Müller\", , \"kirill@cynkra.com\", role = c(\"aut\", 
\"cre\"), comment = c(ORCID = \"0000-0002-1416-3412\")), person(\"Hadley\", \"Wickham\", , \"hadley@rstudio.com\", role = \"aut\"), person(\"Romain\", \"Francois\", , \"romain@r-enthusiasts.com\", role = \"ctb\"), person(\"Jennifer\", \"Bryan\", , \"jenny@rstudio.com\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", "Description": "Provides a 'tbl_df' class (the 'tibble') with stricter checking and better formatting than the traditional data frame.", "License": "MIT + file LICENSE", "URL": "https://tibble.tidyverse.org/, https://github.com/tidyverse/tibble", @@ -1506,36 +1512,37 @@ "withr" ], "VignetteBuilder": "knitr", - "Encoding": "UTF-8", - "RoxygenNote": "7.3.2.9000", - "Config/testthat/edition": "3", - "Config/testthat/parallel": "true", - "Config/testthat/start-first": "vignette-formats, as_tibble, add, invariants", + "Config/autostyle/rmd": "false", "Config/autostyle/scope": "line_breaks", "Config/autostyle/strict": "true", - "Config/autostyle/rmd": "false", "Config/Needs/website": "tidyverse/tidytemplate", + "Config/testthat/edition": "3", + "Config/testthat/parallel": "true", + "Config/testthat/start-first": "vignette-formats, as_tibble, add, invariants", + "Config/usethis/last-upkeep": "2025-06-07", + "Encoding": "UTF-8", + "RoxygenNote": "7.3.3.9000", "NeedsCompilation": "yes", - "Author": "Kirill Müller [aut, cre] (ORCID: ), Hadley Wickham [aut], Romain Francois [ctb], Jennifer Bryan [ctb], RStudio [cph, fnd]", + "Author": "Kirill Müller [aut, cre] (ORCID: ), Hadley Wickham [aut], Romain Francois [ctb], Jennifer Bryan [ctb], Posit Software, PBC [cph, fnd] (ROR: )", "Maintainer": "Kirill Müller ", "Repository": "CRAN" }, "tidyr": { "Package": "tidyr", - "Version": "1.3.1", + "Version": "1.3.2", "Source": "Repository", "Title": "Tidy Messy Data", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = c(\"aut\", \"cre\")), person(\"Davis\", \"Vaughan\", , 
\"davis@posit.co\", role = \"aut\"), person(\"Maximilian\", \"Girlich\", role = \"aut\"), person(\"Kevin\", \"Ushey\", , \"kevin@posit.co\", role = \"ctb\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", - "Description": "Tools to help to create tidy data, where each column is a variable, each row is an observation, and each cell contains a single value. 'tidyr' contains tools for changing the shape (pivoting) and hierarchy (nesting and 'unnesting') of a dataset, turning deeply nested lists into rectangular data frames ('rectangling'), and extracting values out of string columns. It also includes tools for working with missing values (both implicit and explicit).", + "Description": "Tools to help to create tidy data, where each column is a variable, each row is an observation, and each cell contains a single value. 'tidyr' contains tools for changing the shape (pivoting) and hierarchy (nesting and 'unnesting') of a dataset, turning deeply nested lists into rectangular data frames ('rectangling'), and extracting values out of string columns. 
It also includes tools for working with missing values (both implicit and explicit).", "License": "MIT + file LICENSE", "URL": "https://tidyr.tidyverse.org, https://github.com/tidyverse/tidyr", "BugReports": "https://github.com/tidyverse/tidyr/issues", "Depends": [ - "R (>= 3.6)" + "R (>= 4.1.0)" ], "Imports": [ "cli (>= 3.4.1)", - "dplyr (>= 1.0.10)", + "dplyr (>= 1.1.0)", "glue", "lifecycle (>= 1.0.3)", "magrittr", @@ -1543,7 +1550,7 @@ "rlang (>= 1.1.1)", "stringr (>= 1.5.0)", "tibble (>= 2.1.1)", - "tidyselect (>= 1.2.0)", + "tidyselect (>= 1.2.1)", "utils", "vctrs (>= 0.5.2)" ], @@ -1560,11 +1567,12 @@ "cpp11 (>= 0.4.0)" ], "VignetteBuilder": "knitr", + "Config/build/compilation-database": "true", "Config/Needs/website": "tidyverse/tidytemplate", "Config/testthat/edition": "3", "Encoding": "UTF-8", "LazyData": "true", - "RoxygenNote": "7.3.0", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", "Author": "Hadley Wickham [aut, cre], Davis Vaughan [aut], Maximilian Girlich [aut], Kevin Ushey [ctb], Posit Software, PBC [cph, fnd]", "Maintainer": "Hadley Wickham ", @@ -1650,7 +1658,7 @@ }, "timechange": { "Package": "timechange", - "Version": "0.3.0", + "Version": "0.4.0", "Source": "Repository", "Title": "Efficient Manipulation of Date-Times", "Authors@R": "c(person(\"Vitalie\", \"Spinu\", email = \"spinuvit@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Google Inc.\", role = c(\"ctb\", \"cph\")))", @@ -1667,7 +1675,7 @@ "testthat (>= 0.7.1.99)", "knitr" ], - "SystemRequirements": "A system with zoneinfo data (e.g. /usr/share/zoneinfo) as well as a recent-enough C++11 compiler (such as g++-4.8 or later). On Windows the zoneinfo included with R is used.", + "SystemRequirements": "A system with zoneinfo data (e.g. /usr/share/zoneinfo). 
On Windows the zoneinfo included with R is used.", "BugReports": "https://github.com/vspinu/timechange/issues", "URL": "https://github.com/vspinu/timechange/", "RoxygenNote": "7.2.1", @@ -1739,7 +1747,7 @@ }, "vctrs": { "Package": "vctrs", - "Version": "0.6.5", + "Version": "0.7.1", "Source": "Repository", "Title": "Vector Helpers", "Authors@R": "c( person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\"), person(\"Lionel\", \"Henry\", , \"lionel@posit.co\", role = \"aut\"), person(\"Davis\", \"Vaughan\", , \"davis@posit.co\", role = c(\"aut\", \"cre\")), person(\"data.table team\", role = \"cph\", comment = \"Radix sort based on data.table's forder() and their contribution to R's order()\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", @@ -1748,13 +1756,13 @@ "URL": "https://vctrs.r-lib.org/, https://github.com/r-lib/vctrs", "BugReports": "https://github.com/r-lib/vctrs/issues", "Depends": [ - "R (>= 3.5.0)" + "R (>= 4.0.0)" ], "Imports": [ "cli (>= 3.4.0)", "glue", "lifecycle (>= 1.0.3)", - "rlang (>= 1.1.0)" + "rlang (>= 1.1.7)" ], "Suggests": [ "bit64", @@ -1774,11 +1782,13 @@ "zeallot" ], "VignetteBuilder": "knitr", + "Config/build/compilation-database": "true", "Config/Needs/website": "tidyverse/tidytemplate", "Config/testthat/edition": "3", + "Config/testthat/parallel": "true", "Encoding": "UTF-8", "Language": "en-GB", - "RoxygenNote": "7.2.3", + "RoxygenNote": "7.3.3", "NeedsCompilation": "yes", "Author": "Hadley Wickham [aut], Lionel Henry [aut], Davis Vaughan [aut, cre], data.table team [cph] (Radix sort based on data.table's forder() and their contribution to R's order()), Posit Software, PBC [cph, fnd]", "Maintainer": "Davis Vaughan ", @@ -1786,16 +1796,16 @@ }, "vroom": { "Package": "vroom", - "Version": "1.6.5", + "Version": "1.7.0", "Source": "Repository", "Title": "Read and Write Rectangular Text Data Quickly", - "Authors@R": "c( person(\"Jim\", \"Hester\", role = \"aut\", comment = c(ORCID = 
\"0000-0002-2739-7082\")), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Jennifer\", \"Bryan\", , \"jenny@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6983-2759\")), person(\"Shelby\", \"Bearrows\", role = \"ctb\"), person(\"https://github.com/mandreyel/\", role = \"cph\", comment = \"mio library\"), person(\"Jukka\", \"Jylänki\", role = \"cph\", comment = \"grisu3 implementation\"), person(\"Mikkel\", \"Jørgensen\", role = \"cph\", comment = \"grisu3 implementation\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")) )", + "Authors@R": "c( person(\"Jim\", \"Hester\", role = \"aut\", comment = c(ORCID = \"0000-0002-2739-7082\")), person(\"Hadley\", \"Wickham\", , \"hadley@posit.co\", role = \"aut\", comment = c(ORCID = \"0000-0003-4757-117X\")), person(\"Jennifer\", \"Bryan\", , \"jenny@posit.co\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6983-2759\")), person(\"Shelby\", \"Bearrows\", role = \"ctb\"), person(\"https://github.com/mandreyel/\", role = \"cph\", comment = \"mio library\"), person(\"Jukka\", \"Jylänki\", role = \"cph\", comment = \"grisu3 implementation\"), person(\"Mikkel\", \"Jørgensen\", role = \"cph\", comment = \"grisu3 implementation\"), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\"), comment = c(ROR = \"03wc8by49\")) )", "Description": "The goal of 'vroom' is to read and write data (like 'csv', 'tsv' and 'fwf') quickly. When reading it uses a quick initial indexing step, then reads the values lazily , so only the data you actually use needs to be read. 
The writer formats the data in parallel and writes to disk asynchronously from formatting.", "License": "MIT + file LICENSE", - "URL": "https://vroom.r-lib.org, https://github.com/tidyverse/vroom", + "URL": "https://vroom.tidyverse.org, https://github.com/tidyverse/vroom", "BugReports": "https://github.com/tidyverse/vroom/issues", "Depends": [ - "R (>= 3.6)" + "R (>= 4.1)" ], "Imports": [ "bit64", @@ -1805,7 +1815,7 @@ "hms", "lifecycle (>= 1.0.3)", "methods", - "rlang (>= 0.4.2)", + "rlang (>= 1.1.0)", "stats", "tibble (>= 2.0.0)", "tidyselect", @@ -1838,21 +1848,23 @@ ], "LinkingTo": [ "cpp11 (>= 0.2.0)", - "progress (>= 1.2.1)", + "progress (>= 1.2.3)", "tzdb (>= 0.1.1)" ], "VignetteBuilder": "knitr", "Config/Needs/website": "nycflights13, tidyverse/tidytemplate", "Config/testthat/edition": "3", "Config/testthat/parallel": "false", + "Config/usethis/last-upkeep": "2025-11-25", "Copyright": "file COPYRIGHTS", "Encoding": "UTF-8", "Language": "en-US", - "RoxygenNote": "7.2.3.9000", + "RoxygenNote": "7.3.3", + "Config/build/compilation-database": "true", "NeedsCompilation": "yes", - "Author": "Jim Hester [aut] (), Hadley Wickham [aut] (), Jennifer Bryan [aut, cre] (), Shelby Bearrows [ctb], https://github.com/mandreyel/ [cph] (mio library), Jukka Jylänki [cph] (grisu3 implementation), Mikkel Jørgensen [cph] (grisu3 implementation), Posit Software, PBC [cph, fnd]", + "Author": "Jim Hester [aut] (ORCID: ), Hadley Wickham [aut] (ORCID: ), Jennifer Bryan [aut, cre] (ORCID: ), Shelby Bearrows [ctb], https://github.com/mandreyel/ [cph] (mio library), Jukka Jylänki [cph] (grisu3 implementation), Mikkel Jørgensen [cph] (grisu3 implementation), Posit Software, PBC [cph, fnd] (ROR: )", "Maintainer": "Jennifer Bryan ", - "Repository": "RSPM" + "Repository": "CRAN" }, "withr": { "Package": "withr", @@ -1894,7 +1906,7 @@ }, "xml2": { "Package": "xml2", - "Version": "1.3.8", + "Version": "1.5.2", "Source": "Repository", "Title": "Parse XML", "Authors@R": "c( person(\"Hadley\", 
\"Wickham\", role = \"aut\"), person(\"Jim\", \"Hester\", role = \"aut\"), person(\"Jeroen\", \"Ooms\", email = \"jeroenooms@gmail.com\", role = c(\"aut\", \"cre\")), person(\"Posit Software, PBC\", role = c(\"cph\", \"fnd\")), person(\"R Foundation\", role = \"ctb\", comment = \"Copy of R-project homepage cached as example\") )", @@ -1915,7 +1927,6 @@ "curl", "httr", "knitr", - "magrittr", "mockery", "rmarkdown", "testthat (>= 3.2.0)", @@ -1924,7 +1935,7 @@ "VignetteBuilder": "knitr", "Config/Needs/website": "tidyverse/tidytemplate", "Encoding": "UTF-8", - "RoxygenNote": "7.2.3", + "RoxygenNote": "7.3.3", "SystemRequirements": "libxml2: libxml2-dev (deb), libxml2-devel (rpm)", "Collate": "'S4.R' 'as_list.R' 'xml_parse.R' 'as_xml_document.R' 'classes.R' 'format.R' 'import-standalone-obj-type.R' 'import-standalone-purrr.R' 'import-standalone-types-check.R' 'init.R' 'nodeset_apply.R' 'paths.R' 'utils.R' 'xml2-package.R' 'xml_attr.R' 'xml_children.R' 'xml_document.R' 'xml_find.R' 'xml_missing.R' 'xml_modify.R' 'xml_name.R' 'xml_namespaces.R' 'xml_node.R' 'xml_nodeset.R' 'xml_path.R' 'xml_schema.R' 'xml_serialize.R' 'xml_structure.R' 'xml_text.R' 'xml_type.R' 'xml_url.R' 'xml_write.R' 'zzz.R'", "Config/testthat/edition": "3", diff --git a/renv/activate.R b/renv/activate.R index 256edab..512fdc8 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -2,7 +2,8 @@ local({ # the requested version of renv - version <- "1.1.4" + version <- "1.1.7" + attr(version, "md5") <- "dd5d60f155dadff4c88c2fc6680504b4" attr(version, "sha") <- NULL # the project directory @@ -168,6 +169,16 @@ local({ if (quiet) return(invisible()) + # also check for config environment variables that should suppress messages + # https://github.com/rstudio/renv/issues/2214 + enabled <- Sys.getenv("RENV_CONFIG_STARTUP_QUIET", unset = NA) + if (!is.na(enabled) && tolower(enabled) %in% c("true", "1")) + return(invisible()) + + enabled <- Sys.getenv("RENV_CONFIG_SYNCHRONIZED_CHECK", unset = NA) + if 
(!is.na(enabled) && tolower(enabled) %in% c("false", "0")) + return(invisible()) + msg <- sprintf(fmt, ...) cat(msg, file = stdout(), sep = if (appendLF) "\n" else "") @@ -215,6 +226,16 @@ local({ section <- header(sprintf("Bootstrapping renv %s", friendly)) catf(section) + # try to install renv from cache + md5 <- attr(version, "md5", exact = TRUE) + if (length(md5)) { + pkgpath <- renv_bootstrap_find(version) + if (length(pkgpath) && file.exists(pkgpath)) { + file.copy(pkgpath, library, recursive = TRUE) + return(invisible()) + } + } + # attempt to download renv catf("- Downloading renv ... ", appendLF = FALSE) withCallingHandlers( @@ -240,7 +261,6 @@ local({ # add empty line to break up bootstrapping from normal output catf("") - return(invisible()) } @@ -257,12 +277,20 @@ local({ repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) if (!is.na(repos)) { - # check for RSPM; if set, use a fallback repository for renv - rspm <- Sys.getenv("RSPM", unset = NA) - if (identical(rspm, repos)) - repos <- c(RSPM = rspm, CRAN = cran) + # split on ';' if present + parts <- strsplit(repos, ";", fixed = TRUE)[[1L]] - return(repos) + # split into named repositories if present + idx <- regexpr("=", parts, fixed = TRUE) + keys <- substring(parts, 1L, idx - 1L) + vals <- substring(parts, idx + 1L) + names(vals) <- keys + + # if we have a single unnamed repository, call it CRAN + if (length(vals) == 1L && identical(keys, "")) + names(vals) <- "CRAN" + + return(vals) } @@ -511,6 +539,51 @@ local({ } + renv_bootstrap_find <- function(version) { + + path <- renv_bootstrap_find_cache(version) + if (length(path) && file.exists(path)) { + catf("- Using renv %s from global package cache", version) + return(path) + } + + } + + renv_bootstrap_find_cache <- function(version) { + + md5 <- attr(version, "md5", exact = TRUE) + if (is.null(md5)) + return() + + # infer path to renv cache + cache <- Sys.getenv("RENV_PATHS_CACHE", unset = "") + if (!nzchar(cache)) { + root <- 
Sys.getenv("RENV_PATHS_ROOT", unset = NA) + if (!is.na(root)) + cache <- file.path(root, "cache") + } + + if (!nzchar(cache)) { + tools <- asNamespace("tools") + if (is.function(tools$R_user_dir)) { + root <- tools$R_user_dir("renv", "cache") + cache <- file.path(root, "cache") + } + } + + # start completing path to cache + file.path( + cache, + renv_bootstrap_cache_version(), + renv_bootstrap_platform_prefix(), + "renv", + version, + md5, + "renv" + ) + + } + renv_bootstrap_download_tarball <- function(version) { # if the user has provided the path to a tarball via @@ -979,7 +1052,7 @@ local({ renv_bootstrap_validate_version_release <- function(version, description) { expected <- description[["Version"]] - is.character(expected) && identical(expected, version) + is.character(expected) && identical(c(expected), c(version)) } renv_bootstrap_hash_text <- function(text) { @@ -1181,6 +1254,18 @@ local({ } + renv_bootstrap_cache_version <- function() { + # NOTE: users should normally not override the cache version; + # this is provided just to make testing easier + Sys.getenv("RENV_CACHE_VERSION", unset = "v5") + } + + renv_bootstrap_cache_version_previous <- function() { + version <- renv_bootstrap_cache_version() + number <- as.integer(substring(version, 2L)) + paste("v", number - 1L, sep = "") + } + renv_json_read <- function(file = NULL, text = NULL) { jlerr <- NULL diff --git a/tests/testthat/test-crosswalk_data.R b/tests/testthat/test-crosswalk_data.R index b8e692f..a2ac389 100644 --- a/tests/testthat/test-crosswalk_data.R +++ b/tests/testthat/test-crosswalk_data.R @@ -966,3 +966,4 @@ test_that("crosswalk_data other columns work correctly with one-to-many mapping" # Both target geographies should have data_year preserved expect_true(all(result$data_year == 2020)) }) + diff --git a/tests/testthat/test-get_ctdata_crosswalk.R b/tests/testthat/test-get_ctdata_crosswalk.R index 67b1e82..51eb099 100644 --- a/tests/testthat/test-get_ctdata_crosswalk.R +++ 
b/tests/testthat/test-get_ctdata_crosswalk.R @@ -279,3 +279,118 @@ test_that("CT county crosswalk maps 8 old counties to 9 planning regions", { expect_true(all(non_ct_result$allocation_factor_source_to_target == 1)) expect_true(all(non_ct_result$source_geoid == non_ct_result$target_geoid)) }) + +# ============================================================================== +# Reverse direction tests (2022 -> 2020) +# ============================================================================== + +test_that("get_ctdata_crosswalk supports 2022 -> 2020 for tracts", { + skip_if_offline() + skip_if(Sys.getenv("IPUMS_API_KEY") == "", "IPUMS_API_KEY not set") + + result <- get_ctdata_crosswalk(geography = "tract", source_year = 2022, target_year = 2020) + + expect_s3_class(result, "tbl_df") + + # Check years are reversed + expect_equal(unique(result$source_year), "2022") + expect_equal(unique(result$target_year), "2020") + + # CT tracts should have reversed geoids (2022 FIPS -> 2020 FIPS) + ct_tracts <- result |> dplyr::filter(state_fips == "09") + expect_equal(nrow(ct_tracts), 879) + expect_true(all(ct_tracts$source_geoid != ct_tracts$target_geoid)) + + # Non-CT tracts should still have identical geoids (identity mapping both directions) + non_ct_tracts <- result |> dplyr::filter(state_fips != "09") + expect_true(all(non_ct_tracts$source_geoid == non_ct_tracts$target_geoid)) + + # Allocation factors should all be 1 (identity) + expect_true(all(result$allocation_factor_source_to_target == 1)) +}) + +test_that("get_ctdata_crosswalk supports 2022 -> 2020 for blocks", { + skip_if_offline() + skip_if(Sys.getenv("IPUMS_API_KEY") == "", "IPUMS_API_KEY not set") + + result <- get_ctdata_crosswalk(geography = "block", source_year = 2022, target_year = 2020) + + expect_s3_class(result, "tbl_df") + expect_equal(unique(result$source_year), "2022") + expect_equal(unique(result$target_year), "2020") + + # Block GEOIDs should be 15 characters + 
expect_true(all(stringr::str_length(result$source_geoid) == 15)) + expect_true(all(stringr::str_length(result$target_geoid) == 15)) + + # All records should have allocation_factor = 1 + expect_true(all(result$allocation_factor_source_to_target == 1)) +}) + +test_that("get_ctdata_crosswalk supports 2022 -> 2020 for block_groups", { + skip_if_offline() + skip_if(Sys.getenv("IPUMS_API_KEY") == "", "IPUMS_API_KEY not set") + + result <- get_ctdata_crosswalk(geography = "block_group", source_year = 2022, target_year = 2020) + + expect_s3_class(result, "tbl_df") + expect_equal(unique(result$source_year), "2022") + expect_equal(unique(result$target_year), "2020") + + # Block group GEOIDs should be 12 characters + expect_true(all(stringr::str_length(result$source_geoid) == 12)) + expect_true(all(stringr::str_length(result$target_geoid) == 12)) + + # All records should have allocation_factor = 1 + expect_true(all(result$allocation_factor_source_to_target == 1)) +}) + +test_that("get_ctdata_crosswalk errors on 2022 -> 2020 for county", { + expect_error( + get_ctdata_crosswalk(geography = "county", source_year = 2022, target_year = 2020), + regexp = "County crosswalks from 2022 to 2020 are not supported") +}) + +test_that("2022 -> 2020 crosswalk is inverse of 2020 -> 2022 for tracts", { + skip_if_offline() + skip_if(Sys.getenv("IPUMS_API_KEY") == "", "IPUMS_API_KEY not set") + + forward <- get_ctdata_crosswalk(geography = "tract", source_year = 2020, target_year = 2022) + reverse <- get_ctdata_crosswalk(geography = "tract", source_year = 2022, target_year = 2020) + + # Same number of rows + expect_equal(nrow(forward), nrow(reverse)) + + # Forward source_geoid should match reverse target_geoid + forward_ct <- forward |> + dplyr::filter(state_fips == "09") |> + dplyr::arrange(source_geoid) + reverse_ct <- reverse |> + dplyr::filter(state_fips == "09") |> + dplyr::arrange(target_geoid) + + expect_equal(forward_ct$source_geoid, reverse_ct$target_geoid) + 
expect_equal(forward_ct$target_geoid, reverse_ct$source_geoid) +}) + +test_that("get_ctdata_crosswalk metadata reflects direction", { + skip_if_offline() + skip_if(Sys.getenv("IPUMS_API_KEY") == "", "IPUMS_API_KEY not set") + + result <- get_ctdata_crosswalk(geography = "tract", source_year = 2022, target_year = 2020) + + metadata <- attr(result, "crosswalk_metadata") + + expect_equal(metadata$source_year, "2022") + expect_equal(metadata$target_year, "2020") +}) + +test_that("get_ctdata_crosswalk errors on invalid year combinations", { + expect_error( + get_ctdata_crosswalk(geography = "tract", source_year = 2010, target_year = 2020), + regexp = "CTData crosswalks are only available for 2020 <-> 2022") + + expect_error( + get_ctdata_crosswalk(geography = "tract", source_year = 2020, target_year = 2010), + regexp = "CTData crosswalks are only available for 2020 <-> 2022") +}) diff --git a/vignettes/standardizing-longitudinal-data.Rmd b/vignettes/standardizing-longitudinal-data.Rmd index af715eb..bd3603d 100644 --- a/vignettes/standardizing-longitudinal-data.Rmd +++ b/vignettes/standardizing-longitudinal-data.Rmd @@ -11,13 +11,14 @@ vignette: > knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - eval = FALSE -) + message = FALSE, + echo = TRUE, + eval = TRUE) ``` ## Overview -A common challenge with longitudinal tract-level data is that census tract boundaries +A common challenge with longitudinal tract data is that census tract boundaries change between decennial censuses. Data from before 2020 typically uses 2010 tract definitions, while more recent data uses 2020 tract definitions. To analyze trends over time, you need to standardize all years to a consistent tract vintage. 
@@ -36,6 +37,8 @@ library(purrr) library(readr) library(tibble) library(stringr) +library(tidycensus) +library(ggplot2) ``` ## Step 1: Download the Data @@ -54,7 +57,7 @@ metadata = tibble::tribble( 2022, 2020, "https://urban-data-catalog.s3.amazonaws.com/drupal-root-live/2023/12/20/hmda_tract_2022.csv", 2023, 2020, "https://urban-data-catalog.s3.amazonaws.com/drupal-root-live/2024/12/17/hmda_tract_2023.csv") -## iterate of the metadata object and read in data for each year +## iterate over the metadata object and read in data for each year hmda_data <- pmap(metadata, function(url, year, vintage) { read_csv(url, show_col_types = FALSE) |> mutate( @@ -67,21 +70,20 @@ names(hmda_data) = metadata$year %>% as.character() Let's inspect the structure of the data: ```{r inspect-data} -# Check dimensions of each year -map(hmda_data, dim) - -# View the 2018 data (2010 tract vintage) glimpse(hmda_data[["2018"]]) ``` ## Step 2: Prepare Data for Crosswalking The HMDA data includes a `tractid` column that contains the 11-digit tract GEOID. -Let's prepare a subset of variables for crosswalking. We'll focus on count variables -(total applications by race/ethnicity) which require the `count_` prefix for automatic -detection by `crosswalk_data()`: - -```{r prepare-data} +Let's prepare a subset of variables for crosswalking. We'll focus on a subset of variables +(total applications by race/ethnicity and median loan amounts). We could explicitly pass the +variables we want to crosswalk to the appropriate parameter (`count_columns` or `non_count_columns`), +but it's easy (and nice practice) to prefix these variables with their unit types ("count" and "median", +respectively), and `crosswalk_data()` will crosswalk each appropriately by default since they have these +standard unit prefixes in their names. 
+ +```{r prepare-data, echo = FALSE} prepare_hmda <- function(data) { data |> rename_with(.cols = matches("^geo20"), .fn = ~ "source_geoid") |> @@ -92,18 +94,17 @@ prepare_hmda <- function(data) { # Count variables: rename with count_ prefix for automatic detection count_race_white_purchase = race_white_purchase, count_owner_purchase_originations = owner_purchase_originations, - median_owner_loan_amount = owner_loan_amount_median - ) |> - mutate(source_geoid = as.character(source_geoid)) -} + median_owner_loan_amount = owner_loan_amount_median) |> + mutate(source_geoid = as.character(source_geoid)) } hmda_prepared <- map(hmda_data, prepare_hmda) ``` ## Step 3: Obtain the 2010→2020 Tract Crosswalk -Here's where `crosswalk` shines. We need a single crosswalk to convert all 2010-vintage -tracts to 2020-vintage tracts. The package fetches this from IPUMS NHGIS: +Next we get our crosswalk (the same for each of our 2010-vintage years of HMDA data), +which contains allocation factors that specify how to distribute values from 2010 tract +definitions to those for 2020 tracts. ```{r get-crosswalk} tract_crosswalk <- get_crosswalk( @@ -117,15 +118,15 @@ tract_crosswalk <- get_crosswalk( tract_crosswalk$message ``` -The crosswalk contains allocation factors that specify how to distribute values from -2010 tracts to 2020 tracts. When tract boundaries changed, a single 2010 tract may -map to multiple 2020 tracts (or vice versa), with allocation factors summing to 1. - ## Step 4: Apply the Crosswalk to 2018-2021 Data -Now we apply the crosswalk to the four years of data that use 2010 tract definitions: +Now we apply the crosswalk to the four years of data that use 2010 tract definitions. +We can see in the console-printed output that relatively small, though not insignificant, +fractions of records in our source data do not join to our crosswalk. 
When this occurs, source +data is effectively lost because it has no associated target geography nor allocation factor +assigned to it. -```{r apply-crosswalk} +```{r apply-crosswalk, echo = FALSE} # Years that need crosswalking (2010 vintage) years_to_crosswalk <- c("2018", "2019", "2020", "2021") @@ -138,10 +139,27 @@ hmda_crosswalked <- map_if( crosswalk = tract_crosswalk, geoid_column = "source_geoid", show_join_quality = TRUE)) +``` + +## Step 5: Assess Crosswalking Quality + +Is there anything we can learn about our source data that doesn't join to our crosswalk? +Ideally, every record in our source data maps to a record in our crosswalk. + +```{r, diagnose-crosswalking} +## we see that some observations that don't match have "XXXXXX" in lieu of +## a real tract code--which, from reading the data documentation, we know is +## done to retain valid observations that, in the source data, do not have +## a valid tract identifier but do have valid county and/or state identifiers +hmda_crosswalked |> + map(~ + .x |> + attr("join_quality") |> + pluck("data_geoids_unmatched") |> + head(5)) -## how many source records are we unable to crosswalk each year? -## excluding those with an "X"--which is used to retain valid observations -## that, in the source data, do not have a valid tract identifier--very few: +## how many source records are we unable to crosswalk each year, excluding +## those with "X" in their GEOIDs? under 30 each year. hmda_crosswalked |> map(~ .x |> @@ -149,28 +167,30 @@ hmda_crosswalked |> pluck("data_geoids_unmatched") %>% .[!str_detect(., "X")] |> length()) +``` +## Result: A Panel Dataset in 2020 Tract Definitions +We now have a single dataframe with all six years of HMDA data standardized to 2020 +tract definitions. Previously, due to changes in tract geographies between decades, we were unable +to accurately compare neighborhood changes over time. + +Now, we have apples-to-apples measurements for tracts from 2018 through 2023. 
+ +```{r final-summary} hmda_combined <- bind_rows(hmda_crosswalked) |> ## data for years that are crosswalked have slightly different/additional columsn mutate( geoid = if_else(is.na(geoid), source_geoid, geoid)) |> select(-c(geography_name, source_geoid, vintage)) |> - arrange(geoid, data_year) - -# View the result -glimpse(hmda_combined) -``` - - -## Result: A Panel Dataset in 2020 Tract Definitions -We now have a single dataframe with all six years of HMDA data standardized to 2020 -tract definitions: + arrange(geoid, data_year) |> + mutate( + state = str_sub(geoid, 1, 2), + percent_race_white_purchase = count_race_white_purchase / count_owner_purchase_originations) -```{r final-summary} -# Unique tracts per year +## there's a little bit of variation year-to-year in terms of which tracts have +## reported HMDA data, but for the majority, we have observations in each of the +## six years: hmda_combined |> - group_by(data_year) |> - summarize(n_tracts = n_distinct(geoid)) + count(geoid) |> + count(n) ``` - -