From 638812d811416a7429c266372669c17508da2389 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Wed, 31 Dec 2025 06:48:24 -0600 Subject: [PATCH 01/17] Enforce stricter `time` value pattern matching Mitigating scenarios where column names contain one another (e.g., M1 and M12) --- R/migrate.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/migrate.R b/R/migrate.R index 07daa3b..eebf138 100644 --- a/R/migrate.R +++ b/R/migrate.R @@ -416,14 +416,16 @@ migrate <- function(data, id, time, state, } # Replace the time values in the column names with "start" and "end" + # Anchoring each pattern to the end of the column names to + # accommodate overlapping patterns (e.g., M1 and M12) colnames(data) <- gsub( - pattern = as.character(times[1]), + pattern = paste0(as.character(times[1]), "$"), replacement = "start", x = colnames(data) ) colnames(data) <- gsub( - pattern = as.character(times[2]), + pattern = paste0(as.character(times[2]), "$"), replacement = "end", x = colnames(data) ) From 7b21b716e989a85c85af86e76491982c043142a2 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Wed, 31 Dec 2025 06:59:59 -0600 Subject: [PATCH 02/17] Add warning for character `time` values --- R/migrate.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/migrate.R b/R/migrate.R index eebf138..40d5f93 100644 --- a/R/migrate.R +++ b/R/migrate.R @@ -103,6 +103,7 @@ coerce_factor <- function(data, state_name) { # Stop execution if there aren't exactly 2 unique time values in the data +# Warn that character time values may sort incorrectly (e.g., pre_, post_) check_times <- function(times, time_name) { if (length(times) != 2) { @@ -118,6 +119,12 @@ check_times <- function(times, time_name) { } + if (is.character(times)) { + cli::cli_warn( + c("!" 
= glue::glue("Please consider converting `{ time }` to an ordered factor before passing it to `migrate()` to ensure that the timepoint ordering in the final matrix displays correctly")) + ) + } + } From df33890f6b869d3e16da6e306b7e31833d80a5b8 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Wed, 31 Dec 2025 08:31:44 -0600 Subject: [PATCH 03/17] Modify variable name for character `time` warning --- R/migrate.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/migrate.R b/R/migrate.R index 40d5f93..fab4513 100644 --- a/R/migrate.R +++ b/R/migrate.R @@ -120,9 +120,11 @@ check_times <- function(times, time_name) { } if (is.character(times)) { + cli::cli_warn( - c("!" = glue::glue("Please consider converting `{ time }` to an ordered factor before passing it to `migrate()` to ensure that the timepoint ordering in the final matrix displays correctly")) + c("!" = glue::glue("Please consider converting `{ time_name }` to an ordered factor before passing it to `migrate()` to ensure that the timepoint ordering in the final matrix displays correctly")) ) + } } From 950cd4937c13ddef632b11cfb233b9e61f0a9471 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Wed, 31 Dec 2025 08:38:07 -0600 Subject: [PATCH 04/17] Add character -type `time` tests --- tests/testthat/test-migrate.R | 81 ++++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/tests/testthat/test-migrate.R b/tests/testthat/test-migrate.R index 95fd0d6..35264cd 100644 --- a/tests/testthat/test-migrate.R +++ b/tests/testthat/test-migrate.R @@ -315,6 +315,7 @@ test_that("migrate() throws an error if `metric` argument is not numeric column" }) + test_that("migrate() correctly names third column based upon `metric` argument", { # when `percent = TRUE` (default) @@ -360,6 +361,7 @@ test_that("migrate() correctly names third column based upon `metric` argument", }) + 
test_that("migrate() coerces 'character'-type `state` columns to type 'factor'", { suppressWarnings({ @@ -389,13 +391,60 @@ test_that("migrate() coerces 'character'-type `state` columns to type 'factor'", }) + +# Mutate `date` to 'character'-type +mock_credit_time_character <- mock_credit |> + dplyr::mutate( + date = dplyr::case_when( + date == as.Date("2020-06-30") ~ "quarter_3", + date == as.Date("2020-09-30") ~ "quarter_4" + ) + ) + + +testthat::test_that("migrate() names 'character'-type `time` columns correctly", { + + df_time_character <- suppressWarnings({ + migrate( + data = mock_credit_time_character, + time = date, + state = risk_rating, + id = customer_id, + verbose = FALSE + ) + }) + + testthat::expect_identical( + raw_ct, + df_time_character + ) + +}) + + +testthat::test_that("migrate() throws a warning if `time` variable is 'character`-type", { + + # suggest converting character to ordered factor + testthat::expect_warning( + migrate(mock_credit_time_overlap, + time = date, + state = risk_rating, + id = customer_id, + verbose = FALSE + ), + regexp = "Please consider converting `date` to an ordered factor" + ) + +}) + + ## Tests for `fill_state` argument --------------------------------------- # Create mock data with `customer_id` values that only exist at one timepoint. 
# In particular, `mock_credit_with_missing` has: # - 20 customers that have a value only in the first timepoint # - 10 customers that have a value only in the second timepoint -mock_credit_with_missing <- mock_credit |> +mock_credit_with_missing <- mock_credit |> # Remove the first 10 rows dplyr::slice(-(1:10)) |> # Remove the last 20 rows @@ -410,8 +459,8 @@ test_that("migrate() doesn't remove customers with missing timepoints when `fill id = customer_id, percent = FALSE, verbose = FALSE - ) |> - dplyr::pull(count) |> + ) |> + dplyr::pull(count) |> sum() migrate_counts_with_missing <- migrate( @@ -422,8 +471,8 @@ test_that("migrate() doesn't remove customers with missing timepoints when `fill percent = FALSE, fill_state = "NR", verbose = FALSE - ) |> - dplyr::pull(count) |> + ) |> + dplyr::pull(count) |> sum() expect_equal(migrate_counts_without_missing, migrate_counts_with_missing) @@ -440,10 +489,10 @@ test_that("migrate() removes customers with missing timepoints when `fill_state` id = customer_id, percent = FALSE, verbose = FALSE - ) |> - dplyr::pull(count) |> + ) |> + dplyr::pull(count) |> sum() - }) + }) migrate_counts_with_fill_state <- migrate( data = mock_credit_with_missing, @@ -453,8 +502,8 @@ test_that("migrate() removes customers with missing timepoints when `fill_state` percent = FALSE, fill_state = "NR", verbose = FALSE - ) |> - dplyr::pull(count) |> + ) |> + dplyr::pull(count) |> sum() expect_true(migrate_counts_without_fill_state < migrate_counts_with_fill_state) @@ -503,14 +552,14 @@ test_that("migrate() assigns filler state correctly when `fill_state` is not NUL verbose = FALSE ) - n_missing_start <- migrated_data |> - dplyr::count(risk_rating_start, wt = count) |> - dplyr::filter(risk_rating_start == "NR") |> + n_missing_start <- migrated_data |> + dplyr::count(risk_rating_start, wt = count) |> + dplyr::filter(risk_rating_start == "NR") |> dplyr::pull(n) - n_missing_end <- migrated_data |> - dplyr::count(risk_rating_end, wt = count) |> - 
dplyr::filter(risk_rating_end == "NR") |> + n_missing_end <- migrated_data |> + dplyr::count(risk_rating_end, wt = count) |> + dplyr::filter(risk_rating_end == "NR") |> dplyr::pull(n) # Recall that `mock_credit_with_missing` removed the first 10 and the last 20 rows From dddc3f2355a6fd66dc0b46d66ea5160631c1dc41 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Wed, 31 Dec 2025 10:38:38 -0600 Subject: [PATCH 05/17] Add type discussion for `time` variable to README --- README.Rmd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index 725faa2..d424047 100644 --- a/README.Rmd +++ b/README.Rmd @@ -61,7 +61,11 @@ devtools::install_github("ketchbrookanalytics/migrate") ## Practical Usage -{migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest dates in the given *time* variable, and filters out any observations where the *time* value does not match those two dates. +{migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest timepoints in the given *time* variable, and filters out any observations where the *time* value does not match those two periods. + +`migrate()` finds the earliest and latest timepoints in the given *time* variable by isolating the unique values and sorting. As a result, `migrate` can accommodate a variety of data types, although `date` data types are likely the most convenient to work with. + +While most data types will sort appropriately, if the *time* variable is type `character`, it is recommended to use convert to type `factor` (ordered) instead to ensure the *time* values are properly sequenced. `migrate()` will throw a warning if the *time* variable is type `character`. 
If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this: From b8912cbc6ae9486bc5f30c294ab5d3b37e44bf3a Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Tue, 6 Jan 2026 11:03:37 -0600 Subject: [PATCH 06/17] Revise type discussion for `time` variable in README --- README.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index d424047..6c0c7e0 100644 --- a/README.Rmd +++ b/README.Rmd @@ -63,7 +63,7 @@ devtools::install_github("ketchbrookanalytics/migrate") {migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest timepoints in the given *time* variable, and filters out any observations where the *time* value does not match those two periods. -`migrate()` finds the earliest and latest timepoints in the given *time* variable by isolating the unique values and sorting. As a result, `migrate` can accommodate a variety of data types, although `date` data types are likely the most convenient to work with. +`migrate()` identifies the desired timepoints in the *time* variable by isolating the unique values and sorting. As a result, `migrate()` can accommodate a variety of data types. However, `date` data types are likely the most convenient to work with. While most data types will sort appropriately, if the *time* variable is type `character`, it is recommended to use convert to type `factor` (ordered) instead to ensure the *time* values are properly sequenced. `migrate()` will throw a warning if the *time* variable is type `character`. 
From fe31101f3ac87525febbe8d0f718dd550cd9fe86 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Tue, 6 Jan 2026 11:08:31 -0600 Subject: [PATCH 07/17] Build README.md --- README.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cd013a9..295c11b 100644 --- a/README.md +++ b/README.md @@ -68,8 +68,19 @@ devtools::install_github("ketchbrookanalytics/migrate") {migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest -dates in the given *time* variable, and filters out any observations -where the *time* value does not match those two dates. +timepoints in the given *time* variable, and filters out any +observations where the *time* value does not match those two periods. + +`migrate()` identifies the desired timepoints in the *time* variable by +isolating the unique values and sorting. As a result, `migrate()` can +accommodate a variety of data types. However, `date` data types are +likely the most convenient to work with. + +While most data types will sort appropriately, if the *time* variable is +type `character`, it is recommended to use convert to type `factor` +(ordered) instead to ensure the *time* values are properly sequenced. +`migrate()` will throw a warning if the *time* variable is type +`character`. 
If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this: @@ -144,11 +155,11 @@ head(migrated_df) #> # A tibble: 6 × 3 #> risk_rating_start risk_rating_end prop #> -#> 1 AAA AAA 0.774 -#> 2 AAA AA 0.194 +#> 1 AAA AAA 0.774 +#> 2 AAA AA 0.194 #> 3 AAA A 0.0323 -#> 4 AAA BBB 0 -#> 5 AAA BB 0 +#> 4 AAA BBB 0 +#> 5 AAA BB 0 #> 6 AAA B 0 ``` From 7e0982278b40bb712b3bbacb60b9e29285bfa93c Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Tue, 6 Jan 2026 12:01:36 -0600 Subject: [PATCH 08/17] Update test `df` reference --- tests/testthat/test-migrate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-migrate.R b/tests/testthat/test-migrate.R index 35264cd..13c2c3b 100644 --- a/tests/testthat/test-migrate.R +++ b/tests/testthat/test-migrate.R @@ -426,7 +426,7 @@ testthat::test_that("migrate() throws a warning if `time` variable is 'character # suggest converting character to ordered factor testthat::expect_warning( - migrate(mock_credit_time_overlap, + migrate(mock_credit_time_character, time = date, state = risk_rating, id = customer_id, From f0c481a7aa38a9d2f4d080bb89dcccffd1a56fa0 Mon Sep 17 00:00:00 2001 From: Dylan Hughes <115112140+dmhhughes@users.noreply.github.com> Date: Tue, 6 Jan 2026 13:28:14 -0600 Subject: [PATCH 09/17] Update DESCRIPTION --- DESCRIPTION | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index dd077a4..74950f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,11 @@ Authors@R: person(given = "Ivan", family = "Millanes", role = "ctb", - email = "imillanes@ketchbrookanalytics.com")) + email = "imillanes@ketchbrookanalytics.com"), + person(given = "Dylan", + family = "Hughes", + role = "ctb", + email = "dhughes@ketchbrookanalytics.com")) Description: Tools to help convert credit risk data at two timepoints into traditional credit state migration (aka, 
"transition") matrices. At a higher level, 'migrate' is intended to help an analyst understand From 4992d67feaaea6d3c564f9113e33aa267781c57c Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:00:58 -0500 Subject: [PATCH 10/17] Update warning language --- R/migrate.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/migrate.R b/R/migrate.R index fab4513..c683602 100644 --- a/R/migrate.R +++ b/R/migrate.R @@ -122,8 +122,13 @@ check_times <- function(times, time_name) { if (is.character(times)) { cli::cli_warn( - c("!" = glue::glue("Please consider converting `{ time_name }` to an ordered factor before passing it to `migrate()` to ensure that the timepoint ordering in the final matrix displays correctly")) - ) + c("!" = glue::glue( + "Please consider converting `{ time_name }` to an ordered factor", + "before passing it to `migrate()` to ensure that transition is", + "appropriately chronological", + .sep = " " + )) + ) } From 4ca21496fdce10defb4ad5427c1022d79bd8fbe5 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:02:37 -0500 Subject: [PATCH 11/17] Keep all code w/in 80 characters wide --- R/migrate.R | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/R/migrate.R b/R/migrate.R index c683602..98d55e5 100644 --- a/R/migrate.R +++ b/R/migrate.R @@ -72,8 +72,15 @@ coerce_factor <- function(data, state_name) { # to convert it to an ordered factor if (!is.ordered(state_vec)) { + msg <- glue::glue( + "Please consider converting `{ state_name }` to an ordered factor", + "before passing it to `migrate()` to ensure that the rank-ordering in", + "the final matrix displays correctly", + .sep = " " + ) + cli::cli_warn( - c("!" = glue::glue("Please consider converting `{ state_name }` to an ordered factor before passing it to `migrate()` to ensure that the rank-ordering in the final matrix displays correctly")) + c("!" 
= msg) ) } @@ -87,7 +94,12 @@ coerce_factor <- function(data, state_name) { cli::cli_warn( c( "!" = glue::glue("Converting `{ state_name }` to type `factor`"), - "!" = glue::glue("To ensure that your output is ordered correctly, convert the `{ state_name }` column variable in your data frame to an ordered factor before passing to `migrate()`") + "i" = glue::glue( + "To ensure that your output is ordered correctly, convert the", + "`{ state_name }` column variable in your data frame to an ordered", + "factor before passing to `migrate()`", + .sep = " " + ) ) ) @@ -158,7 +170,11 @@ drop_missing_timepoints <- function(data) { tidyr::drop_na() cli::cli_warn( - c("!" = glue::glue("Removed { (nrow(data) - nrow(out)) } observations due to missingness or IDs only existing at one `time` value")) + c("!" = glue::glue( + "Removed { (nrow(data) - nrow(out)) } observations due to missingness or", + "IDs only existing at one `time` value", + .sep = " " + )) ) return(out) @@ -200,7 +216,7 @@ migrate_percent <- function(data, state_start_name, metric_name) { "{metric_name}" := .data[[metric_name]] / sum(.data[[metric_name]]) ) |> dplyr::ungroup() |> - # Replace `NaN` values with `Inf` so that they are not dropped with `drop_na()` + # Replace `NaN` values with `Inf` so that they're not dropped by `drop_na()` dplyr::mutate( "{metric_name}" := ifelse( is.nan(.data[[metric_name]]), @@ -327,7 +343,9 @@ migrate <- function(data, id, time, state, # Stop if the `metric` variable isn't numeric if (!is.numeric(data[[metric_name]])) { - cli::cli_abort("`metric` argument must be a numeric type variable in `data`") + cli::cli_abort( + "`metric` argument must be a numeric type variable in `data`" + ) } @@ -374,10 +392,24 @@ migrate <- function(data, id, time, state, # Inform the user cli::cli_div(theme = list(ul = list(`margin-left` = 2, before = ""))) - cli::cli_alert_info(glue::glue("{ n_missing } IDs have a missing timepoint:")) + cli::cli_alert_info( + glue::glue("{ n_missing } IDs have a 
missing timepoint:") + ) cli::cli_ul(id = "ul_id") - cli::cli_li(glue::glue("Migrating { n_missing_end } IDs with missing end timepoint to { fill_state_class_type } class '{ fill_state }'")) - cli::cli_li(glue::glue("Migrating { n_missing_start } IDs with missing start timepoint from { fill_state_class_type } class '{ fill_state }'")) + cli::cli_li( + glue::glue( + "Migrating { n_missing_end } IDs with missing end timepoint to", + "{ fill_state_class_type } class '{ fill_state }'", + .sep = " " + ) + ) + cli::cli_li( + glue::glue( + "Migrating { n_missing_start } IDs with missing start timepoint from", + "{ fill_state_class_type } class '{ fill_state }'", + .sep = " " + ) + ) cli::cli_end(id = "ul_id") } From 47b2966e028875a24e7173d0f82da3b5912b01fe Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:09:45 -0500 Subject: [PATCH 12/17] Make arg naming consistent with prior test --- tests/testthat/test-migrate.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-migrate.R b/tests/testthat/test-migrate.R index 13c2c3b..e1162b1 100644 --- a/tests/testthat/test-migrate.R +++ b/tests/testthat/test-migrate.R @@ -426,12 +426,13 @@ testthat::test_that("migrate() throws a warning if `time` variable is 'character # suggest converting character to ordered factor testthat::expect_warning( - migrate(mock_credit_time_character, - time = date, - state = risk_rating, - id = customer_id, - verbose = FALSE - ), + migrate( + data = mock_credit_time_character, + time = date, + state = risk_rating, + id = customer_id, + verbose = FALSE + ), regexp = "Please consider converting `date` to an ordered factor" ) From 557df1da2d7794c56a4d2174810c1b04474c5232 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:32:28 -0500 Subject: [PATCH 13/17] Minor README updates --- README.Rmd | 15 +++++++-------- README.md | 34 +++++++++++++++------------------- 2 files changed, 22 insertions(+), 27 deletions(-) diff 
--git a/README.Rmd b/README.Rmd index 6c0c7e0..76e8348 100644 --- a/README.Rmd +++ b/README.Rmd @@ -63,18 +63,17 @@ devtools::install_github("ketchbrookanalytics/migrate") {migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest timepoints in the given *time* variable, and filters out any observations where the *time* value does not match those two periods. -`migrate()` identifies the desired timepoints in the *time* variable by isolating the unique values and sorting. As a result, `migrate()` can accommodate a variety of data types. However, `date` data types are likely the most convenient to work with. +`migrate()` identifies the desired timepoints in the *time* variable by isolating the unique values and sorting. As a result, `migrate()` can accommodate a variety of data types. However, `date` or `datetime` data types are likely the most convenient to work with. -While most data types will sort appropriately, if the *time* variable is type `character`, it is recommended to use convert to type `factor` (ordered) instead to ensure the *time* values are properly sequenced. `migrate()` will throw a warning if the *time* variable is type `character`. +While most data types will sort appropriately, if the *time* variable is type `character`, it is recommended to convert to type `factor` (ordered) before passing to `migrate()` to ensure the *time* values are properly sequenced. `migrate()` will throw a warning if the *time* variable is type `character`. 
If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this: -```{r, eval = FALSE} -# -- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates - -# SELECT ID, Date, State, Balance -# FROM my_database -# WHERE Date IN ('2020-12-31', '2021-06-30') +```sql +-- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates +SELECT ID, Date, State, Balance +FROM my_database +WHERE Date IN ('2020-12-31', '2021-06-30') ``` By default, `migrate()` drops observations that belong to IDs found at a single timepoint. However, users can define a *filler state* so that IDs with a single timepoint are not removed but rather migrated from or to this *filler state*. This allows for more flexible handling of such data, ensuring that no information is lost during the migration process. Check [Handle IDs with observations at a single timepoint](https://ketchbrookanalytics.github.io/migrate/articles/migrate.html#handle-ids-with-observations-at-a-single-timepoint) for more information. diff --git a/README.md b/README.md index 295c11b..532fb47 100644 --- a/README.md +++ b/README.md @@ -73,24 +73,23 @@ observations where the *time* value does not match those two periods. `migrate()` identifies the desired timepoints in the *time* variable by isolating the unique values and sorting. As a result, `migrate()` can -accommodate a variety of data types. However, `date` data types are -likely the most convenient to work with. +accommodate a variety of data types. However, `date` or `datetime` data +types are likely the most convenient to work with. While most data types will sort appropriately, if the *time* variable is -type `character`, it is recommended to use convert to type `factor` -(ordered) instead to ensure the *time* values are properly sequenced. -`migrate()` will throw a warning if the *time* variable is type -`character`. 
+type `character`, it is recommended to convert to type `factor` +(ordered) before passing to `migrate()` to ensure the *time* values are +properly sequenced. `migrate()` will throw a warning if the *time* +variable is type `character`. If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this: -``` r -# -- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates - -# SELECT ID, Date, State, Balance -# FROM my_database -# WHERE Date IN ('2020-12-31', '2021-06-30') +``` sql +-- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates +SELECT ID, Date, State, Balance +FROM my_database +WHERE Date IN ('2020-12-31', '2021-06-30') ``` By default, `migrate()` drops observations that belong to IDs found at a @@ -148,18 +147,15 @@ migrated_df <- migrate( state = risk_rating, ) #> ℹ Migrating from 2020-06-30 to 2020-09-30 -``` - -``` r head(migrated_df) #> # A tibble: 6 × 3 #> risk_rating_start risk_rating_end prop #> -#> 1 AAA AAA 0.774 -#> 2 AAA AA 0.194 +#> 1 AAA AAA 0.774 +#> 2 AAA AA 0.194 #> 3 AAA A 0.0323 -#> 4 AAA BBB 0 -#> 5 AAA BB 0 +#> 4 AAA BBB 0 +#> 5 AAA BB 0 #> 6 AAA B 0 ``` From ccc64a45ebf48c04c089d69ed8b058254eb8a405 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:34:50 -0500 Subject: [PATCH 14/17] Use latest roxygen version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 74950f3..b7e4605 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,5 +48,5 @@ Imports: VignetteBuilder: knitr Language: en-US -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) From 093c41cede98cf7200020dfff7ab5e0052d9edca Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:38:20 -0500 Subject: [PATCH 15/17] Clean up devcontainer to align with our approach in {ffiec} --- .devcontainer/devcontainer.json | 22 +++++++++------------- 1 
file changed, 9 insertions(+), 13 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 90df33d..c1b1326 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,22 +1,18 @@ { - "image": "rocker/r2u", + "name": "migrate R package", + "image": "rocker/r-ver:4", // Features to add to the dev container. More info: https://containers.dev/features. "features": { - // Install system library for `devtools::document()` - // More info: https://github.com/rocker-org/devcontainer-features/blob/main/src/quarto-cli/README.md#install-chromium - "ghcr.io/rocker-org/devcontainer-features/apt-packages:1": { + "ghcr.io/rocker-org/devcontainer-features/apt-packages:1": { "packages": "libxml2-dev, qpdf" }, - // Install pandoc (for building vignettes) - "ghcr.io/rocker-org/devcontainer-features/pandoc:1": {}, - // Install additional R package dependencies "ghcr.io/rocker-org/devcontainer-features/r-packages:1": { - "packages": "testthat, knitr, rmarkdown, dplyr, tidyr, tibble, rlang, utils, magrittr, devtools, usethis, testthat", - "additionalRepositories": "CRAN = 'https://packagemanager.posit.co/cran/__linux__/jammy/latest'" + "packages": "devtools, dplyr, github::nx10/httpgd, knitr, languageserver, magrittr, rlang, rmarkdown, testthat, tibble, tidyr, usethis", + "installSystemRequirements": true } }, @@ -25,13 +21,13 @@ // Settings for VS Code. 
"vscode": { "extensions": [ - "reditorsupport.r" + "reditorsupport.r", + "RDebugger.r-debugger" ], "settings": { + "editor.rulers": [80], "r.bracketedPaste": true, - "r.plot.useHttpgd": true, - "r.lsp.diagnostics": false, - "r.lsp.promptToInstall": false + "r.plot.useHttpgd": true } } } From 11749aa8d432420b93153b35a235e21ec780b385 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:49:28 -0500 Subject: [PATCH 16/17] Align test w/ issue example "quarter_3" and "quarter_4" doesn't address the overlapping characters issue described in #21 --- tests/testthat/test-migrate.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/testthat/test-migrate.R b/tests/testthat/test-migrate.R index e1162b1..9533e6f 100644 --- a/tests/testthat/test-migrate.R +++ b/tests/testthat/test-migrate.R @@ -395,9 +395,9 @@ test_that("migrate() coerces 'character'-type `state` columns to type 'factor'", # Mutate `date` to 'character'-type mock_credit_time_character <- mock_credit |> dplyr::mutate( - date = dplyr::case_when( - date == as.Date("2020-06-30") ~ "quarter_3", - date == as.Date("2020-09-30") ~ "quarter_4" + time_overlap_chars = dplyr::case_when( + date == as.Date("2020-06-30") ~ "M1", + date == as.Date("2020-09-30") ~ "M100" ) ) @@ -407,7 +407,7 @@ testthat::test_that("migrate() names 'character'-type `time` columns correctly", df_time_character <- suppressWarnings({ migrate( data = mock_credit_time_character, - time = date, + time = time_overlap_chars, state = risk_rating, id = customer_id, verbose = FALSE @@ -428,12 +428,12 @@ testthat::test_that("migrate() throws a warning if `time` variable is 'character testthat::expect_warning( migrate( data = mock_credit_time_character, - time = date, + time = time_overlap_chars, state = risk_rating, id = customer_id, verbose = FALSE ), - regexp = "Please consider converting `date` to an ordered factor" + regexp = "Please consider converting `time_overlap_chars` to an ordered factor" ) }) 
From 874392f53b2419a9a13b326d2fb2e03e6a3d0713 Mon Sep 17 00:00:00 2001 From: Michael Thomas Date: Sat, 10 Jan 2026 23:59:41 -0500 Subject: [PATCH 17/17] Increment version --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b7e4605..c533c23 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: migrate Type: Package Title: Create Credit State Migration (Transition) Matrices -Version: 0.5.0 +Version: 0.5.1 Authors@R: c(person(given = "Michael", family = "Thomas", diff --git a/NEWS.md b/NEWS.md index 8ba537d..1dfcdb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# migrate 0.5.1 + +## Bug Fix + +* Remediates issue where `migrate()` would fail if values of `time` argument had overlapping characters (e.g., "T1" and "T100") + + `migrate()` now throws a warning if the argument passed to `time` is a character-type column + # migrate 0.5.0 ## Enhancements