ketchbrookanalytics · mthomas-ketchbrook · Jan 11, 2026 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -1,22 +1,18 @@
 {
-    "image": "rocker/r2u",
+    "name": "migrate R package",
+    "image": "rocker/r-ver:4",
 
     // Features to add to the dev container. More info: https://containers.dev/features.
 	"features": {
 
-        // Install system library for `devtools::document()`
-        // More info: https://github.com/rocker-org/devcontainer-features/blob/main/src/quarto-cli/README.md#install-chromium
-		"ghcr.io/rocker-org/devcontainer-features/apt-packages:1": {
+        "ghcr.io/rocker-org/devcontainer-features/apt-packages:1": {
             "packages": "libxml2-dev, qpdf"
         },
 
-        // Install pandoc (for building vignettes)
-        "ghcr.io/rocker-org/devcontainer-features/pandoc:1": {},
-
 		// Install additional R package dependencies
 		"ghcr.io/rocker-org/devcontainer-features/r-packages:1": {
-			"packages": "testthat, knitr, rmarkdown, dplyr, tidyr, tibble, rlang, utils, magrittr, devtools, usethis, testthat",
-			"additionalRepositories": "CRAN = 'https://packagemanager.posit.co/cran/__linux__/jammy/latest'"
+			"packages": "devtools, dplyr, github::nx10/httpgd, knitr, languageserver, magrittr, rlang, rmarkdown, testthat, tibble, tidyr, usethis",
+			"installSystemRequirements": true
 		}
 
 	},
@@ -25,13 +21,13 @@
         // Settings for VS Code.
         "vscode": {
             "extensions": [
-                "reditorsupport.r"
+                "reditorsupport.r",
+                "RDebugger.r-debugger"
             ],
             "settings": {
+                "editor.rulers": [80],
                 "r.bracketedPaste": true,
-                "r.plot.useHttpgd": true,
-                "r.lsp.diagnostics": false,
-                "r.lsp.promptToInstall": false
+                "r.plot.useHttpgd": true
             }
         }
     }

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,7 +14,11 @@ Authors@R:
       person(given = "Ivan",
              family = "Millanes",
              role = "ctb",
-             email = "imillanes@ketchbrookanalytics.com"))
+             email = "imillanes@ketchbrookanalytics.com"),
+      person(given = "Dylan",
+             family = "Hughes",
+             role = "ctb",
+             email = "dhughes@ketchbrookanalytics.com"))
 Description: Tools to help convert credit risk data at two timepoints
     into traditional credit state migration (aka, "transition") matrices.
     At a higher level, 'migrate' is intended to help an analyst understand
@@ -44,5 +48,5 @@ Imports:
 VignetteBuilder:
     knitr
 Language: en-US
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.3
 Roxygen: list(markdown = TRUE)
diff --git a/R/migrate.R b/R/migrate.R
@@ -72,8 +72,15 @@ coerce_factor <- function(data, state_name) {
     # to convert it to an ordered factor
     if (!is.ordered(state_vec)) {
 
+      msg <- glue::glue(
+        "Please consider converting `{ state_name }` to an ordered factor",
+        "before passing it to `migrate()` to ensure that the rank-ordering in",
+        "the final matrix displays correctly",
+        .sep = " "
+      )
+
       cli::cli_warn(
-        c("!" = glue::glue("Please consider converting `{ state_name }` to an ordered factor before passing it to `migrate()` to ensure that the rank-ordering in the final matrix displays correctly"))
+        c("!" = msg)
       )
 
     }
@@ -87,7 +94,12 @@ coerce_factor <- function(data, state_name) {
     cli::cli_warn(
       c(
         "!" = glue::glue("Converting `{ state_name }` to type `factor`"),
-        "!" = glue::glue("To ensure that your output is ordered correctly, convert the `{ state_name }` column variable in your data frame to an ordered factor before passing to `migrate()`")
+        "i" = glue::glue(
+          "To ensure that your output is ordered correctly, convert the",
+          "`{ state_name }` column variable in your data frame to an ordered",
+          "factor before passing to `migrate()`",
+          .sep = " "
+        )
       )
     )
 
@@ -103,6 +115,7 @@ coerce_factor <- function(data, state_name) {
 
 
 # Stop execution if there aren't exactly 2 unique time values in the data
+# Warn that character time values may sort incorrectly (e.g., pre_, post_)
 check_times <- function(times, time_name) {
 
   if (length(times) != 2) {
@@ -118,6 +131,19 @@ check_times <- function(times, time_name) {
 
   }
 
+  if (is.character(times)) {
+
+    cli::cli_warn(
+      c("!" = glue::glue(
+        "Please consider converting `{ time_name }` to an ordered factor",
+        "before passing it to `migrate()` to ensure that transition is",
+        "appropriately chronological",
+        .sep = " "
+      ))
+    )
+
+  }
+
 }
 
 
@@ -144,7 +170,11 @@ drop_missing_timepoints <- function(data) {
     tidyr::drop_na()
 
   cli::cli_warn(
-    c("!" = glue::glue("Removed { (nrow(data) - nrow(out)) } observations due to missingness or IDs only existing at one `time` value"))
+    c("!" = glue::glue(
+      "Removed { (nrow(data) - nrow(out)) } observations due to missingness or",
+      "IDs only existing at one `time` value",
+      .sep = " "
+    ))
   )
 
   return(out)
@@ -186,7 +216,7 @@ migrate_percent <- function(data, state_start_name, metric_name) {
       "{metric_name}" := .data[[metric_name]] / sum(.data[[metric_name]])
     ) |>
     dplyr::ungroup() |>
-    # Replace `NaN` values with `Inf` so that they are not dropped with `drop_na()`
+    # Replace `NaN` values with `Inf` so that they're not dropped by `drop_na()`
     dplyr::mutate(
       "{metric_name}" := ifelse(
         is.nan(.data[[metric_name]]),
@@ -313,7 +343,9 @@ migrate <- function(data, id, time, state,
     # Stop if the `metric` variable isn't numeric
     if (!is.numeric(data[[metric_name]])) {
 
-      cli::cli_abort("`metric` argument must be a numeric type variable in `data`")
+      cli::cli_abort(
+        "`metric` argument must be a numeric type variable in `data`"
+      )
 
     }
 
@@ -360,10 +392,24 @@ migrate <- function(data, id, time, state,
 
       # Inform the user
       cli::cli_div(theme = list(ul = list(`margin-left` = 2, before = "")))
-      cli::cli_alert_info(glue::glue("{ n_missing } IDs have a missing timepoint:"))
+      cli::cli_alert_info(
+        glue::glue("{ n_missing } IDs have a missing timepoint:")
+      )
       cli::cli_ul(id = "ul_id")
-        cli::cli_li(glue::glue("Migrating { n_missing_end } IDs with missing end timepoint to { fill_state_class_type } class '{ fill_state }'"))
-        cli::cli_li(glue::glue("Migrating { n_missing_start } IDs with missing start timepoint from { fill_state_class_type } class '{ fill_state }'"))
+      cli::cli_li(
+        glue::glue(
+          "Migrating { n_missing_end } IDs with missing end timepoint to",
+          "{ fill_state_class_type } class '{ fill_state }'",
+          .sep = " "
+        )
+      )
+      cli::cli_li(
+        glue::glue(
+          "Migrating { n_missing_start } IDs with missing start timepoint from",
+          "{ fill_state_class_type } class '{ fill_state }'",
+          .sep = " "
+        )
+      )
       cli::cli_end(id = "ul_id")
 
     }
@@ -416,14 +462,16 @@ migrate <- function(data, id, time, state,
   }
 
   # Replace the time values in the column names with "start" and "end"
+  # Anchoring each pattern to the end of the column names to
+  # accommodate overlapping patterns (e.g., M1 and M12)
   colnames(data) <- gsub(
-    pattern = as.character(times[1]),
+    pattern = paste0(as.character(times[1]), "$"),
     replacement = "start",
     x = colnames(data)
   )
 
   colnames(data) <- gsub(
-    pattern = as.character(times[2]),
+    pattern = paste0(as.character(times[2]), "$"),
     replacement = "end",
     x = colnames(data)
   )

diff --git a/README.Rmd b/README.Rmd
@@ -61,16 +61,19 @@ devtools::install_github("ketchbrookanalytics/migrate")
 
 ## Practical Usage
 
-{migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest dates in the given *time* variable, and filters out any observations where the *time* value does not match those two dates.
+{migrate} currently only handles transitions between exactly two (2) timepoints. Under the hood, `migrate()` finds the earliest & latest timepoints in the given *time* variable, and filters out any observations where the *time* value does not match those two periods.
 
-If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this:
+`migrate()` identifies the desired timepoints in the *time* variable by isolating the unique values and sorting. As a result, `migrate()` can accommodate a variety of data types. However, `date` or `datetime` data types are likely the most convenient to work with.
+
+While most data types will sort appropriately, if the *time* variable is type `character`, it is recommended to convert to type `factor` (ordered) before passing to `migrate()` to ensure the *time* values are properly sequenced. `migrate()` will throw a warning if the *time* variable is type `character`.
 
-```{r, eval = FALSE}
-# -- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates
+If you are writing a SQL query to get data to be used with `migrate()`, the query would likely look something like this:
 
-# SELECT ID, Date, State, Balance
-# FROM my_database
-# WHERE Date IN ('2020-12-31', '2021-06-30')
+```sql
+-- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates
+SELECT ID, Date, State, Balance
+FROM my_database
+WHERE Date IN ('2020-12-31', '2021-06-30')
 ```
 
 By default, `migrate()` drops observations that belong to IDs found at a single timepoint. However, users can define a *filler state* so that IDs with a single timepoint are not removed but rather migrated from or to this *filler state*. This allows for more flexible handling of such data, ensuring that no information is lost during the migration process. Check [Handle IDs with observations at a single timepoint](https://ketchbrookanalytics.github.io/migrate/articles/migrate.html#handle-ids-with-observations-at-a-single-timepoint) for more information.

diff --git a/README.md b/README.md
@@ -68,18 +68,28 @@ devtools::install_github("ketchbrookanalytics/migrate")
 
 {migrate} currently only handles transitions between exactly two (2)
 timepoints. Under the hood, `migrate()` finds the earliest & latest
-dates in the given *time* variable, and filters out any observations
-where the *time* value does not match those two dates.
+timepoints in the given *time* variable, and filters out any
+observations where the *time* value does not match those two periods.
+
+`migrate()` identifies the desired timepoints in the *time* variable by
+isolating the unique values and sorting. As a result, `migrate()` can
+accommodate a variety of data types. However, `date` or `datetime` data
+types are likely the most convenient to work with.
+
+While most data types will sort appropriately, if the *time* variable is
+type `character`, it is recommended to convert to type `factor`
+(ordered) before passing to `migrate()` to ensure the *time* values are
+properly sequenced. `migrate()` will throw a warning if the *time*
+variable is type `character`.
 
 If you are writing a SQL query to get data to be used with `migrate()`,
 the query would likely look something like this:
 
-``` r
-# -- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates
-
-# SELECT ID, Date, State, Balance
-# FROM my_database
-# WHERE Date IN ('2020-12-31', '2021-06-30')
+``` sql
+-- Get the *State* risk status and *Balance* dollar amount for each ID, at two distinct dates
+SELECT ID, Date, State, Balance
+FROM my_database
+WHERE Date IN ('2020-12-31', '2021-06-30')
 ```
 
 By default, `migrate()` drops observations that belong to IDs found at a
@@ -137,9 +147,6 @@ migrated_df <- migrate(
   state = risk_rating,
 )
 #> ℹ Migrating from 2020-06-30 to 2020-09-30
-```
-
-``` r
 head(migrated_df)
 #> # A tibble: 6 × 3
 #>   risk_rating_start risk_rating_end   prop