From 7d6d48fed9ea98a898a0742272a13ba3711bcacc Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Mon, 9 Mar 2026 10:58:14 -0600 Subject: [PATCH 1/4] Switch to native pipe --- R/pairwise_cor.R | 14 +++++++------- R/pairwise_count.R | 8 ++++---- R/pairwise_delta.R | 36 ++++++++++++++++++------------------ R/pairwise_dist.R | 18 +++++++++--------- R/pairwise_pmi.R | 10 +++++----- R/pairwise_similarity.R | 18 +++++++++--------- R/squarely.R | 14 +++++++------- R/widely.R | 20 ++++++++++---------- R/widely_hclust.R | 22 +++++++++++----------- R/widely_kmeans.R | 12 ++++++------ R/widely_svd.R | 10 +++++----- README.Rmd | 12 ++++++------ vignettes/intro.Rmd | 12 ++++++------ vignettes/united_nations.Rmd | 28 ++++++++++++++-------------- 14 files changed, 117 insertions(+), 117 deletions(-) diff --git a/R/pairwise_cor.R b/R/pairwise_cor.R index 198f4c2..239e952 100644 --- a/R/pairwise_cor.R +++ b/R/pairwise_cor.R @@ -20,16 +20,16 @@ #' library(dplyr) #' library(gapminder) #' -#' gapminder %>% +#' gapminder |> #' pairwise_cor(country, year, lifeExp) #' -#' gapminder %>% +#' gapminder |> #' pairwise_cor(country, year, lifeExp, sort = TRUE) #' #' # United Nations voting data #' if (require("unvotes", quietly = TRUE)) { -#' country_cors <- un_votes %>% -#' mutate(vote = as.numeric(vote)) %>% +#' country_cors <- un_votes |> +#' mutate(vote = as.numeric(vote)) |> #' pairwise_cor(country, rcid, vote, sort = TRUE) #' } #' @@ -68,8 +68,8 @@ pairwise_cor_ <- function(tbl, item, feature, value, } cor_func <- squarely_(f, sparse = sparse, ...) - tbl %>% - ungroup() %>% - cor_func(item, feature, value) %>% + tbl |> + ungroup() |> + cor_func(item, feature, value) |> rename(correlation = value) } diff --git a/R/pairwise_count.R b/R/pairwise_count.R index 9c8de60..15e825e 100644 --- a/R/pairwise_count.R +++ b/R/pairwise_count.R @@ -51,9 +51,9 @@ pairwise_count_ <- function(tbl, item, feature, wt = NULL, ...) { func <- squarely_(function(m) m %*% t(m > 0), sparse = TRUE, ...) } - tbl %>% - distinct(.data[[item]], .data[[feature]], .keep_all = TRUE) %>% - mutate(..value = 1) %>% - func(item, feature, wt) %>% + tbl |> + distinct(.data[[item]], .data[[feature]], .keep_all = TRUE) |> + mutate(..value = 1) |> + func(item, feature, wt) |> rename(n = value) } diff --git a/R/pairwise_delta.R b/R/pairwise_delta.R index eaff548..0e01d3b 100644 --- a/R/pairwise_delta.R +++ b/R/pairwise_delta.R @@ -20,32 +20,32 @@ #' library(tidytext) #' #' # closest documents in terms of 1000 most frequent words -#' closest <- austen_books() %>% -#' unnest_tokens(word, text) %>% -#' count(book, word) %>% -#' top_n(1000, n) %>% -#' pairwise_delta(book, word, n, method = "burrows") %>% +#' closest <- austen_books() |> +#' unnest_tokens(word, text) |> +#' count(book, word) |> +#' top_n(1000, n) |> +#' pairwise_delta(book, word, n, method = "burrows") |> #' arrange(delta) #' #' closest #' -#' closest %>% +#' closest |> #' filter(item1 == "Pride & Prejudice") #' #' # to remove duplicates, use upper = FALSE -#' closest <- austen_books() %>% -#' unnest_tokens(word, text) %>% -#' count(book, word) %>% -#' top_n(1000, n) %>% -#' pairwise_delta(book, word, n, method = "burrows", upper = FALSE) %>% +#' closest <- austen_books() |> +#' unnest_tokens(word, text) |> +#' count(book, word) |> +#' top_n(1000, n) |> +#' pairwise_delta(book, word, n, method = "burrows", upper = FALSE) |> #' arrange(delta) #' #' # Can also use Argamon's Linear Delta -#' closest <- austen_books() %>% -#' unnest_tokens(word, text) %>% -#' count(book, word) %>% -#' top_n(1000, n) %>% -#' pairwise_delta(book, word, n, method = "argamon", upper = FALSE) %>% +#' closest <- austen_books() |> +#' unnest_tokens(word, text) |> +#' count(book, word) |> +#' top_n(1000, n) |> +#' pairwise_delta(book, word, n, method = "argamon", upper = FALSE) |> #' arrange(delta) #' #' @export @@ -79,7 +79,7 @@ pairwise_delta_ <- function(tbl, item, feature, value, method = "burrows", ...) d_func <- squarely_(delta_func, ...) - tbl %>% - d_func(item, feature, value) %>% + tbl |> + d_func(item, feature, value) |> rename(delta = value) } diff --git a/R/pairwise_dist.R b/R/pairwise_dist.R index 4c50b46..cb6994d 100644 --- a/R/pairwise_dist.R +++ b/R/pairwise_dist.R @@ -19,23 +19,23 @@ #' library(dplyr) #' #' # closest countries in terms of life expectancy over time -#' closest <- gapminder %>% -#' pairwise_dist(country, year, lifeExp) %>% +#' closest <- gapminder |> +#' pairwise_dist(country, year, lifeExp) |> #' arrange(distance) #' #' closest #' -#' closest %>% +#' closest |> #' filter(item1 == "United States") #' #' # to remove duplicates, use upper = FALSE -#' gapminder %>% -#' pairwise_dist(country, year, lifeExp, upper = FALSE) %>% +#' gapminder |> +#' pairwise_dist(country, year, lifeExp, upper = FALSE) |> #' arrange(distance) #' #' # Can also use Manhattan distance -#' gapminder %>% -#' pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) %>% +#' gapminder |> +#' pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) |> #' arrange(distance) #' #' @export @@ -54,7 +54,7 @@ pairwise_dist <- function(tbl, item, feature, value, pairwise_dist_ <- function(tbl, item, feature, value, method = "euclidean", ...) { d_func <- squarely_(function(m) as.matrix(stats::dist(m, method = method)), ...) - tbl %>% - d_func(item, feature, value) %>% + tbl |> + d_func(item, feature, value) |> rename(distance = value) } diff --git a/R/pairwise_pmi.R b/R/pairwise_pmi.R index 6b37e71..98f0ab3 100644 --- a/R/pairwise_pmi.R +++ b/R/pairwise_pmi.R @@ -56,10 +56,10 @@ pairwise_pmi_ <- function(tbl, item, feature, sort = FALSE, ...) { } pmi_func <- squarely_(f, sparse = TRUE, sort = sort, ...) - tbl %>% - ungroup() %>% - mutate(..value = 1) %>% - pmi_func(item, feature, "..value") %>% - mutate(value = log(value)) %>% + tbl |> + ungroup() |> + mutate(..value = 1) |> + pmi_func(item, feature, "..value") |> + mutate(value = log(value)) |> rename(pmi = value) } diff --git a/R/pairwise_similarity.R b/R/pairwise_similarity.R index 71d95a8..664f592 100644 --- a/R/pairwise_similarity.R +++ b/R/pairwise_similarity.R @@ -25,20 +25,20 @@ #' library(tidytext) #' #' # Comparing Jane Austen novels -#' austen_words <- austen_books() %>% -#' unnest_tokens(word, text) %>% -#' anti_join(stop_words, by = "word") %>% -#' count(book, word) %>% +#' austen_words <- austen_books() |> +#' unnest_tokens(word, text) |> +#' anti_join(stop_words, by = "word") |> +#' count(book, word) |> #' ungroup() #' #' # closest books to each other -#' closest <- austen_words %>% -#' pairwise_similarity(book, word, n) %>% +#' closest <- austen_words |> +#' pairwise_similarity(book, word, n) |> #' arrange(desc(similarity)) #' #' closest #' -#' closest %>% +#' closest |> #' filter(item1 == "Emma") #' #' @export @@ -59,7 +59,7 @@ pairwise_similarity_ <- function(tbl, item, feature, value, ...) { normed %*% t(normed) }, sparse = TRUE, ...) - tbl %>% - d_func(item, feature, value) %>% + tbl |> + d_func(item, feature, value) |> rename(similarity = value) } diff --git a/R/squarely.R b/R/squarely.R index d57127e..60ccfb3 100644 --- a/R/squarely.R +++ b/R/squarely.R @@ -27,8 +27,8 @@ #' library(dplyr) #' library(gapminder) #' -#' closest_continent <- gapminder %>% -#' group_by(continent) %>% +#' closest_continent <- gapminder |> +#' group_by(continent) |> #' squarely(dist)(country, year, lifeExp) #' #' @export @@ -54,11 +54,11 @@ squarely_ <- function(.f, diag = FALSE, f <- function(tbl, item, feature, value, ...) { if (inherits(tbl, "grouped_df")) { # perform within each group, then restore groups - ret <- tbl %>% - tidyr::nest() %>% - mutate(data = purrr::map(data, f, item, feature, value)) %>% - filter(purrr::map_lgl(data, ~ nrow(.) > 0)) %>% - tidyr::unnest(data) %>% + ret <- tbl |> + tidyr::nest() |> + mutate(data = purrr::map(data, f, item, feature, value)) |> + filter(purrr::map_lgl(data, ~ nrow(.) > 0)) |> + tidyr::unnest(data) |> dplyr::group_by_at(dplyr::group_vars(tbl)) return(ret) diff --git a/R/widely.R b/R/widely.R index 50a8b15..7a3f2b4 100644 --- a/R/widely.R +++ b/R/widely.R @@ -33,17 +33,17 @@ #' #' gapminder #' -#' gapminder %>% +#' gapminder |> #' widely(dist)(country, year, lifeExp) #' #' # can perform within groups -#' closest_continent <- gapminder %>% -#' group_by(continent) %>% +#' closest_continent <- gapminder |> +#' group_by(continent) |> #' widely(dist)(country, year, lifeExp) #' closest_continent #' #' # for example, find the closest pair in each -#' closest_continent %>% +#' closest_continent |> #' top_n(1, -value) #' #' @export @@ -80,10 +80,10 @@ widely_ <- function(.f, if (inherits(tbl, "grouped_df")) { # perform within each group # (group_by_at isn't necessary since 1.0.0, but is in earlier versions) - ret <- tbl %>% - tidyr::nest() %>% - mutate(data = purrr::map(data, f, row, column, value)) %>% - tidyr::unnest(data) %>% + ret <- tbl |> + tidyr::nest() |> + mutate(data = purrr::map(data, f, row, column, value)) |> + tidyr::unnest(data) |> dplyr::group_by_at(dplyr::group_vars(tbl)) return(ret) @@ -111,8 +111,8 @@ widely_ <- function(.f, } output <- purrr::as_mapper(.f)(input, ...) - ret <- output %>% - custom_melt() %>% + ret <- output |> + custom_melt() |> as_tibble() if (sort) { diff --git a/R/widely_hclust.R b/R/widely_hclust.R index dfcca45..47c7eaf 100644 --- a/R/widely_hclust.R +++ b/R/widely_hclust.R @@ -17,18 +17,18 @@ #' #' # Construct Euclidean distances between countries based on life #' # expectancy over time -#' country_distances <- gapminder %>% +#' country_distances <- gapminder |> #' pairwise_dist(country, year, lifeExp) #' #' country_distances #' #' # Turn this into 5 hierarchical clusters -#' clusters <- country_distances %>% +#' clusters <- country_distances |> #' widely_hclust(item1, item2, distance, k = 8) #' #' # Examine a few such clusters -#' clusters %>% filter(cluster == 1) -#' clusters %>% filter(cluster == 2) +#' clusters |> filter(cluster == 1) +#' clusters |> filter(cluster == 2) #' #' @seealso [cutree] #' @@ -46,13 +46,13 @@ widely_hclust <- function(tbl, item1, item2, distance, k = NULL, h = NULL) { tibble(item1 = match(tbl[[col1_str]], unique_items), item2 = match(tbl[[col2_str]], unique_items), - distance = tbl[[dist_str]]) %>% - reshape2::acast(item1 ~ item2, value.var = "distance", fill = max_distance) %>% - stats::as.dist() %>% - stats::hclust() %>% - stats::cutree(k = k, h = h) %>% - tibble::enframe("item", "cluster") %>% + distance = tbl[[dist_str]]) |> + reshape2::acast(item1 ~ item2, value.var = "distance", fill = max_distance) |> + stats::as.dist() |> + stats::hclust() |> + stats::cutree(k = k, h = h) |> + tibble::enframe("item", "cluster") |> dplyr::mutate(item = unique_items[as.integer(item)], - cluster = factor(cluster)) %>% + cluster = factor(cluster)) |> dplyr::arrange(cluster) } diff --git a/R/widely_kmeans.R b/R/widely_kmeans.R index c473eff..6337ecd 100644 --- a/R/widely_kmeans.R +++ b/R/widely_kmeans.R @@ -21,17 +21,17 @@ #' library(gapminder) #' library(dplyr) #' -#' clusters <- gapminder %>% +#' clusters <- gapminder |> #' widely_kmeans(country, year, lifeExp, k = 5) #' #' clusters #' -#' clusters %>% +#' clusters |> #' count(cluster) #' #' # Examine a few clusters -#' clusters %>% filter(cluster == 1) -#' clusters %>% filter(cluster == 2) +#' clusters |> filter(cluster == 1) +#' clusters |> filter(cluster == 2) #' #' @export widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) { @@ -41,7 +41,7 @@ widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) { form <- stats::as.formula(paste(item_str, "~", feature_str)) - m <- tbl %>% + m <- tbl |> reshape2::acast(form, value.var = value_str, fill = fill) clustered <- stats::kmeans(m, k, ...) @@ -49,6 +49,6 @@ widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) { # Add the clusters to the original table i <- match(rownames(m), as.character(tbl[[item_str]])) tibble::tibble(!!sym(item_str) := tbl[[item_str]][i], - cluster = factor(clustered$cluster)) %>% + cluster = factor(clustered$cluster)) |> dplyr::arrange(cluster) } diff --git a/R/widely_svd.R b/R/widely_svd.R index a6aacb1..d8ffab6 100644 --- a/R/widely_svd.R +++ b/R/widely_svd.R @@ -25,7 +25,7 @@ #' library(gapminder) #' #' # principal components driving change -#' gapminder_svd <- gapminder %>% +#' gapminder_svd <- gapminder |> #' widely_svd(country, year, lifeExp) #' #' gapminder_svd @@ -34,9 +34,9 @@ #' library(ggplot2) #' library(tidyr) #' -#' gapminder_svd %>% -#' spread(dimension, value) %>% -#' inner_join(distinct(gapminder, country, continent), by = "country") %>% +#' gapminder_svd |> +#' spread(dimension, value) |> +#' inner_join(distinct(gapminder, country, continent), by = "country") |> #' ggplot(aes(`1`, `2`, label = country)) + #' geom_point(aes(color = continent)) + #' geom_text(vjust = 1, hjust = 1) @@ -94,7 +94,7 @@ widely_svd_ <- function(tbl, item, feature, value, nv = NULL, weight_d = FALSE, ret <- widely_(perform_svd, sparse = sparse)(tbl, item, feature, value) - ret <- ret %>% + ret <- ret |> transmute(item = item_u[as.integer(item1)], dimension = item2, value) diff --git a/README.Rmd b/README.Rmd index 46df85d..a088c19 100644 --- a/README.Rmd +++ b/README.Rmd @@ -79,7 +79,7 @@ The widyr package offers `pairwise_` functions that operate on pairs of items wi ```{r} library(widyr) -gapminder %>% +gapminder |> pairwise_dist(country, year, lifeExp) ``` @@ -88,23 +88,23 @@ This finds the Euclidean distance between the `lifeExp` value in each pair of co We could find the closest pairs of countries overall with `arrange()`: ```{r} -gapminder %>% - pairwise_dist(country, year, lifeExp) %>% +gapminder |> + pairwise_dist(country, year, lifeExp) |> arrange(distance) ``` Notice that this includes duplicates (Germany/Belgium and Belgium/Germany). To avoid those (the upper triangle of the distance matrix), use `upper = FALSE`: ```{r} -gapminder %>% - pairwise_dist(country, year, lifeExp, upper = FALSE) %>% +gapminder |> + pairwise_dist(country, year, lifeExp, upper = FALSE) |> arrange(distance) ``` In some analyses, we may be interested in correlation rather than distance of pairs. For this we would use `pairwise_cor`: ```{r} -gapminder %>% +gapminder |> pairwise_cor(country, year, lifeExp, upper = FALSE) ``` diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 94acbaa..3a08946 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -47,29 +47,29 @@ The widyr package offers `pairwise_` functions that operate on pairs of items wi ```{r} library(widyr) -gapminder %>% +gapminder |> pairwise_dist(country, year, lifeExp) ``` In a single step, this finds the Euclidean distance between the `lifeExp` value in each pair of countries, matching pairs based on year. We could find the closest pairs of countries overall with `arrange()`: ```{r} -gapminder %>% - pairwise_dist(country, year, lifeExp) %>% +gapminder |> + pairwise_dist(country, year, lifeExp) |> arrange(distance) ``` Notice that this includes duplicates (Germany/Belgium and Belgium/Germany). To avoid those (the upper triangle of the distance matrix), use `upper = FALSE`: ```{r} -gapminder %>% - pairwise_dist(country, year, lifeExp, upper = FALSE) %>% +gapminder |> + pairwise_dist(country, year, lifeExp, upper = FALSE) |> arrange(distance) ``` In some analyses, we may be interested in correlation rather than distance of pairs. For this we would use `pairwise_cor`: ```{r} -gapminder %>% +gapminder |> pairwise_cor(country, year, lifeExp, upper = FALSE, sort = TRUE) ``` diff --git a/vignettes/united_nations.Rmd b/vignettes/united_nations.Rmd index 95d7d14..b13b74b 100644 --- a/vignettes/united_nations.Rmd +++ b/vignettes/united_nations.Rmd @@ -50,8 +50,8 @@ We may then be interested in obtaining a measure of country-to-country agreement ```{r cors} library(widyr) -cors <- un_votes %>% - mutate(vote = as.numeric(vote)) %>% +cors <- un_votes |> + mutate(vote = as.numeric(vote)) |> pairwise_cor(country, rcid, vote, use = "pairwise.complete.obs", sort = TRUE) cors @@ -60,14 +60,14 @@ cors We could, for example, find the countries that the US is most and least in agreement with: ```{r US_cors} -US_cors <- cors %>% +US_cors <- cors |> filter(item1 == "United States") # Most in agreement US_cors # Least in agreement -US_cors %>% +US_cors |> arrange(correlation) ``` @@ -78,13 +78,13 @@ if (require("maps", quietly = TRUE) && require("fuzzyjoin", quietly = TRUE) && require("countrycode", quietly = TRUE) && require("ggplot2", quietly = TRUE)) { - world_data <- map_data("world") %>% - regex_full_join(iso3166, by = c("region" = "mapname")) %>% + world_data <- map_data("world") |> + regex_full_join(iso3166, by = c("region" = "mapname")) |> filter(region != "Antarctica") - US_cors %>% - mutate(a2 = countrycode(item2, "country.name", "iso2c")) %>% - full_join(world_data, by = "a2") %>% + US_cors |> + mutate(a2 = countrycode(item2, "country.name", "iso2c")) |> + full_join(world_data, by = "a2") |> ggplot(aes(long, lat, group = group, fill = correlation)) + geom_polygon(color = "gray", size = .1) + scale_fill_gradient2() + @@ -104,18 +104,18 @@ Another useful kind of visualization is a network plot, which can be created wit if (require("ggraph", quietly = TRUE) && require("igraph", quietly = TRUE) && require("countrycode", quietly = TRUE)) { - cors_filtered <- cors %>% + cors_filtered <- cors |> filter(correlation > .6) - continents <- tibble(country = unique(un_votes$country)) %>% + continents <- tibble(country = unique(un_votes$country)) |> filter(country %in% cors_filtered$item1 | - country %in% cors_filtered$item2) %>% + country %in% cors_filtered$item2) |> mutate(continent = countrycode(country, "country.name", "continent")) set.seed(2017) - cors_filtered %>% - graph_from_data_frame(vertices = continents) %>% + cors_filtered |> + graph_from_data_frame(vertices = continents) |> ggraph() + geom_edge_link(aes(edge_alpha = correlation)) + geom_node_point(aes(color = continent), size = 3) + From 1a1132cf4bd35b8c8a5a6d4261b669b7cfde4cef Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Mon, 9 Mar 2026 10:58:20 -0600 Subject: [PATCH 2/4] Redocument --- man/pairwise_cor.Rd | 8 ++++---- man/pairwise_delta.Rd | 32 ++++++++++++++++---------------- man/pairwise_dist.Rd | 14 +++++++------- man/pairwise_similarity.Rd | 14 +++++++------- man/squarely.Rd | 4 ++-- man/widely.Rd | 8 ++++---- man/widely_hclust.Rd | 8 ++++---- man/widely_kmeans.Rd | 8 ++++---- man/widely_svd.Rd | 8 ++++---- 9 files changed, 52 insertions(+), 52 deletions(-) diff --git a/man/pairwise_cor.Rd b/man/pairwise_cor.Rd index f5fecd2..e515db9 100644 --- a/man/pairwise_cor.Rd +++ b/man/pairwise_cor.Rd @@ -53,16 +53,16 @@ that links them together. This is an example of the spread-operate-retidy patter library(dplyr) library(gapminder) -gapminder \%>\% +gapminder |> pairwise_cor(country, year, lifeExp) -gapminder \%>\% +gapminder |> pairwise_cor(country, year, lifeExp, sort = TRUE) # United Nations voting data if (require("unvotes", quietly = TRUE)) { - country_cors <- un_votes \%>\% - mutate(vote = as.numeric(vote)) \%>\% + country_cors <- un_votes |> + mutate(vote = as.numeric(vote)) |> pairwise_cor(country, rcid, vote, sort = TRUE) } diff --git a/man/pairwise_delta.Rd b/man/pairwise_delta.Rd index 053b74e..f09c398 100644 --- a/man/pairwise_delta.Rd +++ b/man/pairwise_delta.Rd @@ -34,32 +34,32 @@ library(dplyr) library(tidytext) # closest documents in terms of 1000 most frequent words -closest <- austen_books() \%>\% - unnest_tokens(word, text) \%>\% - count(book, word) \%>\% - top_n(1000, n) \%>\% - pairwise_delta(book, word, n, method = "burrows") \%>\% +closest <- austen_books() |> + unnest_tokens(word, text) |> + count(book, word) |> + top_n(1000, n) |> + pairwise_delta(book, word, n, method = "burrows") |> arrange(delta) closest -closest \%>\% +closest |> filter(item1 == "Pride & Prejudice") # to remove duplicates, use upper = FALSE -closest <- austen_books() \%>\% - unnest_tokens(word, text) \%>\% - count(book, word) \%>\% - top_n(1000, n) \%>\% - pairwise_delta(book, word, n, method = "burrows", upper = FALSE) \%>\% +closest <- austen_books() |> + unnest_tokens(word, text) |> + count(book, word) |> + top_n(1000, n) |> + pairwise_delta(book, word, n, method = "burrows", upper = FALSE) |> arrange(delta) # Can also use Argamon's Linear Delta -closest <- austen_books() \%>\% - unnest_tokens(word, text) \%>\% - count(book, word) \%>\% - top_n(1000, n) \%>\% - pairwise_delta(book, word, n, method = "argamon", upper = FALSE) \%>\% +closest <- austen_books() |> + unnest_tokens(word, text) |> + count(book, word) |> + top_n(1000, n) |> + pairwise_delta(book, word, n, method = "argamon", upper = FALSE) |> arrange(delta) } diff --git a/man/pairwise_dist.Rd b/man/pairwise_dist.Rd index 0c28775..702fd34 100644 --- a/man/pairwise_dist.Rd +++ b/man/pairwise_dist.Rd @@ -33,23 +33,23 @@ library(gapminder) library(dplyr) # closest countries in terms of life expectancy over time -closest <- gapminder \%>\% - pairwise_dist(country, year, lifeExp) \%>\% +closest <- gapminder |> + pairwise_dist(country, year, lifeExp) |> arrange(distance) closest -closest \%>\% +closest |> filter(item1 == "United States") # to remove duplicates, use upper = FALSE -gapminder \%>\% - pairwise_dist(country, year, lifeExp, upper = FALSE) \%>\% +gapminder |> + pairwise_dist(country, year, lifeExp, upper = FALSE) |> arrange(distance) # Can also use Manhattan distance -gapminder \%>\% - pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) \%>\% +gapminder |> + pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) |> arrange(distance) } diff --git a/man/pairwise_similarity.Rd b/man/pairwise_similarity.Rd index 9c93b73..5ef18f4 100644 --- a/man/pairwise_similarity.Rd +++ b/man/pairwise_similarity.Rd @@ -38,20 +38,20 @@ library(dplyr) library(tidytext) # Comparing Jane Austen novels -austen_words <- austen_books() \%>\% - unnest_tokens(word, text) \%>\% - anti_join(stop_words, by = "word") \%>\% - count(book, word) \%>\% +austen_words <- austen_books() |> + unnest_tokens(word, text) |> + anti_join(stop_words, by = "word") |> + count(book, word) |> ungroup() # closest books to each other -closest <- austen_words \%>\% - pairwise_similarity(book, word, n) \%>\% +closest <- austen_words |> + pairwise_similarity(book, word, n) |> arrange(desc(similarity)) closest -closest \%>\% +closest |> filter(item1 == "Emma") } diff --git a/man/squarely.Rd b/man/squarely.Rd index 19249ff..ed8ff8b 100644 --- a/man/squarely.Rd +++ b/man/squarely.Rd @@ -39,8 +39,8 @@ distance or correlation matrix. library(dplyr) library(gapminder) -closest_continent <- gapminder \%>\% - group_by(continent) \%>\% +closest_continent <- gapminder |> + group_by(continent) |> squarely(dist)(country, year, lifeExp) } diff --git a/man/widely.Rd b/man/widely.Rd index ceea2a9..0384356 100644 --- a/man/widely.Rd +++ b/man/widely.Rd @@ -44,17 +44,17 @@ library(gapminder) gapminder -gapminder \%>\% +gapminder |> widely(dist)(country, year, lifeExp) # can perform within groups -closest_continent <- gapminder \%>\% - group_by(continent) \%>\% +closest_continent <- gapminder |> + group_by(continent) |> widely(dist)(country, year, lifeExp) closest_continent # for example, find the closest pair in each -closest_continent \%>\% +closest_continent |> top_n(1, -value) } diff --git a/man/widely_hclust.Rd b/man/widely_hclust.Rd index 3be99c0..1491070 100644 --- a/man/widely_hclust.Rd +++ b/man/widely_hclust.Rd @@ -30,18 +30,18 @@ library(dplyr) # Construct Euclidean distances between countries based on life # expectancy over time -country_distances <- gapminder \%>\% +country_distances <- gapminder |> pairwise_dist(country, year, lifeExp) country_distances # Turn this into 5 hierarchical clusters -clusters <- country_distances \%>\% +clusters <- country_distances |> widely_hclust(item1, item2, distance, k = 8) # Examine a few such clusters -clusters \%>\% filter(cluster == 1) -clusters \%>\% filter(cluster == 2) +clusters |> filter(cluster == 1) +clusters |> filter(cluster == 2) } \seealso{ diff --git a/man/widely_kmeans.Rd b/man/widely_kmeans.Rd index 7793b0f..4d2737b 100644 --- a/man/widely_kmeans.Rd +++ b/man/widely_kmeans.Rd @@ -31,17 +31,17 @@ one-row-per-cluster. library(gapminder) library(dplyr) -clusters <- gapminder \%>\% +clusters <- gapminder |> widely_kmeans(country, year, lifeExp, k = 5) clusters -clusters \%>\% +clusters |> count(cluster) # Examine a few clusters -clusters \%>\% filter(cluster == 1) -clusters \%>\% filter(cluster == 2) +clusters |> filter(cluster == 1) +clusters |> filter(cluster == 2) } \seealso{ diff --git a/man/widely_svd.Rd b/man/widely_svd.Rd index 6defa51..a8e6354 100644 --- a/man/widely_svd.Rd +++ b/man/widely_svd.Rd @@ -41,7 +41,7 @@ library(dplyr) library(gapminder) # principal components driving change -gapminder_svd <- gapminder \%>\% +gapminder_svd <- gapminder |> widely_svd(country, year, lifeExp) gapminder_svd @@ -50,9 +50,9 @@ gapminder_svd library(ggplot2) library(tidyr) -gapminder_svd \%>\% - spread(dimension, value) \%>\% - inner_join(distinct(gapminder, country, continent), by = "country") \%>\% +gapminder_svd |> + spread(dimension, value) |> + inner_join(distinct(gapminder, country, continent), by = "country") |> ggplot(aes(`1`, `2`, label = country)) + geom_point(aes(color = continent)) + geom_text(vjust = 1, hjust = 1) From 9acee4a978a3ec3b8b894836bf541ce746467ff8 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Mon, 9 Mar 2026 10:58:27 -0600 Subject: [PATCH 3/4] Update tests --- tests/testthat/test-pairwise-cor.R | 8 ++-- tests/testthat/test-pairwise-count.R | 46 +++++++++++------------ tests/testthat/test-pairwise-dist.R | 2 +- tests/testthat/test-pairwise-similarity.R | 4 +- tests/testthat/test-squarely.R | 6 +-- tests/testthat/test-widely.R | 8 ++-- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/tests/testthat/test-pairwise-cor.R b/tests/testthat/test-pairwise-cor.R index 0174151..f116db3 100644 --- a/tests/testthat/test-pairwise-cor.R +++ b/tests/testthat/test-pairwise-cor.R @@ -7,7 +7,7 @@ d <- tibble(col = rep(c("a", "b", "c"), each = 3), value = c(1, 2, 3, 6, 5, 4, 7, 9, 8)) test_that("pairwise_cor computes pairwise correlations", { - ret <- d %>% + ret <- d |> pairwise_cor(col, row, value) ret1 <- ret$correlation[ret$item1 == "a" & ret$item2 == "b"] @@ -20,7 +20,7 @@ test_that("pairwise_cor computes pairwise correlations", { }) test_that("pairwise_cor can compute Spearman correlations", { - ret <- d %>% + ret <- d |> pairwise_cor(col, row, value, method = "spearman") ret1 <- ret$correlation[ret$item1 == "a" & ret$item2 == "b"] @@ -29,7 +29,7 @@ test_that("pairwise_cor can compute Spearman correlations", { test_that("pairwise_cor works on binary matrices", { cors <- data.frame(x = c("a", "a", "a", "b", "b", "b", "c", "c", "c"), - y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) %>% + y = c(1, 2, 3, 1, 2, 3, 1, 3, 4)) |> pairwise_cor(x, y, sort = TRUE) expect_equal(colnames(cors), c("item1", "item2", "correlation")) @@ -39,7 +39,7 @@ test_that("pairwise_cor works on binary matrices", { test_that("pairwise_cor retains factor levels", { d$col <- factor(d$col, levels = c("b", "c", "a")) - ret <- d %>% + ret <- d |> pairwise_cor(col, row, value, method = "spearman") expect_is(ret$item1, "factor") diff --git a/tests/testthat/test-pairwise-count.R b/tests/testthat/test-pairwise-count.R index 35702c8..a2f7406 100644 --- a/tests/testthat/test-pairwise-count.R +++ b/tests/testthat/test-pairwise-count.R @@ -8,12 +8,12 @@ suppressPackageStartupMessages(library(tidytext)) original <- tibble(txt = c("I felt a funeral in my brain,", "And mourners, to and fro,", "Kept treading, treading, till it seemed", - "That sense was breaking through.")) %>% - mutate(line = row_number()) %>% + "That sense was breaking through.")) |> + mutate(line = row_number()) |> unnest_tokens(char, txt, token = "characters") test_that("pairing and counting works", { - d <- original %>% + d <- original |> pairwise_count(char, line, sort = TRUE, upper = FALSE, diag = FALSE) expect_equal(nrow(d), 164) @@ -29,25 +29,25 @@ test_that("pairing and counting works", { # for self-pairs, the number of occurrences should be the number of distinct # lines - d2 <- original %>% + d2 <- original |> pairwise_count(char, line, sort = TRUE, upper = FALSE, diag = TRUE) expect_equal(nrow(d2), nrow(d) + 20) - self_pairs <- d2 %>% - filter(item1 == item2) %>% + self_pairs <- d2 |> + filter(item1 == item2) |> arrange(item1) - char_counts <- original %>% - distinct(line, char) %>% - count(char) %>% + char_counts <- original |> + distinct(line, char) |> + count(char) |> arrange(char) expect_true(all(self_pairs$item1 == char_counts$char)) expect_true(all(self_pairs$n == char_counts$n)) # when upper is TRUE, should include twice as many items as original - d3 <- original %>% + d3 <- original |> pairwise_count(char, line, sort = TRUE, upper = TRUE) expect_equal(nrow(d) * 2, nrow(d3)) @@ -72,27 +72,27 @@ test_that("We can count with a weight column", { test_that("Counts co-occurrences of words in Pride & Prejudice", { if (require("janeaustenr", quietly = TRUE)) { - words <- tibble(text = prideprejudice) %>% - mutate(line = row_number()) %>% + words <- tibble(text = prideprejudice) |> + mutate(line = row_number()) |> unnest_tokens(word, text) - pairs <- words %>% + pairs <- words |> pairwise_count(word, line, upper = TRUE, diag = TRUE, sort = TRUE) # check it is sorted in descending order expect_false(is.unsorted(rev(pairs$n))) # check occurrences of words that appear with "elizabeth" - words_with_elizabeth <- words %>% - filter(word == "elizabeth") %>% - select(line) %>% - inner_join(words, by = "line") %>% - distinct(word, line) %>% - count(word) %>% + words_with_elizabeth <- words |> + filter(word == "elizabeth") |> + select(line) |> + inner_join(words, by = "line", relationship = "many-to-many") |> + distinct(word, line) |> + count(word) |> arrange(n, word) - pairs_with_elizabeth <- pairs %>% - filter(item1 == "elizabeth") %>% + pairs_with_elizabeth <- pairs |> + filter(item1 == "elizabeth") |> arrange(n, item2) expect_true(all(words_with_elizabeth$word == pairs_with_elizabeth$item2)) @@ -101,8 +101,8 @@ test_that("Counts co-occurrences of words in Pride & Prejudice", { }) test_that("Can count within groups", { - grouped_result <- mtcars %>% - group_by(cyl) %>% + grouped_result <- mtcars |> + group_by(cyl) |> pairwise_count(vs, am) expect_equal(as.character(groups(grouped_result)), c("cyl")) diff --git a/tests/testthat/test-pairwise-dist.R b/tests/testthat/test-pairwise-dist.R index bf19c45..6cc4d70 100644 --- a/tests/testthat/test-pairwise-dist.R +++ b/tests/testthat/test-pairwise-dist.R @@ -7,7 +7,7 @@ test_that("pairwise_dist computes a distance matrix", { row = rep(c("d", "e", "f"), 3), value = c(1, 2, 3, 6, 5, 4, 7, 9, 8)) - ret <- d %>% + ret <- d |> pairwise_dist(col, row, value) ret1 <- ret$distance[ret$item1 == "a" & ret$item2 == "b"] diff --git a/tests/testthat/test-pairwise-similarity.R b/tests/testthat/test-pairwise-similarity.R index 833ff55..9d8656f 100644 --- a/tests/testthat/test-pairwise-similarity.R +++ b/tests/testthat/test-pairwise-similarity.R @@ -11,7 +11,7 @@ cosine_similarity <- function(x, y) { } test_that("pairwise_similarity computes pairwise cosine similarity", { - ret <- d %>% + ret <- d |> pairwise_similarity(col, row, value) ret1 <- ret$similarity[ret$item1 == "a" & ret$item2 == "b"] @@ -26,7 +26,7 @@ test_that("pairwise_similarity computes pairwise cosine similarity", { test_that("pairwise_similarity retains factor levels", { d$col <- factor(d$col, levels = c("b", "c", "a")) - ret <- d %>% + ret <- d |> pairwise_similarity(col, row, value) expect_is(ret$item1, "factor") diff --git a/tests/testthat/test-squarely.R b/tests/testthat/test-squarely.R index f3ef14b..f6d155f 100644 --- a/tests/testthat/test-squarely.R +++ b/tests/testthat/test-squarely.R @@ -6,7 +6,7 @@ test_that("Can perform 'squarely' operations on pairs of items", { if (require("gapminder", quietly = TRUE)) { ncountries <- length(unique(gapminder$country)) - closest <- gapminder %>% + closest <- gapminder |> squarely(dist)(country, year, lifeExp) expect_equal(colnames(closest), c("item1", "item2", "value")) @@ -17,8 +17,8 @@ test_that("Can perform 'squarely' operations on pairs of items", { test_that("Can perform 'squarely' within groups", { if (require("gapminder", quietly = TRUE)) { - closest_continent <- gapminder %>% - group_by(continent) %>% + closest_continent <- gapminder |> + group_by(continent) |> squarely(dist)(country, year, lifeExp) expect_equal(colnames(closest_continent), c("continent", "item1", "item2", "value")) diff --git a/tests/testthat/test-widely.R b/tests/testthat/test-widely.R index 2243b00..ec5d75c 100644 --- a/tests/testthat/test-widely.R +++ b/tests/testthat/test-widely.R @@ -4,7 +4,7 @@ test_that("widely can widen, operate, and re-tidy", { skip_if_not_installed("gapminder") library(gapminder) - ret <- gapminder %>% + ret <- gapminder |> widely(cor)(year, country, lifeExp) expect_is(ret$item1, "character") @@ -17,7 +17,7 @@ test_that("widely can widen, operate, and re-tidy", { expect_equal(nrow(ret), length(unique(gapminder$country)) ^ 2) - ret2 <- gapminder %>% + ret2 <- gapminder |> widely(cor, sort = TRUE)(year, country, lifeExp) expect_equal(sort(ret$value, decreasing = TRUE), ret2$value) @@ -27,8 +27,8 @@ test_that("widely works within groups", { skip_if_not_installed("gapminder") library(gapminder) - ret <- gapminder %>% - group_by(continent) %>% + ret <- gapminder |> + group_by(continent) |> widely(cor)(year, country, lifeExp) expect_equal(colnames(ret), c("continent", "item1", "item2", "value")) From 06d8e0013230379db49e178e9bacaa46e9ea192c Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Mon, 9 Mar 2026 10:58:41 -0600 Subject: [PATCH 4/4] Render README --- README.md | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 1121273..14f1403 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,10 @@ useful. A **wide** dataset is one or more matrices where: -- Each row is one **item** -- Each column is one **feature** -- Each value is one **observation** -- Each matrix is one **variable** +- Each row is one **item** +- Each column is one **feature** +- Each value is one **observation** +- Each matrix is one **variable** When would you want data to be wide rather than tidy? Notable examples include classification, clustering, correlation, factorization, or other @@ -85,8 +85,7 @@ gapminder #> 8 Afghanistan Asia 1987 40.8 13867957 852. #> 9 Afghanistan Asia 1992 41.7 16317921 649. #> 10 Afghanistan Asia 1997 41.8 22227415 635. -#> # … with 1,694 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 1,694 more rows ``` This tidy format (one-row-per-country-per-year) is very useful for @@ -103,7 +102,7 @@ items within data. An example is `pairwise_dist`: ``` r library(widyr) -gapminder %>% +gapminder |> pairwise_dist(country, year, lifeExp) #> # A tibble: 20,022 × 3 #> item1 item2 distance @@ -118,8 +117,7 @@ gapminder %>% #> 8 Bangladesh Afghanistan 45.3 #> 9 Belgium Afghanistan 125. #> 10 Benin Afghanistan 39.3 -#> # … with 20,012 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 20,012 more rows ``` This finds the Euclidean distance between the `lifeExp` value in each @@ -129,8 +127,8 @@ with `year`, which is the feature column. We could find the closest pairs of countries overall with `arrange()`: ``` r -gapminder %>% - pairwise_dist(country, year, lifeExp) %>% +gapminder |> + pairwise_dist(country, year, lifeExp) |> arrange(distance) #> # A tibble: 20,022 × 3 #> item1 item2 distance @@ -145,8 +143,7 @@ gapminder %>% #> 8 Israel Italy 1.66 #> 9 Finland Austria 1.94 #> 10 Austria Finland 1.94 -#> # … with 20,012 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 20,012 more rows ``` Notice that this includes duplicates (Germany/Belgium and @@ -154,8 +151,8 @@ Belgium/Germany). To avoid those (the upper triangle of the distance matrix), use `upper = FALSE`: ``` r -gapminder %>% - pairwise_dist(country, year, lifeExp, upper = FALSE) %>% +gapminder |> + pairwise_dist(country, year, lifeExp, upper = FALSE) |> arrange(distance) #> # A tibble: 10,011 × 3 #> item1 item2 distance @@ -170,15 +167,14 @@ gapminder %>% #> 8 Comoros Mauritania 2.01 #> 9 Belgium United States 2.09 #> 10 Germany Ireland 2.10 -#> # … with 10,001 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 10,001 more rows ``` In some analyses, we may be interested in correlation rather than distance of pairs. For this we would use `pairwise_cor`: ``` r -gapminder %>% +gapminder |> pairwise_cor(country, year, lifeExp, upper = FALSE) #> # A tibble: 10,011 × 3 #> item1 item2 correlation @@ -193,8 +189,7 @@ gapminder %>% #> 8 Albania Argentina 0.949 #> 9 Algeria Argentina 0.991 #> 10 Angola Argentina 0.936 -#> # … with 10,001 more rows -#> # ℹ Use `print(n = ...)` to see more rows +#> # ℹ 10,001 more rows ``` ### Code of Conduct