Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions R/pairwise_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@
#' library(dplyr)
#' library(gapminder)
#'
#' gapminder %>%
#' gapminder |>
#' pairwise_cor(country, year, lifeExp)
#'
#' gapminder %>%
#' gapminder |>
#' pairwise_cor(country, year, lifeExp, sort = TRUE)
#'
#' # United Nations voting data
#' if (require("unvotes", quietly = TRUE)) {
#' country_cors <- un_votes %>%
#' mutate(vote = as.numeric(vote)) %>%
#' country_cors <- un_votes |>
#' mutate(vote = as.numeric(vote)) |>
#' pairwise_cor(country, rcid, vote, sort = TRUE)
#' }
#'
Expand Down Expand Up @@ -68,8 +68,8 @@ pairwise_cor_ <- function(tbl, item, feature, value,
}
cor_func <- squarely_(f, sparse = sparse, ...)

tbl %>%
ungroup() %>%
cor_func(item, feature, value) %>%
tbl |>
ungroup() |>
cor_func(item, feature, value) |>
rename(correlation = value)
}
8 changes: 4 additions & 4 deletions R/pairwise_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ pairwise_count_ <- function(tbl, item, feature, wt = NULL, ...) {
func <- squarely_(function(m) m %*% t(m > 0), sparse = TRUE, ...)
}

tbl %>%
distinct(.data[[item]], .data[[feature]], .keep_all = TRUE) %>%
mutate(..value = 1) %>%
func(item, feature, wt) %>%
tbl |>
distinct(.data[[item]], .data[[feature]], .keep_all = TRUE) |>
mutate(..value = 1) |>
func(item, feature, wt) |>
rename(n = value)
}
36 changes: 18 additions & 18 deletions R/pairwise_delta.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,32 @@
#' library(tidytext)
#'
#' # closest documents in terms of 1000 most frequent words
#' closest <- austen_books() %>%
#' unnest_tokens(word, text) %>%
#' count(book, word) %>%
#' top_n(1000, n) %>%
#' pairwise_delta(book, word, n, method = "burrows") %>%
#' closest <- austen_books() |>
#' unnest_tokens(word, text) |>
#' count(book, word) |>
#' top_n(1000, n) |>
#' pairwise_delta(book, word, n, method = "burrows") |>
#' arrange(delta)
#'
#' closest
#'
#' closest %>%
#' closest |>
#' filter(item1 == "Pride & Prejudice")
#'
#' # to remove duplicates, use upper = FALSE
#' closest <- austen_books() %>%
#' unnest_tokens(word, text) %>%
#' count(book, word) %>%
#' top_n(1000, n) %>%
#' pairwise_delta(book, word, n, method = "burrows", upper = FALSE) %>%
#' closest <- austen_books() |>
#' unnest_tokens(word, text) |>
#' count(book, word) |>
#' top_n(1000, n) |>
#' pairwise_delta(book, word, n, method = "burrows", upper = FALSE) |>
#' arrange(delta)
#'
#' # Can also use Argamon's Linear Delta
#' closest <- austen_books() %>%
#' unnest_tokens(word, text) %>%
#' count(book, word) %>%
#' top_n(1000, n) %>%
#' pairwise_delta(book, word, n, method = "argamon", upper = FALSE) %>%
#' closest <- austen_books() |>
#' unnest_tokens(word, text) |>
#' count(book, word) |>
#' top_n(1000, n) |>
#' pairwise_delta(book, word, n, method = "argamon", upper = FALSE) |>
#' arrange(delta)
#'
#' @export
Expand Down Expand Up @@ -79,7 +79,7 @@ pairwise_delta_ <- function(tbl, item, feature, value, method = "burrows", ...)

d_func <- squarely_(delta_func, ...)

tbl %>%
d_func(item, feature, value) %>%
tbl |>
d_func(item, feature, value) |>
rename(delta = value)
}
18 changes: 9 additions & 9 deletions R/pairwise_dist.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@
#' library(dplyr)
#'
#' # closest countries in terms of life expectancy over time
#' closest <- gapminder %>%
#' pairwise_dist(country, year, lifeExp) %>%
#' closest <- gapminder |>
#' pairwise_dist(country, year, lifeExp) |>
#' arrange(distance)
#'
#' closest
#'
#' closest %>%
#' closest |>
#' filter(item1 == "United States")
#'
#' # to remove duplicates, use upper = FALSE
#' gapminder %>%
#' pairwise_dist(country, year, lifeExp, upper = FALSE) %>%
#' gapminder |>
#' pairwise_dist(country, year, lifeExp, upper = FALSE) |>
#' arrange(distance)
#'
#' # Can also use Manhattan distance
#' gapminder %>%
#' pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) %>%
#' gapminder |>
#' pairwise_dist(country, year, lifeExp, method = "manhattan", upper = FALSE) |>
#' arrange(distance)
#'
#' @export
Expand All @@ -54,7 +54,7 @@ pairwise_dist <- function(tbl, item, feature, value,
#' Standard-evaluation backend for pairwise_dist
#'
#' Computes pairwise distances between all pairs of items, based on the
#' values each item has for each feature. The heavy lifting is delegated
#' to squarely_(), which widens the table into an item-by-feature matrix,
#' applies the supplied matrix function, and melts the result back into a
#' tidy item1/item2/value table.
#'
#' NOTE(review): the pasted diff contained both the old `%>%` pipeline and
#' the new `|>` pipeline; only the merged (`|>`) version is kept here.
#'
#' @param tbl A table.
#' @param item Name (string) of the column of items to compare pairwise.
#' @param feature Name (string) of the column describing the feature that
#'   links one item to another.
#' @param value Name (string) of the column holding the values.
#' @param method Distance measure passed on to [stats::dist()];
#'   defaults to "euclidean".
#' @param ... Extra arguments passed on to squarely_(), such as `diag`
#'   and `upper`.
#'
#' @return A tibble with columns `item1`, `item2`, and `distance`.
pairwise_dist_ <- function(tbl, item, feature, value, method = "euclidean", ...) {
  # as.matrix() is required because stats::dist() returns a "dist" object,
  # while squarely_() expects a square matrix it can melt back to tidy form.
  d_func <- squarely_(function(m) as.matrix(stats::dist(m, method = method)), ...)

  tbl |>
    d_func(item, feature, value) |>
    rename(distance = value)
}
10 changes: 5 additions & 5 deletions R/pairwise_pmi.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ pairwise_pmi_ <- function(tbl, item, feature, sort = FALSE, ...) {
}
pmi_func <- squarely_(f, sparse = TRUE, sort = sort, ...)

tbl %>%
ungroup() %>%
mutate(..value = 1) %>%
pmi_func(item, feature, "..value") %>%
mutate(value = log(value)) %>%
tbl |>
ungroup() |>
mutate(..value = 1) |>
pmi_func(item, feature, "..value") |>
mutate(value = log(value)) |>
rename(pmi = value)
}
18 changes: 9 additions & 9 deletions R/pairwise_similarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@
#' library(tidytext)
#'
#' # Comparing Jane Austen novels
#' austen_words <- austen_books() %>%
#' unnest_tokens(word, text) %>%
#' anti_join(stop_words, by = "word") %>%
#' count(book, word) %>%
#' austen_words <- austen_books() |>
#' unnest_tokens(word, text) |>
#' anti_join(stop_words, by = "word") |>
#' count(book, word) |>
#' ungroup()
#'
#' # closest books to each other
#' closest <- austen_words %>%
#' pairwise_similarity(book, word, n) %>%
#' closest <- austen_words |>
#' pairwise_similarity(book, word, n) |>
#' arrange(desc(similarity))
#'
#' closest
#'
#' closest %>%
#' closest |>
#' filter(item1 == "Emma")
#'
#' @export
Expand All @@ -59,7 +59,7 @@ pairwise_similarity_ <- function(tbl, item, feature, value, ...) {
normed %*% t(normed)
}, sparse = TRUE, ...)

tbl %>%
d_func(item, feature, value) %>%
tbl |>
d_func(item, feature, value) |>
rename(similarity = value)
}
14 changes: 7 additions & 7 deletions R/squarely.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
#' library(dplyr)
#' library(gapminder)
#'
#' closest_continent <- gapminder %>%
#' group_by(continent) %>%
#' closest_continent <- gapminder |>
#' group_by(continent) |>
#' squarely(dist)(country, year, lifeExp)
#'
#' @export
Expand All @@ -54,11 +54,11 @@ squarely_ <- function(.f, diag = FALSE,
f <- function(tbl, item, feature, value, ...) {
if (inherits(tbl, "grouped_df")) {
# perform within each group, then restore groups
ret <- tbl %>%
tidyr::nest() %>%
mutate(data = purrr::map(data, f, item, feature, value)) %>%
filter(purrr::map_lgl(data, ~ nrow(.) > 0)) %>%
tidyr::unnest(data) %>%
ret <- tbl |>
tidyr::nest() |>
mutate(data = purrr::map(data, f, item, feature, value)) |>
filter(purrr::map_lgl(data, ~ nrow(.) > 0)) |>
tidyr::unnest(data) |>
dplyr::group_by_at(dplyr::group_vars(tbl))

return(ret)
Expand Down
20 changes: 10 additions & 10 deletions R/widely.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@
#'
#' gapminder
#'
#' gapminder %>%
#' gapminder |>
#' widely(dist)(country, year, lifeExp)
#'
#' # can perform within groups
#' closest_continent <- gapminder %>%
#' group_by(continent) %>%
#' closest_continent <- gapminder |>
#' group_by(continent) |>
#' widely(dist)(country, year, lifeExp)
#' closest_continent
#'
#' # for example, find the closest pair in each
#' closest_continent %>%
#' closest_continent |>
#' top_n(1, -value)
#'
#' @export
Expand Down Expand Up @@ -80,10 +80,10 @@ widely_ <- function(.f,
if (inherits(tbl, "grouped_df")) {
# perform within each group
# (group_by_at isn't necessary since 1.0.0, but is in earlier versions)
ret <- tbl %>%
tidyr::nest() %>%
mutate(data = purrr::map(data, f, row, column, value)) %>%
tidyr::unnest(data) %>%
ret <- tbl |>
tidyr::nest() |>
mutate(data = purrr::map(data, f, row, column, value)) |>
tidyr::unnest(data) |>
dplyr::group_by_at(dplyr::group_vars(tbl))

return(ret)
Expand Down Expand Up @@ -111,8 +111,8 @@ widely_ <- function(.f,
}
output <- purrr::as_mapper(.f)(input, ...)

ret <- output %>%
custom_melt() %>%
ret <- output |>
custom_melt() |>
as_tibble()

if (sort) {
Expand Down
22 changes: 11 additions & 11 deletions R/widely_hclust.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,18 @@
#'
#' # Construct Euclidean distances between countries based on life
#' # expectancy over time
#' country_distances <- gapminder %>%
#' country_distances <- gapminder |>
#' pairwise_dist(country, year, lifeExp)
#'
#' country_distances
#'
#' # Turn this into 5 hierarchical clusters
#' clusters <- country_distances %>%
#' clusters <- country_distances |>
#' widely_hclust(item1, item2, distance, k = 8)
#'
#' # Examine a few such clusters
#' clusters %>% filter(cluster == 1)
#' clusters %>% filter(cluster == 2)
#' clusters |> filter(cluster == 1)
#' clusters |> filter(cluster == 2)
#'
#' @seealso [cutree]
#'
Expand All @@ -46,13 +46,13 @@ widely_hclust <- function(tbl, item1, item2, distance, k = NULL, h = NULL) {

tibble(item1 = match(tbl[[col1_str]], unique_items),
item2 = match(tbl[[col2_str]], unique_items),
distance = tbl[[dist_str]]) %>%
reshape2::acast(item1 ~ item2, value.var = "distance", fill = max_distance) %>%
stats::as.dist() %>%
stats::hclust() %>%
stats::cutree(k = k, h = h) %>%
tibble::enframe("item", "cluster") %>%
distance = tbl[[dist_str]]) |>
reshape2::acast(item1 ~ item2, value.var = "distance", fill = max_distance) |>
stats::as.dist() |>
stats::hclust() |>
stats::cutree(k = k, h = h) |>
tibble::enframe("item", "cluster") |>
dplyr::mutate(item = unique_items[as.integer(item)],
cluster = factor(cluster)) %>%
cluster = factor(cluster)) |>
dplyr::arrange(cluster)
}
12 changes: 6 additions & 6 deletions R/widely_kmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@
#' library(gapminder)
#' library(dplyr)
#'
#' clusters <- gapminder %>%
#' clusters <- gapminder |>
#' widely_kmeans(country, year, lifeExp, k = 5)
#'
#' clusters
#'
#' clusters %>%
#' clusters |>
#' count(cluster)
#'
#' # Examine a few clusters
#' clusters %>% filter(cluster == 1)
#' clusters %>% filter(cluster == 2)
#' clusters |> filter(cluster == 1)
#' clusters |> filter(cluster == 2)
#'
#' @export
widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) {
Expand All @@ -41,14 +41,14 @@ widely_kmeans <- function(tbl, item, feature, value, k, fill = 0, ...) {

form <- stats::as.formula(paste(item_str, "~", feature_str))

m <- tbl %>%
m <- tbl |>
reshape2::acast(form, value.var = value_str, fill = fill)

clustered <- stats::kmeans(m, k, ...)

# Add the clusters to the original table
i <- match(rownames(m), as.character(tbl[[item_str]]))
tibble::tibble(!!sym(item_str) := tbl[[item_str]][i],
cluster = factor(clustered$cluster)) %>%
cluster = factor(clustered$cluster)) |>
dplyr::arrange(cluster)
}
10 changes: 5 additions & 5 deletions R/widely_svd.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' library(gapminder)
#'
#' # principal components driving change
#' gapminder_svd <- gapminder %>%
#' gapminder_svd <- gapminder |>
#' widely_svd(country, year, lifeExp)
#'
#' gapminder_svd
Expand All @@ -34,9 +34,9 @@
#' library(ggplot2)
#' library(tidyr)
#'
#' gapminder_svd %>%
#' spread(dimension, value) %>%
#' inner_join(distinct(gapminder, country, continent), by = "country") %>%
#' gapminder_svd |>
#' spread(dimension, value) |>
#' inner_join(distinct(gapminder, country, continent), by = "country") |>
#' ggplot(aes(`1`, `2`, label = country)) +
#' geom_point(aes(color = continent)) +
#' geom_text(vjust = 1, hjust = 1)
Expand Down Expand Up @@ -94,7 +94,7 @@ widely_svd_ <- function(tbl, item, feature, value, nv = NULL, weight_d = FALSE,

ret <- widely_(perform_svd, sparse = sparse)(tbl, item, feature, value)

ret <- ret %>%
ret <- ret |>
transmute(item = item_u[as.integer(item1)],
dimension = item2,
value)
Expand Down
Loading