Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ Imports:
purrr,
Matrix,
broom,
methods
methods,
tibble
Suggests:
ggraph,
igraph,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(multi_scale)
export(pairwise_cor)
export(pairwise_cor_)
export(pairwise_count)
Expand Down
60 changes: 60 additions & 0 deletions R/multi_scale.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' Multidimensional Scaling of documents separated by a distance measure.
#'
#' Given a tbl or data frame of pairwise distances between documents, scale each document
#' to a *k* dimensional space that ensures the distance between all documents is maintained.
#' **NOTE:** Doesnt work when the pairwise distance tbl is formed using \code{upper = FALSE}.
#'
#' @param tbl Table obtained by running a pairwise distance method \code{pairwise_delta} or \code{pairwise_dist}
#' @param item1 first item
#' @param item2 second item
#' @param value Value
#' @param k number of dimensions, defaults to 2
#'
#' @return Returns a function that takes at least four arguments:
#' \item{item}{Column to store documents or items separated by various distances as used prior to calling \code{multi_scale()}}
#' \item{V1}{First Dimension}
#' \item{V2}{Second Dimension}
#' \item{...}{Other Dimensions as specified by k's value}
#'
#' @examples
#'
#' library(janeaustenr)
#' library(dplyr)
#' library(tidyr)
#' library(tidytext)
#'
#' # closest documents in terms of 1000 most frequent words
#' austen_delta <- austen_books() %>%
#' unnest_tokens(word, text) %>%
#' count(book, word) %>%
#' pairwise_delta(book, word, n)
#'
#' austen_delta
#'
#' austen_delta %>%
#' multi_scale(item1, item2, delta)
#'
#' @export

multi_scale <- function(tbl, item1, item2, value, k = 2) {
multi_scale_(tbl,
col_name(substitute(item1)),
col_name(substitute(item2)),
col_name(substitute(value)),
k = 2)
}


multi_scale_ <- function(tbl, item1, item2, value, k = 2) {
tbl_matrix <- tbl %>%
tidyr::spread(item2, col_name(value), fill = 0) %>%
as.data.frame() %>%
tibble::remove_rownames() %>%
tibble::column_to_rownames("item1") %>%
as.matrix()

stats::cmdscale(tbl_matrix, k = k) %>%
as.data.frame() %>%
tibble::rownames_to_column("item") %>%
tibble::as_tibble()
}
20 changes: 10 additions & 10 deletions R/pairwise_cor.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
#'
#' @export
pairwise_cor <- function(tbl, item, feature, value,
method = c("pearson", "kendall", "spearman"),
use = "everything", ...) {
method = c("pearson", "kendall", "spearman"),
use = "everything", ...) {
if (missing(value)) {
tbl$..value <- 1
val <- "..value"
Expand All @@ -47,19 +47,19 @@ pairwise_cor <- function(tbl, item, feature, value,
}

pairwise_cor_(tbl,
col_name(substitute(item)),
col_name(substitute(feature)),
val,
method = method, use = use, ...)
col_name(substitute(item)),
col_name(substitute(feature)),
val,
method = method, use = use, ...)
}


#' @rdname pairwise_cor
#' @export
pairwise_cor_ <- function(tbl, item, feature, value,
method = c("pearson", "kendall", "spearman"),
use = "everything",
...) {
method = c("pearson", "kendall", "spearman"),
use = "everything",
...) {
method <- match.arg(method)

sparse <- (method == "pearson" & use == "everything")
Expand All @@ -68,7 +68,7 @@ pairwise_cor_ <- function(tbl, item, feature, value,
} else {
function(x) stats::cor(t(x), method = method, use = use)
}
cor_func <- squarely_(f, sparse = sparse, ...)
cor_func <- squarely_(f, sparse = sparse, fill_value = NA, ...)

tbl %>%
ungroup() %>%
Expand Down
17 changes: 12 additions & 5 deletions R/widely.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' non-sparse matrix to be created. Set to NULL to allow any size
#' matrix.
#' @param sparse Whether to cast to a sparse matrix
#' @param fill_value value to be used to replace NAs when converted from long to wide format.
#'
#' @return Returns a function that takes at least four arguments:
#' \item{tbl}{A table}
Expand Down Expand Up @@ -50,12 +51,16 @@
widely <- function(.f,
sort = FALSE,
sparse = FALSE,
maximum_size = 1e7) {
maximum_size = 1e7,
fill_value = 0) {
function(tbl, row, column, value, ...) {


inner_func <- widely_(.f,
sort = sort,
sparse = sparse,
maximum_size = maximum_size)
maximum_size = maximum_size,
fill_value = fill_value)

inner_func(tbl,
col_name(substitute(row)),
Expand All @@ -71,7 +76,8 @@ widely <- function(.f,
widely_ <- function(.f,
sort = FALSE,
sparse = FALSE,
maximum_size = 1e7) {
maximum_size = 1e7,
fill_value = 0) {
f <- function(tbl, row, column, value, ...) {
if (inherits(tbl, "grouped_df")) {
# perform within each group, then restore groups
Expand All @@ -87,7 +93,7 @@ widely_ <- function(.f,
if (!sparse) {
if (!is.null(maximum_size)) {
matrix_size <- (length(unique(tbl[[row]])) *
length(unique(tbl[[column]])))
length(unique(tbl[[column]])))
if (matrix_size > maximum_size) {
stop("Size of acast matrix, ", matrix_size,
" will be too large. Set maximum_size = NULL to avoid ",
Expand All @@ -98,7 +104,8 @@ widely_ <- function(.f,

form <- stats::as.formula(paste(row, column, sep = " ~ "))

input <- reshape2::acast(tbl, form, value.var = value, fill = 0)
input <- reshape2::acast(tbl, form, value.var = value, fill = fill_value)

} else {
input <- tidytext::cast_sparse_(tbl, row, column, value)
}
Expand Down
50 changes: 50 additions & 0 deletions man/multi_scale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/widely.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.