juliasilge · kanishkamisra · Mar 22, 2018 · Jun 7, 2018 · Jun 7, 2018 · Jun 8, 2018
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,8 @@ Imports:
     purrr,
     Matrix,
     broom,
-    methods
+    methods,
+    tibble
 Suggests:
     ggraph,
     igraph,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(multi_scale)
 export(pairwise_cor)
 export(pairwise_cor_)
 export(pairwise_count)

diff --git a/R/multi_scale.R b/R/multi_scale.R
@@ -0,0 +1,60 @@
+#' Multidimensional Scaling of documents separated by a distance measure.
+#'
+#' Given a tbl or data frame of pairwise distances between documents, scale each document
+#' to a *k* dimensional space that ensures the distance between all documents is maintained.
+#' **NOTE:** Doesnt work when the pairwise distance tbl is formed using \code{upper = FALSE}.
+#'
+#' @param tbl Table obtained by running a pairwise distance method \code{pairwise_delta} or \code{pairwise_dist}
+#' @param item1 first item
+#' @param item2 second item
+#' @param value Value
+#' @param k number of dimensions, defaults to 2
+#'
+#' @return Returns a function that takes at least four arguments:
+#'   \item{item}{Column to store documents or items separated by various distances as used prior to calling \code{multi_scale()}}
+#'   \item{V1}{First Dimension}
+#'   \item{V2}{Second Dimension}
+#'   \item{...}{Other Dimensions as specified by k's value}
+#'
+#' @examples
+#'
+#' library(janeaustenr)
+#' library(dplyr)
+#' library(tidyr)
+#' library(tidytext)
+#'
+#' # closest documents in terms of 1000 most frequent words
+#' austen_delta <- austen_books() %>%
+#'   unnest_tokens(word, text) %>%
+#'   count(book, word) %>%
+#'   pairwise_delta(book, word, n)
+#'
+#' austen_delta
+#'
+#' austen_delta %>%
+#'   multi_scale(item1, item2, delta)
+#'
+#' @export
+
+multi_scale <- function(tbl, item1, item2, value, k = 2) {
+  multi_scale_(tbl,
+               col_name(substitute(item1)),
+               col_name(substitute(item2)),
+               col_name(substitute(value)),
+               k = 2)
+}
+
+
+multi_scale_ <- function(tbl, item1, item2, value, k = 2) {
+  tbl_matrix <- tbl %>%
+    tidyr::spread(item2, col_name(value), fill = 0) %>%
+    as.data.frame() %>%
+    tibble::remove_rownames() %>%
+    tibble::column_to_rownames("item1") %>%
+    as.matrix()
+
+  stats::cmdscale(tbl_matrix, k = k) %>%
+    as.data.frame() %>%
+    tibble::rownames_to_column("item") %>%
+    tibble::as_tibble()
+}
diff --git a/R/pairwise_cor.R b/R/pairwise_cor.R
@@ -37,8 +37,8 @@
 #'
 #' @export
 pairwise_cor <- function(tbl, item, feature, value,
-                     method = c("pearson", "kendall", "spearman"),
-                     use = "everything", ...) {
+                         method = c("pearson", "kendall", "spearman"),
+                         use = "everything", ...) {
   if (missing(value)) {
     tbl$..value <- 1
     val <- "..value"
@@ -47,19 +47,19 @@ pairwise_cor <- function(tbl, item, feature, value,
   }
 
   pairwise_cor_(tbl,
-            col_name(substitute(item)),
-            col_name(substitute(feature)),
-            val,
-            method = method, use = use, ...)
+                col_name(substitute(item)),
+                col_name(substitute(feature)),
+                val,
+                method = method, use = use, ...)
 }
 
 
 #' @rdname pairwise_cor
 #' @export
 pairwise_cor_ <- function(tbl, item, feature, value,
-                      method = c("pearson", "kendall", "spearman"),
-                      use = "everything",
-                      ...) {
+                          method = c("pearson", "kendall", "spearman"),
+                          use = "everything",
+                          ...) {
   method <- match.arg(method)
 
   sparse <- (method == "pearson" & use == "everything")
@@ -68,7 +68,7 @@ pairwise_cor_ <- function(tbl, item, feature, value,
   } else {
     function(x) stats::cor(t(x), method = method, use = use)
   }
-  cor_func <- squarely_(f, sparse = sparse, ...)
+  cor_func <- squarely_(f, sparse = sparse, fill_value = NA, ...)
 
   tbl %>%
     ungroup() %>%

diff --git a/R/widely.R b/R/widely.R
@@ -11,6 +11,7 @@
 #' non-sparse matrix to be created. Set to NULL to allow any size
 #' matrix.
 #' @param sparse Whether to cast to a sparse matrix
+#' @param fill_value value to be used to replace NAs when converted from long to wide format.
 #'
 #' @return Returns a function that takes at least four arguments:
 #'   \item{tbl}{A table}
@@ -50,12 +51,16 @@
 widely <- function(.f,
                    sort = FALSE,
                    sparse = FALSE,
-                   maximum_size = 1e7) {
+                   maximum_size = 1e7,
+                   fill_value = 0) {
   function(tbl, row, column, value, ...) {
+
+
     inner_func <- widely_(.f,
                           sort = sort,
                           sparse = sparse,
-                          maximum_size = maximum_size)
+                          maximum_size = maximum_size,
+                          fill_value = fill_value)
 
     inner_func(tbl,
                col_name(substitute(row)),
@@ -71,7 +76,8 @@ widely <- function(.f,
 widely_ <- function(.f,
                     sort = FALSE,
                     sparse = FALSE,
-                    maximum_size = 1e7) {
+                    maximum_size = 1e7,
+                    fill_value = 0) {
   f <- function(tbl, row, column, value, ...) {
     if (inherits(tbl, "grouped_df")) {
       # perform within each group, then restore groups
@@ -87,7 +93,7 @@ widely_ <- function(.f,
     if (!sparse) {
       if (!is.null(maximum_size)) {
         matrix_size <- (length(unique(tbl[[row]])) *
-                        length(unique(tbl[[column]])))
+                          length(unique(tbl[[column]])))
         if (matrix_size > maximum_size) {
           stop("Size of acast matrix, ", matrix_size,
                " will be too large. Set maximum_size = NULL to avoid ",
@@ -98,7 +104,8 @@ widely_ <- function(.f,
 
       form <- stats::as.formula(paste(row, column, sep = " ~ "))
 
-      input <- reshape2::acast(tbl, form, value.var = value, fill = 0)
+      input <- reshape2::acast(tbl, form, value.var = value, fill = fill_value)
+
     } else {
       input <- tidytext::cast_sparse_(tbl, row, column, value)
     }

diff --git a/man/multi_scale.Rd b/man/multi_scale.Rd
diff --git a/man/widely.Rd b/man/widely.Rd
-Original file line number
+Diff line change
@@ Expand Up / @@ -22,7 +22,8 @@ Imports: @@
         purrr,
         Matrix,
         broom,
-        methods
+        methods,
+        tibble
     Suggests:
         ggraph,
         igraph,
@@ Expand Down @@