From d83d236331da5bbe03911cd58e360dd619a239b9 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:45:11 -0500 Subject: [PATCH 01/25] feat(LandmarkBallCover): initial commit Implementation of ball cover of lensed data space using landmark point set. Fixed bug in landmarks.cpp --- R/YS_BallCover.R | 199 +++++++++++++++++++ R/mapperRef.R | 473 +++++++++++++++++++++++----------------------- src/landmarks.cpp | 11 +- 3 files changed, 443 insertions(+), 240 deletions(-) create mode 100644 R/YS_BallCover.R diff --git a/R/YS_BallCover.R b/R/YS_BallCover.R new file mode 100644 index 0000000..b845746 --- /dev/null +++ b/R/YS_BallCover.R @@ -0,0 +1,199 @@ +#' Ball Cover +#' +#' @docType class +#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered +#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. +#' +#' This differs from the DisjointBallCover in that it does NOT union intersecting cover sets. +#' +#' @field epsilon := radius of the ball to form around each point +#' @author Cory Brunsion, Yara Skaf +#' @export + +library(proxy) + +# TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed +# Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) +YS_BallCover <- R6::R6Class( + classname = "YS_BallCover", + inherit = CoverRef, + public = list(epsilon=NULL, num_sets=NULL, seed_index=1, seed_method="SPEC") +) + +## initialize ------ +YS_BallCover$set("public", "initialize", function(...){ + super$initialize(typename="ys_ball") + params <- list(...) 
+ if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } + if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } + if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } + if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } +}) + +## validate ------ +YS_BallCover$set("public", "validate", function(filter){ + stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) + writeLines("\n(eps, num_sets, seed_index, seed_method):\n") + print(self$epsilon) + print(self$num_sets) + print(self$seed_index) + print(self$seed_method) +}) + +## format ---- +YS_BallCover$set("public", "format", function(...){ + titlecase <- function(x){ + s <- strsplit(x, " ")[[1]] + paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") + } + if (!is.null(self$num_sets)) { + sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) + } + if (!is.null(self$epsilon)) { + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + } +}) + +## construct_cover ------ +YS_BallCover$set("public", "construct_cover", function(filter, index=NULL){ + writeLines("Index:") + print(index) + print("constructing") + if (!requireNamespace("RANN", quietly = TRUE)){ + stop("Package \"RANN\" is needed for to use this cover.", call. 
= FALSE) + } + print("validating") + self$validate() + + print("filtering") + ## Get filter values + fv <- filter() + f_dim <- ncol(fv) + f_size <- nrow(fv) + + + ## Set the seed index if necessary + if(is.null(index)){ + if(all(self$seed_method == "RAND")) { + self$seed_index = sample(1:f_size, 1) + print(self$seed_index) + print("rand") + } + if(all(self$seed_method == "ECC")) { + #self$seed_index = sample(1:f_size, 1) + print(self$seed_index) + print("ecc") + } + + } + + + if (!is.null(self$num_sets)) { + print("seed") + print(self$seed_index) + print(self$num_sets) + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + print(eps_lm) + ## Get distance from each point to landmarks + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + + ## Construct the index set + the preimages + self$index_set <- as.character(eps_lm) + print(self$index_set) + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + print("done level sets") + }else{ + if (!is.null(self$epsilon)) { + print("eps") + + } + } + + + + + ## Construct the balls + # if spec, elif rand, elif ecc -> set self$seed_index + # then if epsilon elif num_sets using self$seed_index + # if (!is.null(self$seed_method)){ + # if (all(self$seed_method == "RAND")) { + # self$seed_index = sample(1:f_size, 1) + # print(self$seed_index) + # print("rand") + # eps_lm <- landmarks(fv, self$num_sets, seed_index=self$seed_index) + # + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } # TODO: does this make sense? 
we shouldnt be able to sepcify both eps and num_sets + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # writeLines("set index set") + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # writeLines("set level sets") + # } + # if (all(self$seed_method == "ECC")) { + # print("ecc") + # } + # }else{ + # if (!is.null(self$num_sets)){ + # print("seed") + # print(self$seed_index) + # print(self$num_sets) + # eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + # print(eps_lm) + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # + # orderedIndices = t(apply(dist_to_lm,1, sort)) + # max = which.max(orderedIndices[,1]) + # self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + # pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # print(self$index_set) + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # }else { + # writeLines("else") + # eps_lm <- landmarks(fv, 2L) + # + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # } + # } + + + # print("making balls") + # ## Construct the balls + # ball_cover <- RANN::nn2(fv, query = fv, searchtype = "radius", radius = self$epsilon) + # + # ## Assign the centers of the balls as the index set + # self$index_set <- as.character(ball_cover[["nn.idx"]][,1]) + # + # ## Calculate 
level (pullback) sets + # ls <- lapply(seq_len(nrow(ball_cover[["nn.idx"]])), function(i) ball_cover[["nn.idx"]][i,]) + # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) + # + # self$level_sets <- structure(ls, names=self$index_set) + print("test") + print(index) + if (!missing(index)){ + print("hello") + return(self$level_sets[[index]]) } + print("done") + + ## Always return self + invisible(self) +}) diff --git a/R/mapperRef.R b/R/mapperRef.R index 93c3d5a..a79ebe2 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -2,14 +2,14 @@ #' @name MapperRef #' @aliases Mapper #' @description `MapperRef` is an \link{R6} class built for parameterizing and computing mappers efficiently. -#' @section Details: To create a new \code{MapperRef} instance object, instantiate the class with the \code{\link[R6:R6Class]{R6 new}} operator. +#' @section Details: To create a new \code{MapperRef} instance object, instantiate the class with the \code{\link[R6:R6Class]{R6 new}} operator. #' Instantiation of a \code{MapperRef} objects requires a data matrix, or a function returning one. \cr -#' The primary output of the Mapper method is a simplicial complex. -#' \cr -#' The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +#' The primary output of the Mapper method is a simplicial complex. +#' \cr +#' The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods #' (e.g. \code{\link{construct_nerve}}, \code{\link{construct_k_skeleton}}, etc.). #' @section Usage: -#' \preformatted{m = MapperRef$new(X)} +#' \preformatted{m = MapperRef$new(X)} #' @section Arguments: #' \itemize{ #' \item{\strong{X}}: The data, either as a matrix, or a function that returns a matrix. @@ -17,15 +17,15 @@ #' @section Fields: #' \itemize{ #' \item{\strong{X}}: The data, as a function. Evaluation returns the data as a matrix. 
-#' \item{\strong{filter}}: The filter, as a function. Evaluation returns the filtered data as a matrix. -#' \item{\strong{cover}}: The cover (a \code{\link{CoverRef}} derived object). +#' \item{\strong{filter}}: The filter, as a function. Evaluation returns the filtered data as a matrix. +#' \item{\strong{cover}}: The cover (a \code{\link{CoverRef}} derived object). #' \item{\strong{clustering_algorithm}}: The clustering algorithm to use in the pullback. -#' \item{\strong{measure}}: Distance measure to use to compute distances in ambient space. See \code{use_distance_measure} for more details. -#' \item{\strong{pullback}}: Mapping between the sets in the cover (by index) and the vertices (by id). -#' \item{\strong{vertices}}: The mapper vertices. +#' \item{\strong{measure}}: Distance measure to use to compute distances in ambient space. See \code{use_distance_measure} for more details. +#' \item{\strong{pullback}}: Mapping between the sets in the cover (by index) and the vertices (by id). +#' \item{\strong{vertices}}: The mapper vertices. #' \item{\strong{simplicial_complex}}: A \code{\link[simplextree:simplextree]{simplex tree}} object. #' } -#' +#' #' @section Methods: #' \itemize{ #' \item{\code{\link{use_filter}}: Specifies the filter.} @@ -41,9 +41,9 @@ #' } #' @section More information: #' Full documentation available \href{https://peekxc.github.io/Mapper}{online}. -#' +#' #' @return \code{\link{MapperRef}} instance equipped with methods for building the mapper. 
-#' +#' #' @import methods #' @importFrom Rcpp sourceCpp #' @author Matt Piekenbrock, \email{matt.piekenbrock@@gmail.com} @@ -53,23 +53,23 @@ NULL #' @export -MapperRef <- R6::R6Class("MapperRef", +MapperRef <- R6::R6Class("MapperRef", private = list( - .X = NULL, + .X = NULL, .filter = NULL, - .cover = NULL, - .clustering_algorithm = NULL, - .measure = "euclidean", - .measure_opt = NULL, + .cover = NULL, + .clustering_algorithm = NULL, + .measure = "euclidean", + .measure_opt = NULL, .pullback=list(), .vertices = list(), .simplicial_complex = NULL ), lock_class = FALSE, ## Feel free to add your own members - lock_objects = FALSE ## Or change existing ones + lock_objects = FALSE ## Or change existing ones ) -## To add a public member function +## To add a public member function ## add function ---- MapperRef$set("public", "add_function", function(name, FUN) { self[[name]] <- FUN @@ -78,9 +78,9 @@ MapperRef$set("public", "add_function", function(name, FUN) { ## The set index -> (vertex) decomposition mapping. ## pullback ---- -MapperRef$set("active", "pullback", - function(value){ - if (missing(value)){ private$.pullback } +MapperRef$set("active", "pullback", + function(value){ + if (missing(value)){ private$.pullback } else { private$.pullback <- value } } ) @@ -88,16 +88,16 @@ MapperRef$set("active", "pullback", ## cover ---- ## @title Mapper cover ## @name cover -## @description Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as -## is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter -## space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr -## \cr -## The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the +## @description Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as +## is \code{cover} member field. 
In the context of Mapper, a cover is used to discretize the filter +## space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr +## \cr +## The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the ## requirements of being a valid \code{\link{CoverRef}} instance. -## @seealso \code{\link{CoverRef}} -MapperRef$set("active", "cover", +## @seealso \code{\link{CoverRef}} +MapperRef$set("active", "cover", function(value){ #function(fv, type = c("restrained rectangular"), ...) - if (missing(value)){ private$.cover } + if (missing(value)){ private$.cover } else { stopifnot(inherits(value, "CoverRef")) private$.cover <- value @@ -108,7 +108,7 @@ MapperRef$set("active", "cover", ## Mapper stores the vertices as a list ## vertices ---- -MapperRef$set("active", "vertices", +MapperRef$set("active", "vertices", function(value){ if(missing(value)){ private$.vertices @@ -116,32 +116,32 @@ MapperRef$set("active", "vertices", private$.vertices <- value # stop("`$vertices` is read-only. To update the vertex membership, use the 'compute_vertices' function.", call. = FALSE) } - } + } ) ## simplicial_complex ---- -## @title Simplicial Complex -## @name simplicial_complex -## @description The relational information of the Mapper construction. -## @details The primary output of the Mapper method is a simplicial complex. With \code{\link{MapperRef}} objects, -## the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. -## \cr -## The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +## @title Simplicial Complex +## @name simplicial_complex +## @description The relational information of the Mapper construction. +## @details The primary output of the Mapper method is a simplicial complex. 
With \code{\link{MapperRef}} objects, +## the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. +## \cr +## The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods ## (e.g. \code{\link{construct_nerve}}, \code{\link{construct_k_skeleton}}, etc.). -MapperRef$set("active", "simplicial_complex", +MapperRef$set("active", "simplicial_complex", function(value){ if (missing(value)){ private$.simplicial_complex } else { stopifnot(is(value, "Rcpp_SimplexTree")) - private$.simplicial_complex <- value + private$.simplicial_complex <- value # stop("`$simplicial_complex` is read-only. To change the complex, use the objects (simplex tree) methods directly.", call. = FALSE) } } ) ## clustering_algorithm ---- -## Clustering algorithm must be a function that takes as arguments 'pid' and 'idx' -MapperRef$set("active", "clustering_algorithm", +## Clustering algorithm must be a function that takes as arguments 'pid' and 'idx' +MapperRef$set("active", "clustering_algorithm", function(value){ if (missing(value)){ private$.clustering_algorithm } else { @@ -157,9 +157,9 @@ MapperRef$set("active", "data", function(value){ else { if (is.matrix(value)){ X_acc <- function(data_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx)){ return(data_matrix) } - return(data_matrix[idx,,drop=FALSE]) + return(data_matrix[idx,,drop=FALSE]) } } private$.X <- X_acc(value) @@ -170,19 +170,19 @@ MapperRef$set("active", "data", function(value){ } } }) - + ## X ---- ## The data should be held fixed -MapperRef$set("active", "X", +MapperRef$set("active", "X", function(value){ if (missing(value)){ return(private$.X) } else { if (is.matrix(value)){ X_acc <- function(data_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx)){ return(data_matrix) } - return(data_matrix[idx,,drop=FALSE]) + return(data_matrix[idx,,drop=FALSE]) } } private$.X <- X_acc(value) @@ -198,16 +198,16 @@ 
MapperRef$set("active", "X", ## filter ---- ## The filter function associated with the Mapper instance -MapperRef$set("active", "filter", +MapperRef$set("active", "filter", function(value){ if (missing(value)){ return(private$.filter) } else { # browser() if (is.matrix(value)){ filter_acc <- function(filter_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx) || is.null(idx)){ return(filter_matrix) } - return(filter_matrix[idx,,drop=FALSE]) + return(filter_matrix[idx,,drop=FALSE]) } } private$.filter <- filter_acc(value) @@ -236,7 +236,7 @@ MapperRef$set("active", "measure", ) ## initialize ---- -## Initialization method relies on active binding +## Initialization method relies on active binding MapperRef$set("public", "initialize", function(X = NULL){ private$.simplicial_complex <- simplextree::simplex_tree() self$use_distance_measure(measure = "euclidean") @@ -249,19 +249,19 @@ MapperRef$set("public", "initialize", function(X = NULL){ #' @name use_data #' @title Sets the data #' @description Sets the data matrix to associate with the mapper. -#' @param data either a matrix, a function, or a data set name. See details. +#' @param data either a matrix, a function, or a data set name. See details. #' @param ... additional parameters to pass to the filter function. -#' @details This function sets the data for the Mapper to work on. If \code{data} is a string, -#' it must be one of the (illustrative) data sets listed in \code{data(package="Mapper")}. -#' Otherwise, \code{data} must be either a matrix of coordinate values, a function that returns a matrix of +#' @details This function sets the data for the Mapper to work on. If \code{data} is a string, +#' it must be one of the (illustrative) data sets listed in \code{data(package="Mapper")}. +#' Otherwise, \code{data} must be either a matrix of coordinate values, a function that returns a matrix of #' coordinate values. 
MapperRef$set("public", "use_data", function(X){ if (is.character(X)){ stopifnot(X %in% c("noisy_circle", "wvs_us_wave6")) - self$X <- local({ + self$X <- local({ load(system.file(sprintf("data/%s.Rdata", X), package="Mapper")) X <- eval(parse(text=X)) - return(scale(as.matrix(X))) + return(scale(as.matrix(X))) }) } else { self$X <- X } return(invisible(self)) @@ -271,10 +271,10 @@ MapperRef$set("public", "use_data", function(X){ #' @name use_filter #' @title Sets the filter #' @description Sets the map, or \emph{filter}, to associate with the instance -#' @param filter either the filter name to use, a matrix, or a function. See details. +#' @param filter either the filter name to use, a matrix, or a function. See details. #' @param ... additional parameters to pass to the filter function. -#' @details \code{filter} must be either a matrix of coordinate values, a function that returns a matrix of -#' coordinate values, or one of the following predefined filters: +#' @details \code{filter} must be either a matrix of coordinate values, a function that returns a matrix of +#' coordinate values, or one of the following predefined filters: #' \itemize{ #' \item{\strong{PC}}{ Principle components (with \code{\link[stats:prcomp]{prcomp}})} #' \item{\strong{IC}}{ Independent components (with \code{\link[fastICA:fastICA]{fastICA}})} @@ -287,15 +287,15 @@ MapperRef$set("public", "use_data", function(X){ #' \item{\strong{UMAP}}{ Uniform Manifold Approximation and Projection (with \code{\link[umap:umap]{umap}})} #' } #' Nearly all the pre-configured filters essentially call functions in other packages with -#' somewhat reasonable default parameters to perform the mapping. Any parameters supplied to \code{...} -#' are passed to the corresponding package function linked above, which override any default parameters. -#' If the package needed to compute the filter is not installed, a prompt is given asking the user +#' somewhat reasonable default parameters to perform the mapping. 
Any parameters supplied to \code{...} +#' are passed to the corresponding package function linked above, which override any default parameters. +#' If the package needed to compute the filter is not installed, a prompt is given asking the user #' whether they would like to install it. \cr #' \cr -#' \strong{NOTE:} The predefined filters are meant to be used for exploratory or illustrative purposes only---this function +#' \strong{NOTE:} The predefined filters are meant to be used for exploratory or illustrative purposes only---this function #' is \emph{not} meant to act as a comprensive interface to the functions each filter corresponds too. MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE", "DTM", "MDS", "ISOMAP", "LE", "UMAP"), ...){ - if (is.function(filter)){ private$.filter <- filter } + if (is.function(filter)){ private$.filter <- filter } else if (is.matrix(filter)){ self$filter <- filter } else if (is.numeric(filter) && is.vector(filter)){ @@ -323,13 +323,13 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" } return(params) } - + ## Basic filters if (filter == "PC"){ default_params <- list(x = self$X(), scale. = TRUE, center = TRUE, rank. 
= 2L) params <- modifyList(default_params, given_params) res <- do.call(stats::prcomp, params) - self$filter <- matrix(res$x, ncol = params[["rank."]]) + self$filter <- matrix(res$x, ncol = params[["rank."]]) # c("mapper_filter", "function") } else if (filter == "IC"){ @@ -338,15 +338,15 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" params <- modifyList(default_params, given_params) res <- do.call(fastICA::fastICA, params) self$filter <- matrix(res$S, ncol = params[["n.comp"]]) ## S stores independent components - } + } else if (filter == "ECC"){ ## eccentricity has_pd <- requireNamespace("parallelDist", quietly = TRUE) dist_f <- ifelse(has_pd, parallelDist::parallelDist, stats::dist) params <- modifyList(list(p=1), given_params) p_str <- c("manhattan", "euclidean", "maximum")[match(params$p, list(1, 2, Inf))] - if (params$p != Inf) { + if (params$p != Inf) { self$filter <- matrix(colMeans(as.matrix(dist_f(self$X())^(1/params$p))), ncol = 1) - } else { self$filter <- matrix(apply(as.matrix(dist_f(self$X())), 2, max), ncol = 1) } + } else { self$filter <- matrix(apply(as.matrix(dist_f(self$X())), 2, max), ncol = 1) } } else if (filter == "KDE"){ require_but_ask("ks") @@ -403,39 +403,39 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" ## use_distance_measure ---- #' @name use_distance_measure #' @title Assign a distance measure -#' @description Assigns a distance measure to the \code{\link{MapperRef}} instance to use in the clustering algorithm. +#' @description Assigns a distance measure to the \code{\link{MapperRef}} instance to use in the clustering algorithm. #' @section Usage: #' \preformatted{ $use_distance_measure(measure, ...) } -#' @section Arguments: +#' @section Arguments: #' \describe{ #' \item{\code{measure}}{The distance measure to use (string).} #' \item{\code{...}}{Extra parameters passed to the distance function. See details. 
} #' } -#' @section Details: Unless the \code{\link[Mapper:use_clustering_algorithm]{clustering_algorithm}} has been replaced by the user, -#' by default, Mapper requires a notion of distance between objects to be defined in creating the vertices of the construction. -#' +#' @section Details: Unless the \code{\link[Mapper:use_clustering_algorithm]{clustering_algorithm}} has been replaced by the user, +#' by default, Mapper requires a notion of distance between objects to be defined in creating the vertices of the construction. +#' #' The distance function is determined based on the supplied \code{measure}. \code{measure} must be one of: #' \preformatted{["euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"]} #' or, if the \pkg{proxy} and \code{\link[parallelDist]{parallelDist}} packages are installed, any name in \code{proxy::pr_DB$get_entry_names()}.\cr #' \cr -#' Additional parameters passed via \code{...} are passed to either \code{\link[stats]{dist}} (or -#' \code{\link[parallelDist]{parallelDist}} if installed). +#' Additional parameters passed via \code{...} are passed to either \code{\link[stats]{dist}} (or +#' \code{\link[parallelDist]{parallelDist}} if installed). #' @section Value: -#' The mapper instance, with the measure field assigned. -#' @examples +#' The mapper instance, with the measure field assigned. 
+#' @examples #' data(noisy_circle) #' m <- MapperRef$new(noisy_circle) #' m$use_filter(noisy_circle[,1]) #' m$use_cover("fixed interval", number_intervals = 5, percent_overlap = 25) -#' +#' #' ## Constructs clusters with euclidean metric (default) #' m$use_distance_measure("euclidean") -#' m$construct_pullback() -#' +#' m$construct_pullback() +#' #' ## Constructs clusters with p-norm (p = 1) #' m$use_distance_measure("Minkowski", p = 1L) -#' m$construct_pullback() -#' +#' m$construct_pullback() +#' #' \dontrun{ #' ## To see list of available measures, use: #' proxy::pr_DB$get_entry_names() @@ -448,26 +448,26 @@ MapperRef$set("public", "use_distance_measure", function(measure, ...){ }) ## use_cover ---- -#' @name use_cover -#' @title Selects one of the available covering methods to use. -#' @description -#' Convenience method to select, parameterize, and construct a cover and associate it with the calling objects \code{cover} field. -#' @param cover Either a pre-configured cover, or name of the cover to use. -#' @param ... Additional parameter values to pass to the covering generators initialize method. -#' @details Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as -#' is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter -#' space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr -#' \cr -#' The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the -#' requirements of being a valid \code{\link{CoverRef}} instance (e.g. a \code{\link{FixedIntervalCover}}), -#' or one of the cover typenames listed in the \code{\link{covers_available}()}. -#' If a typename is given, the cover is automatically constructed before being assigned to the \code{$cover} field. -#' @examples +#' @name use_cover +#' @title Selects one of the available covering methods to use. 
+#' @description +#' Convenience method to select, parameterize, and construct a cover and associate it with the calling objects \code{cover} field. +#' @param cover Either a pre-configured cover, or name of the cover to use. +#' @param ... Additional parameter values to pass to the covering generators initialize method. +#' @details Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as +#' is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter +#' space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr +#' \cr +#' The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the +#' requirements of being a valid \code{\link{CoverRef}} instance (e.g. a \code{\link{FixedIntervalCover}}), +#' or one of the cover typenames listed in the \code{\link{covers_available}()}. +#' If a typename is given, the cover is automatically constructed before being assigned to the \code{$cover} field. 
+#' @examples #' data(noisy_circle) #' m <- MapperRef$new(noisy_circle) #' m$use_filter(noisy_circle[,1]) #' m$use_cover("fixed interval", number_intervals = 5, percent_overlap = 25) -#' +#' #' ## Alternative way to specify (and construct) the cover #' cover <- FixedIntervalCover$new(number_intervals = 5, percent_overlap = 25) #' cover$construct_cover(filter = m$filter) @@ -475,73 +475,75 @@ MapperRef$set("public", "use_distance_measure", function(measure, ...){ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ stopifnot(!is.null(self$filter)) if (missing(cover)){ cover <- "fixed interval"} - self$cover <- switch(cover, - "fixed interval"=FixedIntervalCover$new(...)$construct_cover(self$filter), + self$cover <- switch(cover, + "fixed interval"=FixedIntervalCover$new(...)$construct_cover(self$filter), "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), + "ys_ball"=YS_BallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) + print("leaving use cover") invisible(self) }) -#@param filter_values (n x d) numeric matrix of values giving the results of the map. +#@param filter_values (n x d) numeric matrix of values giving the results of the map. ## use_clustering_algorithm ---- -#' @name use_clustering_algorithm -#' @title Sets the clustering algorithm +#' @name use_clustering_algorithm +#' @title Sets the clustering algorithm #' @description Sets the clustering algorithm used to construct the connected components in the pullback cover. #' @param cl Either one of the link criteria used in the \code{\link[stats]{hclust}} function (string) or a function. See details. #' @param cutoff_method Type of heuristic to determine cut value. See details. Ignored is \code{cl} is a function. 
-#' @param ... Additional parameters passed as defaults to the cutoff method. See details. -#' @details If \code{cl} is a linkage criterion, a standard hierarchical clustering algorithm is used with suitable default parameters. -#' If it is a function, it must have a signature compatible with \code{function(pid, idx, self, ...)} where -#' \code{pid} is the index of the (pullback) set to cluster (see \code{CoverRef}), +#' @param ... Additional parameters passed as defaults to the cutoff method. See details. +#' @details If \code{cl} is a linkage criterion, a standard hierarchical clustering algorithm is used with suitable default parameters. +#' If it is a function, it must have a signature compatible with \code{function(pid, idx, self, ...)} where +#' \code{pid} is the index of the (pullback) set to cluster (see \code{CoverRef}), #' \code{idx} are the indices of the points in \code{X} to cluster on, and -#' \code{self} is the \code{MapperRef} instance environment. +#' \code{self} is the \code{MapperRef} instance environment. #' \cr #' The cutoff method may be one of either c("continuous", "histogram"). See \code{\link{cutoff_first_bin}} and \code{\link{cutoff_first_threshold}} -#' for what they each correspond to, respectively. Additional named parameters passed via \code{...} act as defaults to the cutting method chosen. -#' If not chosen, reasonable defaults are used. \cr +#' for what they each correspond to, respectively. Additional named parameters passed via \code{...} act as defaults to the cutting method chosen. +#' If not chosen, reasonable defaults are used. \cr #' \cr -#' Additional named parameters passed via the dots in \code{$clustering_algorithm} -#' will taken precedence and override all previously set default settings. 
-MapperRef$set("public", "use_clustering_algorithm", - function(cl = c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid"), +#' Additional named parameters passed via the dots in \code{$clustering_algorithm} +#' will taken precedence and override all previously set default settings. +MapperRef$set("public", "use_clustering_algorithm", + function(cl = c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid"), cutoff_method = c("continuous", "histogram"), ...){ ## Use a linkage criterion + cutting rule default_cutting_params <- list(...) if (missing(cl)) { cl <- "single" } - if (missing(cutoff_method)){ cutoff_method <- "continuous" } + if (missing(cutoff_method)){ cutoff_method <- "continuous" } if (class(cl) == "character"){ hclust_opts <- c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid") if (!cl %in% hclust_opts){ stop(sprint("Unknown linkage method passed. Please use one of (%s). See ?hclust for details.", paste0(hclust_opts, collapse = ", "))) } stopifnot(is.character(self$measure)) - ## Closured representation to substitute default parameters. + ## Closured representation to substitute default parameters. create_cl <- function(cl, cutoff_method, cut_defaults){ cutoff_f <- switch(cutoff_method, "histogram"=cutoff_first_bin, "continuous"=cutoff_first_threshold) function(pid, idx, self, ...){ if (is.null(idx) || length(idx) == 0){ return(integer(0L)) } if (length(idx) <= 2L){ return(rep(1L, length(idx))); } override_params <- list(...) 
- + ## Use parallelDist package if available has_pd <- requireNamespace("parallelDist", quietly = TRUE) dist_f <- ifelse(has_pd, parallelDist::parallelDist, stats::dist) dist_params <- list(x=self$X(idx), method=tolower(self$measure)) if (!is.null(private$.measure_opt)){ dist_params <- append(dist_params, private$.measure_opt) } dist_x <- do.call(dist_f, dist_params) - - ## Use fastcluster if available + + ## Use fastcluster if available has_fc <- requireNamespace("fastcluster", quietly = TRUE) cl_f <- ifelse(has_fc, fastcluster::hclust, stats::hclust) hcl <- do.call(cl_f, list(d=dist_x, method=cl)) - - cutoff_params <- if (cutoff_method == "histogram"){ - list(hcl = hcl, num_bins = 10L) + + cutoff_params <- if (cutoff_method == "histogram"){ + list(hcl = hcl, num_bins = 10L) } else { list(hcl = hcl, threshold = 0.0) } cutoff_params <- modifyList(modifyList(cutoff_params, cut_defaults), override_params) - + ## Use heuristic cutting value to produce the partitioning eps <- do.call(cutoff_f, cutoff_params) return(cutree(hcl, h = eps)) @@ -556,18 +558,18 @@ MapperRef$set("public", "use_clustering_algorithm", ) ## construct_pullback ---- -#' @name construct_pullback -#' @title Constructs the (decomposed) pullback cover. -#' @description Executes the clustering algorithm on subsets of \code{X}, +#' @name construct_pullback +#' @title Constructs the (decomposed) pullback cover. +#' @description Executes the clustering algorithm on subsets of \code{X}, #' @param pullback_ids indices of the \code{\link[Mapper:CoverRef]{covers}} \code{index_set}, or \code{NULL}. #' @param ... additional parameters to pass to the \code{clustering_algorithm}. -#' @details This methods uses the function given by \code{clustering_algorithm} field to -#' decompose the preimages returned by the \code{cover} member into connected components, which are -#' stored as \code{vertices}. 
Indices may be passed to limit which sets are decomposed, otherwise +#' @details This methods uses the function given by \code{clustering_algorithm} field to +#' decompose the preimages returned by the \code{cover} member into connected components, which are +#' stored as \code{vertices}. Indices may be passed to limit which sets are decomposed, otherwise #' the sets in the cover all considered. #' \cr -#' Note that this method removes the \code{vertices} associated with \code{pullback_ids}, but does not -#' modify the simplicial complex. +#' Note that this method removes the \code{vertices} associated with \code{pullback_ids}, but does not +#' modify the simplicial complex. #' @seealso \code{\link{construct_k_skeleton}} \code{\link{construct_nerve}} \code{\link{use_clustering_algorithm}} MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ stopifnot(!is.null(self$cover$level_sets)) @@ -575,7 +577,7 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ pids_supplied <- (!missing(pullback_ids) && !is.null(pullback_ids)) if (pids_supplied){ stopifnot(all(pullback_ids %in% self$cover$index_set)) } pullback_ids <- if (pids_supplied){ pullback_ids } else { self$cover$index_set } - + print("constructing pullback") ## If specific pullback ids not given, then resist the pullback mapping if (!pids_supplied || length(private$.pullback) == 0){ n_sets <- length(self$cover$index_set) @@ -583,14 +585,16 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ self$vertices <- list() private$.pullback <- structure(replicate(n_sets, integer(0)), names = self$cover$index_set) } - - ## Update the vertices for the given level sets. This requires updating both the simplex tree's - ## internal representation as well as the outward-facing vertex list. The new vertices are returned - ## to replace the current list. The pullback map is also updated. 
+ print("after if") + ## Update the vertices for the given level sets. This requires updating both the simplex tree's + ## internal representation as well as the outward-facing vertex list. The new vertices are returned + ## to replace the current list. The pullback map is also updated. calc_preimage <- function(){ + print("begin preimage") function(index){ self$cover$construct_cover(self$filter, index) } } partial_cluster <- function(...){ + print("begin cluster") extra <- list(...) function(pid, idx){ do.call(self$clustering_algorithm, append(list(pid, idx, self), extra)) } } @@ -603,29 +607,29 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # private$.simplicial_complex$insert(as.list(ids)) # return(ids) # } - + print("decomposing") ## Do the decomposition private$.vertices <- Mapper:::decompose_preimages( pullback_ids = as.character(pullback_ids), - cluster_f = partial_cluster(...), - level_set_f = calc_preimage(), + cluster_f = partial_cluster(...), + level_set_f = calc_preimage(), vertices = private$.vertices, # id_generator = id_gen, pullback = private$.pullback ) - + print("made pullback") ## Return self invisible(self) }) ## construct_nerve ---- -#' @name construct_nerve -#' @title Compute the nerve of the cover. -#' @description Computes (or updates) the k-simplices composing the Mapper, where k >= 0. -#' @param k The order of the simplices to construct. See details. +#' @name construct_nerve +#' @title Compute the nerve of the cover. +#' @description Computes (or updates) the k-simplices composing the Mapper, where k >= 0. +#' @param k The order of the simplices to construct. See details. #' @param indices (n x k) matrix of indices of the covers index set to update. #' @param min_weight minimum intersection size to consider as a simplex. Defaults to 1. 
-#' @details Compared to \code{construct_k_skeleton}, this method \emph{only} intersections +#' @details Compared to \code{construct_k_skeleton}, this method \emph{only} intersections #' between (k-1) simplices in the complex. MapperRef$set("public", "construct_nerve", function(k, indices = NULL, min_weight=1L, modify=TRUE){ stopifnot(length(private$.vertices) > 0) @@ -639,49 +643,49 @@ MapperRef$set("public", "construct_nerve", function(k, indices = NULL, min_weigh ## If k==0 is specified, just build the vertices stree_ptr <- private$.simplicial_complex$as_XPtr() - if (k == 0){ - if (!modify){ return(as.integer(names(self$vertices))) } + if (k == 0){ + if (!modify){ return(as.integer(names(self$vertices))) } build_0_skeleton(as.integer(names(self$vertices)), stree_ptr) return(invisible(self)) } - - ## Retrieve the valid level set index pairs to compare. In the worst case, with no cover-specific optimization, + + ## Retrieve the valid level set index pairs to compare. In the worst case, with no cover-specific optimization, ## this may just be all pairwise combinations of LSFI's for the full simplicial complex. indices <- if (idx_specified){ indices } else { self$cover$neighborhood(self$filter, k) } - + ## If no indices to compute, nothing to do for k > 1. return self invisibly. 
if (is.null(indices)){ return(invisible(self)) } - + ## Build the parameter list params <- list(pullback_ids = indices, vertices = self$vertices, pullback = private$.pullback, stree = stree_ptr, modify = modify) - if (k == 1){ - params <- modifyList(params, list(min_sz = min_weight)) - if (!params$modify){ return(do.call(rbind, do.call(build_1_skeleton, params)) ) } + if (k == 1){ + params <- modifyList(params, list(min_sz = min_weight)) + if (!params$modify){ return(do.call(rbind, do.call(build_1_skeleton, params)) ) } do.call(build_1_skeleton, params) return(invisible(self)) - } else { - params <- modifyList(params, list(k = k)) - if (!params$modify){ return(do.call(rbind, do.call(build_k_skeleton, params)) ) } + } else { + params <- modifyList(params, list(k = k)) + if (!params$modify){ return(do.call(rbind, do.call(build_k_skeleton, params)) ) } do.call(build_k_skeleton, params) return(invisible(self)) - } + } }) ## construct_k_skeleton ---- -#' @name construct_k_skeleton +#' @name construct_k_skeleton #' @title Constructs the k-skeleton #' @param k the maximal dimension to consider. #' @description Computes the k-skeleton of the mapper by computing the nerve of the pullback of \code{cover} member. -#' @details -#' The primary output of the Mapper method is a simplicial complex. With \code{\link{MapperRef}} objects, -#' the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods -#' (e.g. this method, \code{\link{construct_nerve}}, etc.). \cr -#' \cr -#' This function computes the k-skeleton inductively, e.g. by first computing the vertices, -#' then the edges, etc. up to the dimension specified. A check is performed to ensure the pullback has been decomposed, and +#' @details +#' The primary output of the Mapper method is a simplicial complex. 
With \code{\link{MapperRef}} objects, +#' the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +#' (e.g. this method, \code{\link{construct_nerve}}, etc.). \cr +#' \cr +#' This function computes the k-skeleton inductively, e.g. by first computing the vertices, +#' then the edges, etc. up to the dimension specified. A check is performed to ensure the pullback has been decomposed, and #' if not, then \code{\link{construct_pullback}} is called. \cr #' \cr -#' For an algorithmic description of this process, see Singh et. al, section 3.2. +#' For an algorithmic description of this process, see Singh et. al, section 3.2. #' @references Gurjeet Singh, Facundo Mémoli, and Gunnar Carlsson. "Topological methods for the analysis of high dimensional data sets and 3d object recognition." SPBG. 2007. MapperRef$set("public", "construct_k_skeleton", function(k=1L){ stopifnot(k >= 0) @@ -705,19 +709,19 @@ MapperRef$set("public", "format", function(...){ }) ## as_igraph ---- -#' @name as_igraph +#' @name as_igraph #' @title Exports Mapper as an igraph object. #' @description Exports the 1-skeleton to a graph using the igraph library. -#' @param vertex_scale scaling function for the vertex sizes. -#' @param vertex_min minimum vertex size. -#' @param vertex_min maximum vertex size. -#' @param col_pal color palette to color the vertices by. -#' @details This method converts the 1-skeleton of the Mapper to an igraph object, and assigns some -#' default visual properties. Namely, the vertex attributes "color", "size", and "label" and the -#' edge attribute "color" are assigned. -#' The vertex colors are colored according on the given color palette (default is rainbow) according -#' to their mean filter value (see \code{\link{bin_color}}). 
The vertex sizes are scaled according -#' to the number of points they contain, scaled by \code{vertex_scale}, and bounded between +#' @param vertex_scale scaling function for the vertex sizes. +#' @param vertex_min minimum vertex size. +#' @param vertex_min maximum vertex size. +#' @param col_pal color palette to color the vertices by. +#' @details This method converts the 1-skeleton of the Mapper to an igraph object, and assigns some +#' default visual properties. Namely, the vertex attributes "color", "size", and "label" and the +#' edge attribute "color" are assigned. +#' The vertex colors are colored according on the given color palette (default is rainbow) according +#' to their mean filter value (see \code{\link{bin_color}}). The vertex sizes are scaled according +#' to the number of points they contain, scaled by \code{vertex_scale}, and bounded between #' (\code{vertex_min}, \code{vertex_max}). The vertex labels are in the format ":".\cr #' \cr #' The edges are colored similarly by the average filter value of the points intersecting @@ -728,15 +732,15 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v am <- private$.simplicial_complex$as_adjacency_matrix() colnames(am) <- as.character(private$.simplicial_complex$vertices) G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NULL) ## NULL makes named vertices - + ## Coloring + aggregation functions - agg_val <- function(lst) { sapply(sapply(lst, function(idx){ rowMeans(self$filter(idx)) }), mean) } - + agg_val <- function(lst) { sapply(sapply(lst, function(idx){ rowMeans(self$filter(idx)) }), mean) } + ## Aggregate node filter values v_idx <- match(private$.simplicial_complex$vertices, as.integer(names(self$vertices))) agg_node_val <- agg_val(private$.vertices) igraph::vertex_attr(G, name = "color") <- bin_color(agg_node_val[v_idx], col_pal = col_pal) - + ## Extract indices in the edges edges <- igraph::as_edgelist(G) edge_idx <- 
lapply(seq(nrow(edges)), function(i){ @@ -745,9 +749,9 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v }) agg_edge_val <- agg_val(edge_idx) igraph::edge_attr(G, name = "color") <- bin_color(agg_edge_val, col_pal = col_pal) - + ## Normalize between 0-1, unless all the same - normalize <- function(x) { + normalize <- function(x) { if (all(x == x[1])){ return(rep(1, length(x))) } else { (x - min(x))/(max(x) - min(x)) } } @@ -784,75 +788,75 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # top_vids <- names(m$vertices)[order(v_len, decreasing = TRUE)][1:v_idx] # avg_v_f <- sapply(vids, function(vid) { mean(rowMeans(m$filter(m$vertices[[vid]]))) }) # row_col <- bin_color(avg_v_f)[match(top_vids, vids)] -# # list("matrix_rows, colors = c(Boston = "green", NYC = "navy", LA = "purple"), +# # list("matrix_rows, colors = c(Boston = "green", NYC = "navy", LA = "purple"), # # alpha = 0.5))) # vids_int <- as.integer(top_vids) -# -# +# +# # ## Color intersections by f # res_f <- m$simplicial_complex$ltraverse(empty_face, function(simplex){ # if (all(simplex %in% vids_int)){ -# list(simplex = simplex, f_val = mean(avg_v_f[match(simplex, vids)])) +# list(simplex = simplex, f_val = mean(avg_v_f[match(simplex, vids)])) # } -# }, type = "dfs") +# }, type = "dfs") # res_f <- Filter(function(x) { !is.null(x) }, res_f) -# params <- lapply(res_f, function(el){ -# list(query = intersects, +# params <- lapply(res_f, function(el){ +# list(query = intersects, # params = as.list(paste0("v", as.character(el$simplex))), -# color = bin_color(avg_v_f)[match(el$simplex, vids)], +# color = bin_color(avg_v_f)[match(el$simplex, vids)], # active = FALSE # ) # }) # sapply(params, function(p) unlist(p[[2]])) -# +# # # queries = list(list(query = intersects, params = list("Drama"), color = "red", active = F) # us_df <- as.data.frame(x_groups) # meta_data <- data.frame(sets=as.factor(paste0("v", top_vids))) # meta_data$cat_id <- 
as.character(paste0("v", top_vids)) # id_color_map <- structure(row_col, names=as.character(meta_data$cat_id)) -# UpSetR::upset(data = us_df, nsets = v_idx, -# sets.x.label = "Node size", -# scale.intersections = "identity", +# UpSetR::upset(data = us_df, nsets = v_idx, +# sets.x.label = "Node size", +# scale.intersections = "identity", # set.metadata = list( # data=meta_data, # plots=list(list(type="matrix_rows", column="cat_id", colors=id_color_map, alpha=0.5)) # ) # ) -# -# # queries = wut, +# +# # queries = wut, # # queries = list(list(query = intersects, params = list("v43"), color = "red", active = F)), -# #queries = list(list(query = intersects, params = list("v43", "v37"), color = "red", active = F)), +# #queries = list(list(query = intersects, params = list("v43", "v37"), color = "red", active = F)), # mb.ratio = c(0.35, 0.65)) # sets.bar.color = row_col -# -# -# -# +# +# +# +# # }) # MapperRef$set("public", "as_grapher", function(construct_widget=TRUE, ...){ # requireNamespace("grapher", quietly = TRUE) -# -# ## Make the igraph +# +# ## Make the igraph # am <- private$.simplicial_complex$as_adjacency_matrix() -# G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NA) +# G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NA) # json_config <- grapher::getDefaultJsonConfig(network=G) -# +# # ## Color nodes and edges by a default rainbow palette # rbw_pal <- rev(rainbow(100, start = 0, end = 4/6)) -# agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ +# agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ # apply(as.matrix(self$cover$filter_values[v_idx,]), 1, mean) # }), mean) # binned_idx <- cut(agg_node_val, breaks = 100, labels = F) # vertex_colors <- rbw_pal[binned_idx] -# vertex_sizes <- sapply(private$.vertices, length) -# +# vertex_sizes <- sapply(private$.vertices, length) +# # ## Normalize between 0-1, unless all the same -# normalize <- function(x) { +# normalize 
<- function(x) { # if (all(x == x[1])){ return(rep(1, length(x))) } # else { (x - min(x))/(max(x) - min(x)) } # } -# +# # ## Create the vertices. By default, color them on a rainbow palette according to their mean filter value. # if (length(igraph::V(G)) > 0){ # vertex_radii <- (7L - 2L)*normalize(vertex_sizes) + 2L @@ -865,7 +869,7 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # } else { # json_config$nodes <- integer(length = 0L) # } -# +# # ## Create the edges w/ a similar coloring scheme. # if (length(igraph::E(G)) > 0){ # el <- igraph::as_edgelist(G, names = FALSE) @@ -873,14 +877,14 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # edge_links <- matrix(apply(el, 2, as.integer) - 1L, ncol = 2) # json_config$links$from <- edge_links[,1] # json_config$links$to <- edge_links[,2] -# json_config$links$color <- substr(rbw_pal[edge_binned_idx], start = 0, stop = 7) +# json_config$links$color <- substr(rbw_pal[edge_binned_idx], start = 0, stop = 7) # } else { # json_config$links <- integer(length = 0L) # } -# -# ## Return the grapher instance or just the json config if requested +# +# ## Return the grapher instance or just the json config if requested # if (construct_widget){ -# grapher::grapher(json_config) +# grapher::grapher(json_config) # } else { # return(json_config) # } @@ -889,22 +893,22 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v ## as_pixiplex ---- #' @name as_pixiplex #' @title Exports the complex as a pixiplex object. -#' @description Uses the pixiplex library. +#' @description Uses the pixiplex library. 
MapperRef$set("public", "as_pixiplex", function(...){ requireNamespace("pixiplex", quietly = TRUE) pp <- pixiplex::pixiplex(private$.simplicial_complex) - + ## Default styling rbw_pal <- rev(rainbow(100, start = 0, end = 4/6)) - agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ + agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ rowMeans(self$filter(v_idx)) }), mean) binned_idx <- cut(agg_node_val, breaks = 100, labels = F) vertex_colors <- rbw_pal[binned_idx] - vertex_sizes <- sapply(private$.vertices, length) - + vertex_sizes <- sapply(private$.vertices, length) + ## Normalize between 0-1, unless all the same - normalize <- function(x) { + normalize <- function(x) { if (all(x == x[1])){ return(rep(1, length(x))) } else { (x - min(x))/(max(x) - min(x)) } } @@ -919,20 +923,20 @@ MapperRef$set("public", "as_pixiplex", function(...){ #' @title Exports minimal information about the Mapper #' @description This function exports a few core characteristics about the mapper complex as a list. #' @param graph_type export preference on the structure the graph output. -#' @return List with the following members: +#' @return List with the following members: #' \itemize{ #' \item \emph{vertices} list of the indices of the original data the current vertex intersects with. #' \item \emph{graph} some adjacency representation of the mapper graph. #' \item \emph{pullback} map connecting the index set of the cover and the vertices. #' } -#' @details \code{graph_type} must be one of 'adjacency_matrix', 'adjacency_list', or 'edgelist'. +#' @details \code{graph_type} must be one of 'adjacency_matrix', 'adjacency_list', or 'edgelist'. 
MapperRef$set("public", "exportMapper", function(graph_type=c("adjacency_matrix", "adjacency_list", "edgelist")){ result <- list(pullback = private$.pullback, vertices = private$.vertices) if (missing(graph_type)){ graph_type <- "adjacency_matrix" } - result$graph <- switch(graph_type, + result$graph <- switch(graph_type, "adjacency_matrix"=self$simplicial_complex$as_adjacency_matrix(), - "adjacency_list"=self$simplicial_complex$as_adjacency_list(), - "edgelist"=self$simplicial_complex$as_edge_list(), + "adjacency_list"=self$simplicial_complex$as_adjacency_list(), + "edgelist"=self$simplicial_complex$as_edge_list(), stop("'graph_type' must be one of: 'adjacency_matrix', 'adjacency_list', 'edgelist'")) n_simplices <- self$simplicial_complex$n_simplices x_dim <- ncol(self$X()) @@ -944,4 +948,3 @@ MapperRef$set("public", "exportMapper", function(graph_type=c("adjacency_matrix" class(result) <- "Mapper" return(result) }) - diff --git a/src/landmarks.cpp b/src/landmarks.cpp index 19e1588..b414b17 100644 --- a/src/landmarks.cpp +++ b/src/landmarks.cpp @@ -12,7 +12,7 @@ inline double sq_dist(const NumericVector& x, const NumericVector& y){ } // Eccentricity -// Computes a distance measure from the barycenter of the data, or from a given center, if provided. +// Computes a distance measure from the barycenter of the data, or from a given center, if provided. // [[Rcpp::export]] NumericVector eccentricity(const NumericMatrix& from, const NumericMatrix& x, const int type = 1) { if (from.ncol() != x.ncol()){ stop("Matrices must have identical number of columns."); } @@ -29,25 +29,26 @@ NumericVector eccentricity(const NumericMatrix& from, const NumericMatrix& x, co return(wrap(pt_ecc)); } -// Uses the euclidean maxmin procedure to choose n landmarks. +// Uses the euclidean maxmin procedure to choose n landmarks. 
// [[Rcpp::export]] IntegerVector landmark_maxmin(const NumericMatrix& x, const int n, const int seed = 0) { const size_t n_pts = x.nrow(); std::vector< double > lm_dist(n_pts, std::numeric_limits::infinity()); IntegerVector landmark_idx = no_init_vector(n); - landmark_idx[seed] = 0; + //landmark_idx[seed] = 0; + landmark_idx[0] = seed; IntegerVector::iterator c_landmark = landmark_idx.begin(); double new_min_dist; std::generate(landmark_idx.begin()+1, landmark_idx.end(), [&](){ size_t i = 0; new_min_dist = std::numeric_limits::infinity(); - + // Replace nearest landmark distances std::replace_if(lm_dist.begin(), lm_dist.end(), [&i, &x, &c_landmark, &new_min_dist](const double c_dist){ new_min_dist = sq_dist(x.row(*c_landmark), x.row(i++)); return(new_min_dist < c_dist); }, new_min_dist); - + // Find the point that maximizes said distances, move to next landmark c_landmark++; auto max_landmark = std::max_element(lm_dist.begin(), lm_dist.end()); From 4332a690b1b1fb8cc0b603a9573152e0fbad672a Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:45:42 -0500 Subject: [PATCH 02/25] chore: add Mapper.dll to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 097b3de..7bb888c 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ ignore/ doc Meta *cache +src/Mapper.dll From 05b143f033e5bb4f4bdd356fe335750bc0e4eb19 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:52:05 -0500 Subject: [PATCH 03/25] refactor(LandmarkBallCover): rename YS_BallCover to LandmarkBallCover --- R/{YS_BallCover.R => LandmarkBallCover.R} | 12 ++++++------ R/mapperRef.R | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) rename R/{YS_BallCover.R => LandmarkBallCover.R} (95%) diff --git a/R/YS_BallCover.R b/R/LandmarkBallCover.R similarity index 95% rename from R/YS_BallCover.R rename to R/LandmarkBallCover.R index 
b845746..d4f8fcc 100644 --- a/R/YS_BallCover.R +++ b/R/LandmarkBallCover.R @@ -15,14 +15,14 @@ library(proxy) # TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed # Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) -YS_BallCover <- R6::R6Class( - classname = "YS_BallCover", +LandmarkBallCover <- R6::R6Class( + classname = "LandmarkBallCover", inherit = CoverRef, public = list(epsilon=NULL, num_sets=NULL, seed_index=1, seed_method="SPEC") ) ## initialize ------ -YS_BallCover$set("public", "initialize", function(...){ +LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="ys_ball") params <- list(...) if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } @@ -32,7 +32,7 @@ YS_BallCover$set("public", "initialize", function(...){ }) ## validate ------ -YS_BallCover$set("public", "validate", function(filter){ +LandmarkBallCover$set("public", "validate", function(filter){ stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) writeLines("\n(eps, num_sets, seed_index, seed_method):\n") print(self$epsilon) @@ -42,7 +42,7 @@ YS_BallCover$set("public", "validate", function(filter){ }) ## format ---- -YS_BallCover$set("public", "format", function(...){ +LandmarkBallCover$set("public", "format", function(...){ titlecase <- function(x){ s <- strsplit(x, " ")[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") @@ -56,7 +56,7 @@ YS_BallCover$set("public", "format", function(...){ }) ## construct_cover ------ -YS_BallCover$set("public", "construct_cover", function(filter, index=NULL){ +LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ writeLines("Index:") print(index) print("constructing") diff --git a/R/mapperRef.R b/R/mapperRef.R index a79ebe2..7bae5df 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -480,7 +480,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed 
interval", ...){ "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), - "ys_ball"=YS_BallCover$new(...)$construct_cover(self$filter), + "ys_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) print("leaving use cover") From 08397866b6959cf259e3605da14022ffa058ccae Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 22:04:15 -0500 Subject: [PATCH 04/25] refactor(LandmarkBallCover): rename typename ys_ball to landmark_ball --- R/LandmarkBallCover.R | 2 +- R/mapperRef.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index d4f8fcc..1d94a69 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -23,7 +23,7 @@ LandmarkBallCover <- R6::R6Class( ## initialize ------ LandmarkBallCover$set("public", "initialize", function(...){ - super$initialize(typename="ys_ball") + super$initialize(typename="landmark_ball") params <- list(...) 
if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } diff --git a/R/mapperRef.R b/R/mapperRef.R index 7bae5df..235e849 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -480,7 +480,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), - "ys_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), + "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) print("leaving use cover") From 5568886d52013a623a2e965a45d75f182a8f5306 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 22:18:39 -0500 Subject: [PATCH 05/25] refactor(LandmarkBallCover): remove print statements --- R/LandmarkBallCover.R | 29 ++--------------------------- R/mapperRef.R | 7 ------- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 1d94a69..55d8c38 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -34,11 +34,6 @@ LandmarkBallCover$set("public", "initialize", function(...){ ## validate ------ LandmarkBallCover$set("public", "validate", function(filter){ stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) - writeLines("\n(eps, num_sets, seed_index, seed_method):\n") - print(self$epsilon) - print(self$num_sets) - print(self$seed_index) - print(self$seed_method) }) ## format ---- @@ -57,16 +52,11 @@ LandmarkBallCover$set("public", "format", function(...){ ## construct_cover ------ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ - writeLines("Index:") - print(index) - 
print("constructing") if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } - print("validating") self$validate() - print("filtering") ## Get filter values fv <- filter() f_dim <- ncol(fv) @@ -77,24 +67,16 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(is.null(index)){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) - print(self$seed_index) - print("rand") } if(all(self$seed_method == "ECC")) { - #self$seed_index = sample(1:f_size, 1) - print(self$seed_index) - print("ecc") } } if (!is.null(self$num_sets)) { - print("seed") - print(self$seed_index) - print(self$num_sets) eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - print(eps_lm) + ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) @@ -105,13 +87,9 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Construct the index set + the preimages self$index_set <- as.character(eps_lm) - print(self$index_set) self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - print("done level sets") }else{ if (!is.null(self$epsilon)) { - print("eps") - } } @@ -187,12 +165,9 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) # # self$level_sets <- structure(ls, names=self$index_set) - print("test") - print(index) + if (!missing(index)){ - print("hello") return(self$level_sets[[index]]) } - print("done") ## Always return self invisible(self) diff --git a/R/mapperRef.R b/R/mapperRef.R index 235e849..fd9eea2 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -483,7 +483,6 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: 
%s, please specify a cover typename listed in `covers_available()`", cover)) ) - print("leaving use cover") invisible(self) }) #@param filter_values (n x d) numeric matrix of values giving the results of the map. @@ -577,7 +576,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ pids_supplied <- (!missing(pullback_ids) && !is.null(pullback_ids)) if (pids_supplied){ stopifnot(all(pullback_ids %in% self$cover$index_set)) } pullback_ids <- if (pids_supplied){ pullback_ids } else { self$cover$index_set } - print("constructing pullback") ## If specific pullback ids not given, then resist the pullback mapping if (!pids_supplied || length(private$.pullback) == 0){ n_sets <- length(self$cover$index_set) @@ -585,16 +583,13 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ self$vertices <- list() private$.pullback <- structure(replicate(n_sets, integer(0)), names = self$cover$index_set) } - print("after if") ## Update the vertices for the given level sets. This requires updating both the simplex tree's ## internal representation as well as the outward-facing vertex list. The new vertices are returned ## to replace the current list. The pullback map is also updated. calc_preimage <- function(){ - print("begin preimage") function(index){ self$cover$construct_cover(self$filter, index) } } partial_cluster <- function(...){ - print("begin cluster") extra <- list(...) 
function(pid, idx){ do.call(self$clustering_algorithm, append(list(pid, idx, self), extra)) } } @@ -607,7 +602,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # private$.simplicial_complex$insert(as.list(ids)) # return(ids) # } - print("decomposing") ## Do the decomposition private$.vertices <- Mapper:::decompose_preimages( pullback_ids = as.character(pullback_ids), @@ -617,7 +611,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # id_generator = id_gen, pullback = private$.pullback ) - print("made pullback") ## Return self invisible(self) }) From 5275dfa98e87aff02145e7e8c15fd5a61d0cad93 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 4 Mar 2020 22:28:50 -0500 Subject: [PATCH 06/25] feat(LandmarkBallCover): add fixed epsilon width balls Addition of ability to specify a radius rather than a number of balls to construct a LandmarkBallCover. Modified LandmarkBallCover.R to support use of epsilon parameter, added new landmark function to landmarks.R to find landmarks by radius rather than number. --- R/LandmarkBallCover.R | 125 ++++++++++++------------------------------ R/landmarks.R | 96 +++++++++++++++++++++++--------- 2 files changed, 105 insertions(+), 116 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 55d8c38..2bee8d2 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -5,7 +5,7 @@ #' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes #' the filter space endowed with the euclidean metric. #' -#' This differs from the DisjointBallCover in that it does NOT union intersecting cover sets. +#' This differs from the BallCover in that it does NOT union intersecting cover sets. 
#' #' @field epsilon := radius of the ball to form around each point #' @author Cory Brunsion, Yara Skaf @@ -13,8 +13,8 @@ library(proxy) -# TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed -# Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) +# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) +# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", inherit = CoverRef, @@ -44,9 +44,10 @@ LandmarkBallCover$set("public", "format", function(...){ } if (!is.null(self$num_sets)) { sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) - } - if (!is.null(self$epsilon)) { - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + }else{ + if (!is.null(self$epsilon)) { + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + } } }) @@ -59,112 +60,56 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Get filter values fv <- filter() - f_dim <- ncol(fv) f_size <- nrow(fv) - ## Set the seed index if necessary if(is.null(index)){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { + # todo } - } - if (!is.null(self$num_sets)) { - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) # compute landmark set ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point 
from the landmark set so that all points are in at least one ball pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - ## Construct the index set + the preimages + ## Construct the index set self$index_set <- as.character(eps_lm) - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - }else{ - if (!is.null(self$epsilon)) { - } - } + ## Construct the preimages + if(length(eps_lm) == 1){ + orderedIndices = apply(dist_to_lm,1, sort) + max = which.max(orderedIndices) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + } + }else if (!is.null(self$epsilon)) { + eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + ## Construct the index set + self$index_set <- as.character(eps_lm) - ## Construct the balls - # if spec, elif rand, elif ecc -> set self$seed_index - # then if epsilon elif num_sets using self$seed_index - # if (!is.null(self$seed_method)){ - # if (all(self$seed_method == "RAND")) { - # self$seed_index = sample(1:f_size, 1) - # print(self$seed_index) - # print("rand") - # eps_lm <- landmarks(fv, self$num_sets, seed_index=self$seed_index) - # - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, 
fv[eps_lm,,drop=FALSE]) - # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } # TODO: does this make sense? we shouldnt be able to sepcify both eps and num_sets - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # writeLines("set index set") - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # writeLines("set level sets") - # } - # if (all(self$seed_method == "ECC")) { - # print("ecc") - # } - # }else{ - # if (!is.null(self$num_sets)){ - # print("seed") - # print(self$seed_index) - # print(self$num_sets) - # eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - # print(eps_lm) - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - # - # orderedIndices = t(apply(dist_to_lm,1, sort)) - # max = which.max(orderedIndices[,1]) - # self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball - # pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # print(self$index_set) - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # }else { - # writeLines("else") - # eps_lm <- landmarks(fv, 2L) - # - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # } - # } - - - # print("making balls") - # ## Construct the balls - # ball_cover <- RANN::nn2(fv, query = fv, searchtype = "radius", radius = self$epsilon) - # - # ## Assign 
the centers of the balls as the index set - # self$index_set <- as.character(ball_cover[["nn.idx"]][,1]) - # - # ## Calculate level (pullback) sets - # ls <- lapply(seq_len(nrow(ball_cover[["nn.idx"]])), function(i) ball_cover[["nn.idx"]][i,]) - # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) - # - # self$level_sets <- structure(ls, names=self$index_set) + ## Construct the preimages + if(length(eps_lm) == 1){ + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + } + } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/landmarks.R b/R/landmarks.R index 4791643..7005b2f 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -1,39 +1,83 @@ #' landmarks #' @description Computes a set of \code{n} landmarks of a point cloud. -#' @details This function uses the greedy maxmin procedure to produce a set of \code{n} evenly spaced landmark points in the data set -#' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. -#' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, -#' the algorithm is performed in R. -#' @param x a data matrix. -#' @param n the number of landmarks requested. +#' @details This function uses the greedy maxmin procedure to produce a set of \code{n} evenly spaced landmark points in the data set +#' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. +#' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, +#' the algorithm is performed in R. +#' @param x a data matrix. +#' @param n the number of landmarks requested. #' @param dist_method the distance metric to use. 
Any distance measure in the \code{proxy} package is supported. -#' @param seed_index the first landmark to seed the algorithm. +#' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. #' @export -landmarks <- function(x, n, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) - stopifnot(seed_index >= 1L && seed_index < nrow(x)) + stopifnot(seed_index >= 1L && seed_index <= nrow(x)) + stopifnot(!is.null(n) || !is.null(eps)) # must specify either a number of balls or a radius + shuffle_idx <- NA - if (shuffle_data){ + if (shuffle_data){ shuffle_idx <- sample(seq(nrow(x))) - x <- x[,,drop=FALSE] + x <- x[,,drop=FALSE] } - if (missing(dist_method) || toupper(dist_method) == "EUCLIDEAN"){ - landmark_idx <- landmark_maxmin(x, n, seed_index-1L) - } else if (requireNamespace("proxy", quietly = TRUE)){ - stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) - landmark_idx <- vector(mode="integer", n) - landmark_idx[1L] <- seed_index - landmark_min_dist <- rep(Inf, nrow(x)) - for (i in 2L:n){ - landmark_dist <- proxy::dist(x, x[landmark_idx[i-1L],,drop=FALSE], method = dist_method) - landmark_min_dist <- pmin(landmark_dist, landmark_min_dist) - potential_idx <- setdiff(seq(nrow(x)), landmark_idx[c(1:i)]) - landmark_idx[i] <- potential_idx[which.max(landmark_min_dist[potential_idx])] + + if(!is.null(n)){ + if (missing(dist_method) || toupper(dist_method) == "EUCLIDEAN"){ + landmark_idx <- landmark_maxmin(x, n, seed_index-1L) + } else if (requireNamespace("proxy", quietly = TRUE)){ + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + landmark_idx <- vector(mode="integer", n) + 
landmark_idx[1L] <- seed_index + landmark_min_dist <- rep(Inf, nrow(x)) + for (i in 2L:n){ + landmark_dist <- proxy::dist(x, x[landmark_idx[i-1L],,drop=FALSE], method = dist_method) + landmark_min_dist <- pmin(landmark_dist, landmark_min_dist) + potential_idx <- setdiff(seq(nrow(x)), landmark_idx[c(1:i)]) + landmark_idx[i] <- potential_idx[which.max(landmark_min_dist[potential_idx])] + } + } else { + stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) } - } else { - stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) + } else if(!is.null(eps)){ + # TODO: Currently written for euclidean distance in 1-dimensional lens space + stopifnot(missing(dist_method) || toupper(dist_method) == "EUCLIDEAN") + + # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks + C = list(seed_index) + f_C = list(x[seed_index]) + + # STEP 2: Compute distance between landmark set and each point in the space + dists = sapply(f_C, function(c) { + abs(c-x) + }) + max = which.max(dists) + d = dists[max] + + # Continue if distance is greater than epsilon + if(d >= eps){ + C = append(C, max) + f_C = append(f_C, x[max]) + + # STEP 2: Compute distance between landmark set and each point in the space + while(TRUE){ + dists = sapply(f_C, function(c) { + abs(c-x) + }) + + orderedIndices = t(apply(dists,1, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] + + # Continue until distance is less than epsilon (i.e. 
stop when all points are contained within an epsilon-ball) + if(d >= eps){ + C = append(C,max) + f_C = append(f_C,x[max]) + } else{ break } + } + } + landmark_idx = unlist(C) } + if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } -} \ No newline at end of file +} From 848787f23c6df2dc03e27ebc017667c06f6cbb03 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 4 Mar 2020 22:51:41 -0500 Subject: [PATCH 07/25] feat(LandmarkBallCover): add option to use filtered value of maximum eccentricity as seed for landmark selection Modified construct_cover function in LandmarkBallCover.R to include selection of seed based on maximum eccentricity. --- R/LandmarkBallCover.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 2bee8d2..72a6ced 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -68,7 +68,8 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { - # todo + ecc = eccentricity(from=fv, x=fv) + self$seed_index = which.max(ecc) } } From c02ad6222dfd4d851744d93d07b06dde6cd060e0 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 12:47:02 -0500 Subject: [PATCH 08/25] fix(LandmarkBallCover): Fix bug with apply when all balls have same number of points When every ball has the same number of points, apply() returns a matrix rather than a list of lists, causing a crash. Replaced this function with splitting the matrix by column in this case. 
--- R/LandmarkBallCover.R | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 72a6ced..e7c39b7 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -93,7 +93,16 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ orderedIndices = t(apply(dist_to_lm,1, sort)) max = which.max(orderedIndices[,1]) self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(x), names=self$index_set) + } + } }else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set @@ -108,7 +117,14 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(length(eps_lm) == 1){ self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) }else{ - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(x), names=self$index_set) + } } } From 
4863777ebd40cee4d548e5a29e6315deaa3ae4fb Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 14:39:34 -0500 Subject: [PATCH 09/25] feat(Landmarks): add ability to use any distance metric in pr_DB to compute eps-landmark set Replaced 1D euclidean distance with proxy::dist(), allowing arbitrary dist_method from proxy::pr_DB --- R/landmarks.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index 7005b2f..ecdbddc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -40,8 +40,7 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) } } else if(!is.null(eps)){ - # TODO: Currently written for euclidean distance in 1-dimensional lens space - stopifnot(missing(dist_method) || toupper(dist_method) == "EUCLIDEAN") + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) @@ -49,7 +48,7 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index # STEP 2: Compute distance between landmark set and each point in the space dists = sapply(f_C, function(c) { - abs(c-x) + proxy::dist(c, x, method = dist_method) }) max = which.max(dists) d = dists[max] @@ -62,9 +61,8 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ dists = sapply(f_C, function(c) { - abs(c-x) + proxy::dist(c, x, method = dist_method) }) - orderedIndices = t(apply(dists,1, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] From 984f1fde82454f6431710d19bbc9b9b7c6941058 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 15:04:33 -0500 Subject: [PATCH 10/25] 
feat(LandmarkBallCover): add appropriate validation for cover parameters Updated validate() method in LandmarkBallCover.R to ensure appropriate values of parameters for cover construction. --- R/LandmarkBallCover.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index e7c39b7..16cccb0 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -33,7 +33,16 @@ LandmarkBallCover$set("public", "initialize", function(...){ ## validate ------ LandmarkBallCover$set("public", "validate", function(filter){ - stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## validate parameters + stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) # require either radius or # balls + stopifnot(is.null(self$num_sets) || (self$num_sets <= f_size && self$num_sets > 0)) # cannot have more cover sets than data points + stopifnot(is.null(self$epsilon) || self$epsilon >= 0) # radius must be positive + stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices + stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods }) ## format ---- @@ -56,7 +65,7 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } - self$validate() + self$validate(filter) ## Get filter values fv <- filter() From 84dd145d6a7b8d4d8155edb97726887571961ae5 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 9 Mar 2020 14:45:42 -0400 Subject: [PATCH 11/25] feat(NeighborhoodCover): initial commit Implementation of cover by k-nearest neighbor sets. 
--- R/CoverRef.R | 67 +++++++++++++++++---------------- R/NeighborhoodCover.R | 87 +++++++++++++++++++++++++++++++++++++++++++ R/landmarks.R | 46 ++++++++++++++++++++++- R/mapperRef.R | 1 + 4 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 R/NeighborhoodCover.R diff --git a/R/CoverRef.R b/R/CoverRef.R index 9e62ec7..416543f 100644 --- a/R/CoverRef.R +++ b/R/CoverRef.R @@ -1,17 +1,17 @@ #' Cover abstract class #' @aliases cover -#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other +#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other #' types of covering generators with. Minimally, a derived covering class must implement the -#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters -#' that the derived class requires. -#' -#' Additional methods may also be added to improve the efficiency of the cover. -#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom -#' cover for details. -#' +#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters +#' that the derived class requires. +#' +#' Additional methods may also be added to improve the efficiency of the cover. +#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom +#' cover for details. +#' #' @section Fields: -#' The following is a list of the fields available for derived classes. Each may be accessed -#' by the \code{self} environment. See \code{?R6} for more details. +#' The following is a list of the fields available for derived classes. Each may be accessed +#' by the \code{self} environment. See \code{?R6} for more details. 
#' \itemize{ #' \item{\emph{level_sets}:}{ named list, indexed by \code{index_set}, whose values represent indices in the original data set to cluster over.} #' \item{\emph{index_set}:}{ character vector of keys that uniquely index the open sets of the cover.} @@ -20,14 +20,14 @@ #' @format An \code{\link{R6Class}} generator object #' @author Matt Piekenbrock #' @export CoverRef -CoverRef <- R6::R6Class("CoverRef", +CoverRef <- R6::R6Class("CoverRef", private = list( .level_sets = NULL, - .index_set = NULL, + .index_set = NULL, .typename = character(0) - ), + ), lock_class = FALSE, ## Feel free to add your own members - lock_objects = FALSE ## Or change existing ones + lock_objects = FALSE ## Or change existing ones ) ## Cover initialization @@ -43,7 +43,7 @@ CoverRef$set("public", "format", function(...){ ## Typename field ## typename ---- -CoverRef$set("active", "typename", +CoverRef$set("active", "typename", function(value){ if (missing(value)){ private$.typename } else { stop("Cover 'typename' member is read-only.") @@ -53,7 +53,7 @@ CoverRef$set("active", "typename", ## The index set may be composed of any data type, but the collection of indices must uniquely ## index the level sets list via the `[[` operator. ## index_set ---- -CoverRef$set("active", "index_set", +CoverRef$set("active", "index_set", function(value){ if (missing(value)){ private$.index_set @@ -65,10 +65,10 @@ CoverRef$set("active", "index_set", } }) -## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the -## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct. +## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the +## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct. 
## level_sets ---- -CoverRef$set("active", "level_sets", +CoverRef$set("active", "level_sets", function(value){ if (missing(value)){ private$.level_sets @@ -80,23 +80,23 @@ CoverRef$set("active", "level_sets", } ) -## Default cover +## Default cover ## construct_cover ---- CoverRef$set("public", "construct_cover", function(index=NULL){ stop("Base class cover construction called. This method must be overridden to be used.") }) -## Given an index in the index set, returns the indices of point intersect the image -## of f in the cover. The default method relies on the construct_cover method. +## Given an index in the index set, returns the indices of point intersect the image +## of f in the cover. The default method relies on the construct_cover method. # CoverRef$set("public", "construct_pullback", function(index){ -# +# # }) -## Which indices of the index set should be compared in constructing the k-simplices? -## This can be customized based on the cover to (dramatically) reduce -## the number of intersection checks needed to generate the k-skeletons, where k >= 1. -## Defaults to every pairwise combination of level sets. +## Which indices of the index set should be compared in constructing the k-simplices? +## This can be customized based on the cover to (dramatically) reduce +## the number of intersection checks needed to generate the k-skeletons, where k >= 1. +## Defaults to every pairwise combination of level sets. ## neighborhood ---- CoverRef$set("public", "neighborhood", function(filter, k=1){ if (length(private$.index_set) <= 1L){ return(NULL) } @@ -104,7 +104,7 @@ CoverRef$set("public", "neighborhood", function(filter, k=1){ relist(private$.index_set[k_combs], k_combs) }) -## Validates that the constructed cover is indeed a valid cover. +## Validates that the constructed cover is indeed a valid cover. 
## validate ---- CoverRef$set("public", "validate", function(){ if ( length(private$.index_set) != length(private$.level_sets) ){ @@ -129,13 +129,14 @@ covers_available <- function(){ line_format <- " %-28s %-34s %-15s" writeLines(c( sprintf("Typename:%-20sGenerator:%-25sParameters:%-26s", "", "", ""), - sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), + sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), sprintf(line_format, "restrained interval", "RestrainedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), # sprintf(line_format, "adaptive", "AdaptiveCover", paste0(c("number_intervals", "percent_overlap", "quantile_method"), collapse = ", ")), - sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", ")) + sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", ")), + sprintf(line_format, "landmark_ball", "LandmarkBallCover", paste0(c("epsilon", "num_sets", "seed_index", "seed_method"), collapse = ", ")), + sprintf(line_format, "neighborhood", "NeighborhoodCover", paste0(c("k", "seed_index", "seed_method"), collapse = ", ")) )) } -# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or -# move the filter points to the MapperRef object - +# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or +# move the filter points to the MapperRef object diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R new file mode 100644 index 0000000..88165f0 --- /dev/null +++ b/R/NeighborhoodCover.R @@ -0,0 +1,87 @@ +#' Ball Cover +#' +#' @docType class +#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered +#' about each point. 
Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. +#' +#' This differs from the BallCover in that it does NOT union intersecting cover sets. +#' +#' @field epsilon := radius of the ball to form around each point +#' @author Cory Brunsion, Yara Skaf +#' @export + +library(proxy) + +# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) +# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +NeighborhoodCover <- R6::R6Class( + classname = "NeighborhoodCover", + inherit = CoverRef, + public = list(k=NULL, seed_index=1, seed_method="SPEC") +) + +## initialize ------ +NeighborhoodCover$set("public", "initialize", function(...){ + super$initialize(typename="neighborhood") + params <- list(...) + if ("k" %in% names(params)){ self$k <- params[["k"]] } + if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } + if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } +}) + +## validate ------ +NeighborhoodCover$set("public", "validate", function(filter){ + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## validate parameters + stopifnot(!is.null(self$k)) # require nieghborhood size + stopifnot(self$k >= 1 && self$k <= f_size) # radius must be at least 1 (ball around every point) + stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices + stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods +}) + +## format ---- +NeighborhoodCover$set("public", "format", function(...){ + titlecase <- function(x){ + s <- strsplit(x, " ")[[1]] + paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") + } + sprintf("%s Cover: (k = %s, seed_index = %s)", 
titlecase(private$.typename), self$k, self$seed_index) +}) + +## construct_cover ------ +NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ + if (!requireNamespace("RANN", quietly = TRUE)){ + stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) + } + self$validate(filter) + + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## Set the seed index if necessary + if(is.null(index)){ + if(all(self$seed_method == "RAND")) { + self$seed_index = sample(1:f_size, 1) + } + if(all(self$seed_method == "ECC")) { + ecc = eccentricity(from=fv, x=fv) + self$seed_index = which.max(ecc) + } + } + + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) # compute landmark set + + ## Construct the index set + self$index_set <- as.character(attr(eps_lm,"names")) + self$level_sets <- eps_lm + + if (!missing(index)){ return(self$level_sets[[index]]) } + + ## Always return self + invisible(self) +}) diff --git a/R/landmarks.R b/R/landmarks.R index ecdbddc..18a5c83 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -11,10 +11,10 @@ #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. 
#' @export -landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) stopifnot(seed_index >= 1L && seed_index <= nrow(x)) - stopifnot(!is.null(n) || !is.null(eps)) # must specify either a number of balls or a radius + stopifnot(!is.null(n) || !is.null(eps) || !is.null(k)) # must specify a number of balls, a radius, or k-neighborhood shuffle_idx <- NA if (shuffle_data){ @@ -75,6 +75,48 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index } } landmark_idx = unlist(C) + } else if (!is.null(k)){ + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + + # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks + C = list(seed_index) + f_C = list(x[seed_index]) + + # STEP 2: Compute distance between landmark set and each point in the space + dists = sapply(f_C, function(c) { + proxy::dist(c, x, method = dist_method) + }) + max = which.max(dists) + d = dists[max] + + k_nhds = list(apply(dists,2,order)[1:k]) + + + # Continue if distance is greater than epsilon + if(!(max %in% k_nhds[[1]])){ + C = append(C, max) + f_C = append(f_C, x[max]) + + # STEP 2: Compute distance between landmark set and each point in the space + while(TRUE){ + dists = sapply(f_C, function(c) { + proxy::dist(c, x, method = dist_method) + }) + orderedIndices = t(apply(dists,1, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] + + k_nhds = append(k_nhds, list(apply(dists,2,order)[1:k,ncol(dists)])) + + # Continue until all points are within a k-neighborhood + if(!(any(sapply(k_nhds,function(x) x==max)))){ + C = append(C,max) + f_C = append(f_C,x[max]) + } else{ break } + } + } + idx_list = as.character(unlist(C)) + landmark_idx = structure(k_nhds, names=idx_list) } if (is.na(shuffle_idx)){ landmark_idx } else { 
shuffle_idx[landmark_idx] } diff --git a/R/mapperRef.R b/R/mapperRef.R index fd9eea2..765ed4e 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -481,6 +481,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), + "neighborhood"=NeighborhoodCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) invisible(self) From 187899c6d3ad048a7da05793907501e921254ec3 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 9 Mar 2020 18:39:58 -0400 Subject: [PATCH 12/25] fix(NeighborhoodCover): fix bug where not all points are included in a cover set Fixed bug where some points are excluded from cover sets. --- R/NeighborhoodCover.R | 4 +++- R/landmarks.R | 19 ++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 88165f0..18b315d 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -15,6 +15,7 @@ library(proxy) # Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) # Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +#' @export NeighborhoodCover <- R6::R6Class( classname = "NeighborhoodCover", inherit = CoverRef, @@ -22,6 +23,7 @@ NeighborhoodCover <- R6::R6Class( ) ## initialize ------ +#' @export NeighborhoodCover$set("public", "initialize", function(...){ super$initialize(typename="neighborhood") params <- list(...) 
@@ -38,7 +40,7 @@ NeighborhoodCover$set("public", "validate", function(filter){ ## validate parameters stopifnot(!is.null(self$k)) # require nieghborhood size - stopifnot(self$k >= 1 && self$k <= f_size) # radius must be at least 1 (ball around every point) + stopifnot(self$k >= 2 && self$k <= f_size) # size must be at least 2 points stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods }) diff --git a/R/landmarks.R b/R/landmarks.R index 18a5c83..f6a9ee8 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -90,28 +90,25 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se d = dists[max] k_nhds = list(apply(dists,2,order)[1:k]) - + pt_list = k_nhds[[1]] # Continue if distance is greater than epsilon - if(!(max %in% k_nhds[[1]])){ - C = append(C, max) - f_C = append(f_C, x[max]) - + if(!(max %in% pt_list)){ # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) + dists = sapply(pt_list, function(c) { + proxy::dist(x[c], x, method = dist_method) }) orderedIndices = t(apply(dists,1, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] - k_nhds = append(k_nhds, list(apply(dists,2,order)[1:k,ncol(dists)])) - # Continue until all points are within a k-neighborhood - if(!(any(sapply(k_nhds,function(x) x==max)))){ + if(!(max %in% pt_list)){ C = append(C,max) - f_C = append(f_C,x[max]) + new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] + k_nhds = append(k_nhds, list(new_pts)) + pt_list = append(pt_list, new_pts) } else{ break } } } From 3e666ecd5cbe731062fce2f24c16286aa4084011 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 11 Mar 2020 18:53:44 
-0400 Subject: [PATCH 13/25] fix(NeighborhoodCover): fix bug with >k repeated lensed values Allow neighborhoods of size greater than k when >k points have the same lens value. Now all points with the same lensed value end up in the same pullback set. --- R/landmarks.R | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index f6a9ee8..280d2a6 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -80,38 +80,42 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) - f_C = list(x[seed_index]) # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) - max = which.max(dists) - d = dists[max] + dists = proxy::dist(x[seed_index], x, method = dist_method) + equivalent_pts = which(dists == 0) - k_nhds = list(apply(dists,2,order)[1:k]) + if(length(equivalent_pts) > k){ + k_nhds = list(equivalent_pts) + } else{ + k_nhds = list(order(dists)[1:k]) + } pt_list = k_nhds[[1]] - # Continue if distance is greater than epsilon - if(!(max %in% pt_list)){ + while(TRUE){ # STEP 2: Compute distance between landmark set and each point in the space - while(TRUE){ - dists = sapply(pt_list, function(c) { - proxy::dist(x[c], x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] + dists = sapply(pt_list, function(pt) { + proxy::dist(x[pt], x, method = dist_method) + }) + orderedIndices = t(apply(dists,1, sort)) - # Continue until all points are within a k-neighborhood - if(!(max %in% pt_list)){ - C = append(C,max) - new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] - k_nhds = append(k_nhds, list(new_pts)) - pt_list = append(pt_list, new_pts) - } else{ break } + 
max_vector = which(orderedIndices[,1] == max(orderedIndices[,1])) + max = max_vector[1] + + if(length(max_vector) > k){ + new_pts = max_vector + } else{ + new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] } + + # Continue until all points are within a k-neighborhood + if(!(max %in% pt_list)){ + C = append(C,max) + k_nhds = append(k_nhds, list(new_pts)) + pt_list = append(pt_list, new_pts) + } else{ break } } + idx_list = as.character(unlist(C)) landmark_idx = structure(k_nhds, names=idx_list) } From d371c4897a50d482c54c718de4d77f502609fa59 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 17:40:57 -0400 Subject: [PATCH 14/25] docs: Update documentation for NeighborhoodCover.R and LandmarkBallCover.R Update headers, add reference to Dlotko paper. --- R/LandmarkBallCover.R | 31 +++++++++++++++++++------------ R/NeighborhoodCover.R | 16 +++++++++------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 16cccb0..e23e333 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -1,20 +1,30 @@ -#' Ball Cover +#' Landmark Ball Cover #' #' @docType class -#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered -#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes -#' the filter space endowed with the euclidean metric. +#' @description This class provides a cover whose open sets are formed by balls centered about each +#' point in a landmark set. Given a radius \deqn{\epsilon}, choose a set of landmark points via the +#' algorithm presented in Dłotko to produce a cover by balls of radius \deqn{\epsilon}. Alternatively, +#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. +#' +#' This differs from BallCover.R in that it does NOT union intersecting cover sets. 
#' -#' This differs from the BallCover in that it does NOT union intersecting cover sets. +#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. #' -#' @field epsilon := radius of the ball to form around each point -#' @author Cory Brunsion, Yara Skaf +#' @field epsilon := radius of the ball to form around each landmark point +#' @field num_sets := desired number of balls/cover sets +#' @field seed_index := index of data point to use as the seed for landmark set calculation +#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @author Yara Skaf, Cory Brunsion +#' @family cover +#' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. #' @export library(proxy) # Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) # Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +#' @export LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", inherit = CoverRef, @@ -53,10 +63,8 @@ LandmarkBallCover$set("public", "format", function(...){ } if (!is.null(self$num_sets)) { sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) - }else{ - if (!is.null(self$epsilon)) { - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) - } + }else if (!is.null(self$epsilon)){ + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } }) @@ -111,7 +119,6 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ }else{ self$level_sets <- structure(as.list(x), names=self$index_set) } - } }else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # 
compute landmark set diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 18b315d..e7127cc 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -1,14 +1,16 @@ -#' Ball Cover +#' Neighborhood Cover #' #' @docType class -#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered -#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' @description This class provides a cover whose open sets are formed by \code{k}-neighborhoods about a landmark +#' set. Cover sets may contain more than \code{k} points if there are more than \code{k} points equidistant from the +#' central point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes #' the filter space endowed with the euclidean metric. #' -#' This differs from the BallCover in that it does NOT union intersecting cover sets. -#' -#' @field epsilon := radius of the ball to form around each point -#' @author Cory Brunsion, Yara Skaf +#' @field k := desired number of neighbord to include in a cover set +#' @field seed_index := index of data point to use as the seed for landmark set calculation +#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @author Yara Skaf, Cory Brunsion +#' @family cover #' @export library(proxy) From 8f41462dfdbf110d2e6ae890e0ca525684b4f834 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:10:42 -0400 Subject: [PATCH 15/25] docs: Update documentation for NeighborhoodCover.R and LandmarkBallCover.R --- R/LandmarkBallCover.R | 54 ++++++++++++++++++++++--------------------- R/NeighborhoodCover.R | 17 +++++++------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index e23e333..865fc50 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -4,7 +4,8 
@@ #' @description This class provides a cover whose open sets are formed by balls centered about each #' point in a landmark set. Given a radius \deqn{\epsilon}, choose a set of landmark points via the #' algorithm presented in Dłotko to produce a cover by balls of radius \deqn{\epsilon}. Alternatively, -#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. +#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. If no seed +#' or seed_method is specified, default behavior uses the first data point as the seed. #' #' This differs from BallCover.R in that it does NOT union intersecting cover sets. #' @@ -14,16 +15,14 @@ #' @field epsilon := radius of the ball to form around each landmark point #' @field num_sets := desired number of balls/cover sets #' @field seed_index := index of data point to use as the seed for landmark set calculation -#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index +#' | "ECC" : point with highest eccentricity in the filter space) #' @author Yara Skaf, Cory Brunsion #' @family cover #' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. -#' @export library(proxy) -# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) -# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) #' @export LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", @@ -32,6 +31,7 @@ LandmarkBallCover <- R6::R6Class( ) ## initialize ------ +#' @export LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="landmark_ball") params <- list(...) 
@@ -62,9 +62,9 @@ LandmarkBallCover$set("public", "format", function(...){ paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") } if (!is.null(self$num_sets)) { - sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) + sprintf("%s Cover: (number of sets = %s, seed index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) }else if (!is.null(self$epsilon)){ - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + sprintf("%s Cover: (epsilon = %.2f, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } }) @@ -90,8 +90,10 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ } } + ## Construct the balls if (!is.null(self$num_sets)) { - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) # compute landmark set + ## Compute the landmark set + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) @@ -121,31 +123,31 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ } } }else if (!is.null(self$epsilon)) { - eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks + ## Compute the landmark set + eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - ## Construct the index set - self$index_set <- as.character(eps_lm) + ## Construct the index set + self$index_set <- 
as.character(eps_lm) - ## Construct the preimages - if(length(eps_lm) == 1){ - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + ## Construct the preimages + if(length(eps_lm) == 1){ + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) }else{ - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) - } + self$level_sets <- structure(as.list(x), names=self$index_set) } } + } - if (!missing(index)){ - return(self$level_sets[[index]]) } + if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self invisible(self) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index e7127cc..d622f81 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -2,21 +2,21 @@ #' #' @docType class #' @description This class provides a cover whose open sets are formed by \code{k}-neighborhoods about a landmark -#' set. Cover sets may contain more than \code{k} points if there are more than \code{k} points equidistant from the -#' central point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes -#' the filter space endowed with the euclidean metric. +#' set. 
If no seed or seed_method is specified, default behavior uses the first data point as the seed. Cover sets +#' may contain more than \code{k} points if there are more than \code{k} points equidistant from the central point. +#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes the filter space +#' endowed with the euclidean metric. #' #' @field k := desired number of neighbord to include in a cover set #' @field seed_index := index of data point to use as the seed for landmark set calculation -#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index +#' | "ECC" : point with highest eccentricity in the filter space) #' @author Yara Skaf, Cory Brunsion #' @family cover #' @export library(proxy) -# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) -# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) #' @export NeighborhoodCover <- R6::R6Class( classname = "NeighborhoodCover", @@ -53,7 +53,7 @@ NeighborhoodCover$set("public", "format", function(...){ s <- strsplit(x, " ")[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") } - sprintf("%s Cover: (k = %s, seed_index = %s)", titlecase(private$.typename), self$k, self$seed_index) + sprintf("%s Cover: (k = %s, seed index = %s)", titlecase(private$.typename), self$k, self$seed_index) }) ## construct_cover ------ @@ -78,7 +78,8 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ } } - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) # compute landmark set + ## Compute the landmark set + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) ## Construct the index set self$index_set <- as.character(attr(eps_lm,"names")) From f1e79832e41365e2d1e7cdf15ef5a680a59e2571 
Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:03:00 -0400 Subject: [PATCH 16/25] refactor(LandmarkBallCover): cleaned up code in construct_cover --- R/LandmarkBallCover.R | 75 +++++++++++++------------------------------ 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 865fc50..b0664ff 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -79,74 +79,45 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ fv <- filter() f_size <- nrow(fv) - ## Set the seed index if necessary + ## Construct the balls if(is.null(index)){ - if(all(self$seed_method == "RAND")) { - self$seed_index = sample(1:f_size, 1) - } - if(all(self$seed_method == "ECC")) { - ecc = eccentricity(from=fv, x=fv) - self$seed_index = which.max(ecc) - } - } + ## Set the seed index if necessary + if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } + if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } - ## Construct the balls - if (!is.null(self$num_sets)) { ## Compute the landmark set - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - - ## Get distance from each point to landmarks - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + if (!is.null(self$num_sets)) { eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + } else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) } ## Construct the index set self$index_set <- as.character(eps_lm) - ## Construct the preimages - if(length(eps_lm) == 1){ - orderedIndices = apply(dist_to_lm,1, sort) - max = which.max(orderedIndices) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points 
are in at least one ball - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) - }else{ - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + ## Get distance from each point to landmarks + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) + ## Calculate an epsilon if one was not given + if (!is.null(self$num_sets)) { + if(length(eps_lm) == 1){ + orderedIndices = apply(dist_to_lm,1, sort) + max = which.max(orderedIndices) + } else { + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) } + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball } - }else if (!is.null(self$epsilon)) { - ## Compute the landmark set - eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks - - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - - ## Construct the index set - self$index_set <- as.character(eps_lm) ## Construct the preimages - if(length(eps_lm) == 1){ + if(length(eps_lm) == 1) { self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) 
- }else{ + } else { x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) - } + # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements + if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + } else { self$level_sets <- structure(as.list(x), names=self$index_set) } } } - if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self From 6f4f3dd80035a98fc4293ee48b8f74ba5e5a8036 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:05:48 -0400 Subject: [PATCH 17/25] refactor(NeighborhoodCover): cleaned up code in construct_cover --- R/NeighborhoodCover.R | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index d622f81..9263b73 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -67,24 +67,18 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ fv <- filter() f_size <- nrow(fv) - ## Set the seed index if necessary if(is.null(index)){ - if(all(self$seed_method == "RAND")) { - self$seed_index = sample(1:f_size, 1) - } - if(all(self$seed_method == "ECC")) { - ecc = eccentricity(from=fv, x=fv) - self$seed_index = which.max(ecc) - } - } - - ## Compute the landmark set - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Set the seed index if necessary + if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } + if(all(self$seed_method == "ECC")) { self$seed_index = 
which.max(eccentricity(from=fv, x=fv)) } - ## Construct the index set - self$index_set <- as.character(attr(eps_lm,"names")) - self$level_sets <- eps_lm + ## Compute the landmark set + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Construct the index set and level sets + self$index_set <- as.character(attr(eps_lm,"names")) + self$level_sets <- eps_lm + } if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self From f71639e05fc068d417c9478b989d06fbaee4aa12 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:17:58 -0400 Subject: [PATCH 18/25] refactor(landmarks): remove comments Remove commented out code that computed landmarks in only one-dimensional data. --- R/landmarks.R | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index b8236d2..87a62fc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -10,6 +10,7 @@ #' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. +#' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web.
#' @export landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) @@ -44,33 +45,29 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) - f_C = list(x[seed_index]) + f_C = matrix(x[seed_index,], nrow=1, byrow=TRUE) # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) + dists = proxy::dist(f_C, x, method = dist_method) max = which.max(dists) d = dists[max] # Continue if distance is greater than epsilon if(d >= eps){ C = append(C, max) - f_C = append(f_C, x[max]) + f_C = rbind(f_C, x[max,]) # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) + dists = proxy::dist(f_C, x, method = dist_method) + orderedIndices = t(apply(dists,2, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] # Continue until distance is less than epsilon (i.e. 
stop when all points are contained within an epsilon-ball) if(d >= eps){ C = append(C,max) - f_C = append(f_C,x[max]) + f_C = rbind(f_C, x[max,]) } else{ break } } } @@ -119,6 +116,5 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se idx_list = as.character(unlist(C)) landmark_idx = structure(k_nhds, names=idx_list) } - if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From a1fd4673fb9483063c3b0605478fed2db51d497b Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 23:05:37 -0400 Subject: [PATCH 19/25] fix(LandmarkBallCover): fix bug where num_sets > unique filtered points In this case, just take the unique balls as the cover sets. Duplicate lensed values should not form distinct centers. --- R/LandmarkBallCover.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index b0664ff..f09a1d5 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -86,7 +86,7 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } ## Compute the landmark set - if (!is.null(self$num_sets)) { eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + if (!is.null(self$num_sets)) { eps_lm <- unique(landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index)) } else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) } ## Construct the index set From 45e4bb945a067dbd9fceca2e10236b6542099e46 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 23:17:23 -0400 Subject: [PATCH 20/25] refactor(landmarks): removed unnecessary loop in eps-landmark calculation --- R/landmarks.R | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git 
a/R/landmarks.R b/R/landmarks.R index b8236d2..87a62fc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -52,24 +52,16 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se max = which.max(dists) d = dists[max] - # Continue if distance is greater than epsilon - if(d >= eps){ + # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) + while(d >= eps){ C = append(C, max) f_C = rbind(f_C, x[max,]) # STEP 2: Compute distance between landmark set and each point in the space - while(TRUE){ - dists = proxy::dist(f_C, x, method = dist_method) - orderedIndices = t(apply(dists,2, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] - - # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) - if(d >= eps){ - C = append(C,max) - f_C = rbind(f_C, x[max,]) - } else{ break } - } + dists = proxy::dist(f_C, x, method = dist_method) + orderedIndices = t(apply(dists,2, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] } landmark_idx = unlist(C) } else if (!is.null(k)){ From dffbbb8017cbf6300ae506eba1c16d5b03a7d9c2 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 17:35:56 -0400 Subject: [PATCH 21/25] refactor(NeighborhoodBallCover): cleaned up code to construct cover sets Moved calculation of k-neighborhoods to NeighborhoodCover.R, eliminated unnecessary code, removed k-nhd functionality from landmarks.R --- R/NeighborhoodCover.R | 38 +++++++++++++++++++++++++++++----- R/landmarks.R | 47 ++----------------------------------------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 9263b73..12a0d4e 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -57,14 +57,16 @@ NeighborhoodCover$set("public", "format", function(...){ }) ## construct_cover ------ 
-NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ +NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL, dist_method = "euclidean"){ if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } self$validate(filter) + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) ## Get filter values fv <- filter() + f_dim <- ncol(fv) f_size <- nrow(fv) if(is.null(index)){ @@ -72,12 +74,38 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } - ## Compute the landmark set - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Compute the set of k-nhds + C = c() # create an empty list to store indices of centers + k_nhds = list() # create empty list of lists to store points in each neighborhood + ptsLeft = c(1:f_size) # keep track of indices that are still available to be chosen as a center + + nextC = self$seed_index # use the seed as the first center + while(TRUE){ + # add the new center to the list and compute its k-neighborhood + C = append(C, nextC) + neighbors = proxy::dist(matrix(fv[nextC,], ncol=f_dim), fv, method = dist_method) + sortedNeighbors = sort(neighbors) + + # include all points that are equidistant from the center, even if those points create a nhd size > k + i = 1 + while( ((self$k+i) <= f_size) && (sortedNeighbors[self$k] == sortedNeighbors[self$k + i]) ){i = i + 1} + nhd = order(neighbors)[1:(self$k + i - 1)] + k_nhds = append(k_nhds, list(nhd)) + + # points that are included in a nhd are no longer eligible to be chosen as centers + ptsLeft = setdiff(ptsLeft, nhd) + if(length(ptsLeft) <= 0){break} + + # select the next center + dists = proxy::dist(matrix(fv[C,], ncol=f_dim), matrix(fv[ptsLeft,], 
ncol=f_dim), method = dist_method) + sortedDists = matrix(apply(dists,2,sort),ncol=length(ptsLeft)) + max = which.max(sortedDists[1,]) + nextC = ptsLeft[max] + } ## Construct the index set and level sets - self$index_set <- as.character(attr(eps_lm,"names")) - self$level_sets <- eps_lm + self$index_set = as.character(C) + self$level_sets = structure(k_nhds, names=self$index_set) } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/landmarks.R b/R/landmarks.R index 87a62fc..ad0906c 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -12,10 +12,10 @@ #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. #' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. #' @export -landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) stopifnot(seed_index >= 1L && seed_index <= nrow(x)) - stopifnot(!is.null(n) || !is.null(eps) || !is.null(k)) # must specify a number of balls, a radius, or k-neighborhood + stopifnot(!is.null(n) || !is.null(eps)) # must specify a number of balls or a radius shuffle_idx <- NA if (shuffle_data){ @@ -64,49 +64,6 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se d = orderedIndices[max,1] } landmark_idx = unlist(C) - } else if (!is.null(k)){ - stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) - - # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks - C = list(seed_index) - - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(x[seed_index], x, method = dist_method) - equivalent_pts = which(dists == 0) - - if(length(equivalent_pts) > k){ - k_nhds = list(equivalent_pts) - } else{ - k_nhds 
= list(order(dists)[1:k]) - } - pt_list = k_nhds[[1]] - - while(TRUE){ - # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(pt_list, function(pt) { - proxy::dist(x[pt], x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) - - max_vector = which(orderedIndices[,1] == max(orderedIndices[,1])) - max = max_vector[1] - - if(length(max_vector) > k){ - new_pts = max_vector - } else{ - new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] - } - - # Continue until all points are within a k-neighborhood - if(!(max %in% pt_list)){ - C = append(C,max) - k_nhds = append(k_nhds, list(new_pts)) - pt_list = append(pt_list, new_pts) - } else{ break } - } - - idx_list = as.character(unlist(C)) - landmark_idx = structure(k_nhds, names=idx_list) } if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From eb977e6dba843bec9293ce67f480f4b88b8f7b3a Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 18:11:50 -0400 Subject: [PATCH 22/25] refactor(landmarks): eliminate unnecessary code in epsilon-net computation Refactor code for epsilon-landmarks in landmarks.R, update documentation --- R/landmarks.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index ad0906c..14045cc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -4,8 +4,11 @@ #' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. #' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, #' the algorithm is performed in R. +#' Alternatively, a radius \deqn{\epsilon} can be specified so that the landmark set is computed to cover the space with +#' \deqn{\epsilon}-balls centered at the landmarks. #' @param x a data matrix. #' @param n the number of landmarks requested. 
+#' @param eps the desired radius of balls in the cover set. #' @param dist_method the distance metric to use. Any distance measure in the \code{proxy} package is supported. #' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. @@ -42,28 +45,25 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index } } else if(!is.null(eps)){ stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + f_dim <- ncol(x) + f_size <- nrow(x) - # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks - C = list(seed_index) - f_C = matrix(x[seed_index,], nrow=1, byrow=TRUE) + # algorithm and variable names as specified in Dlotko paper + C = c() # create a list to store indices of centers/landmarks + next_pt = seed_index # first landmark should be the seed point + while(TRUE){ + C = append(C, next_pt) # add new point to list of landmarks - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(f_C, x, method = dist_method) - max = which.max(dists) - d = dists[max] + # compute distance between landmark set and each point in the space + dists = proxy::dist(matrix(x[C,], ncol=f_dim), x, method = dist_method) + sortedDists = matrix(apply(dists,2,sort),ncol=f_size) - # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) - while(d >= eps){ - C = append(C, max) - f_C = rbind(f_C, x[max,]) - - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(f_C, x, method = dist_method) - orderedIndices = t(apply(dists,2, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] + # the next landmark is the point with greatest distance from the current landmark set + next_pt = which.max(sortedDists[1,]) + d = sortedDists[1,next_pt] # done when this max distance is < eps, i.e. 
when all pts are contained in an eps-ball + if(d < eps){break} } - landmark_idx = unlist(C) + landmark_idx = C } if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From c5f9c3fa678233eaec520a35f71f52330482127b Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 20:26:42 -0400 Subject: [PATCH 23/25] refactor(LandmarkBallCover): remove unnecessary code from construct_cover --- R/LandmarkBallCover.R | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index f09a1d5..88982d0 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -98,25 +98,18 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Calculate an epsilon if one was not given if (!is.null(self$num_sets)) { - if(length(eps_lm) == 1){ - orderedIndices = apply(dist_to_lm,1, sort) - max = which.max(orderedIndices) - } else { - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - } - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + sortedDists = matrix(apply(dist_to_lm,1,sort),nrow=f_size,byrow=TRUE) + max = which.max(sortedDists[,1]) + self$epsilon = sortedDists[max,1] # radius should be distance of the farthest pt from the landmark set so that all pts are in at least one ball } - ## Construct the preimages - if(length(eps_lm) == 1) { - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) - } else { - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements - if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - } else { self$level_sets <- 
structure(as.list(x), names=self$index_set) } - } + x = apply(dist_to_lm, 2, pts_within_eps) + + # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements + if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + } else { self$level_sets <- structure(as.list(x), names=self$index_set) } + + print(self$level_sets) } if (!missing(index)){ return(self$level_sets[[index]]) } From 7ef71eccb7b407bcb0d01d690f98d005244f665d Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 27 Apr 2020 22:51:43 -0400 Subject: [PATCH 24/25] fix(LandmarkBallCover, NeighborhoodCover): Change list() to c() to avoid bug with pasting lists of parameters. --- R/LandmarkBallCover.R | 4 +--- R/NeighborhoodCover.R | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 88982d0..251f5ed 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -34,7 +34,7 @@ LandmarkBallCover <- R6::R6Class( #' @export LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="landmark_ball") - params <- list(...) + params <- c(...) 
if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } @@ -108,8 +108,6 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) } else { self$level_sets <- structure(as.list(x), names=self$index_set) } - - print(self$level_sets) } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 12a0d4e..133cb89 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -28,7 +28,7 @@ NeighborhoodCover <- R6::R6Class( #' @export NeighborhoodCover$set("public", "initialize", function(...){ super$initialize(typename="neighborhood") - params <- list(...) + params <- c(...) 
if ("k" %in% names(params)){ self$k <- params[["k"]] } if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } From c91b39b284e229af6b36d151a5a0d1b832b826a4 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Tue, 5 May 2020 13:28:56 -0400 Subject: [PATCH 25/25] fix(LandmarkBallCover): fix printing of epsilon format method --- R/LandmarkBallCover.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 251f5ed..c93ba5e 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -64,7 +64,7 @@ LandmarkBallCover$set("public", "format", function(...){ if (!is.null(self$num_sets)) { sprintf("%s Cover: (number of sets = %s, seed index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) }else if (!is.null(self$epsilon)){ - sprintf("%s Cover: (epsilon = %.2f, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + sprintf("%s Cover: (epsilon = %s, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } })