From d83d236331da5bbe03911cd58e360dd619a239b9 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:45:11 -0500 Subject: [PATCH 01/25] feat(LandmarkBallCover): initial commit Implementation of ball cover of lensed data space using landmark point set. Fixed bug in landmarks.cpp --- R/YS_BallCover.R | 199 +++++++++++++++++++ R/mapperRef.R | 473 +++++++++++++++++++++++----------------------- src/landmarks.cpp | 11 +- 3 files changed, 443 insertions(+), 240 deletions(-) create mode 100644 R/YS_BallCover.R diff --git a/R/YS_BallCover.R b/R/YS_BallCover.R new file mode 100644 index 0000000..b845746 --- /dev/null +++ b/R/YS_BallCover.R @@ -0,0 +1,199 @@ +#' Ball Cover +#' +#' @docType class +#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered +#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. +#' +#' This differs from the DisjointBallCover in that it does NOT union intersecting cover sets. +#' +#' @field epsilon := radius of the ball to form around each point +#' @author Cory Brunsion, Yara Skaf +#' @export + +library(proxy) + +# TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed +# Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) +YS_BallCover <- R6::R6Class( + classname = "YS_BallCover", + inherit = CoverRef, + public = list(epsilon=NULL, num_sets=NULL, seed_index=1, seed_method="SPEC") +) + +## initialize ------ +YS_BallCover$set("public", "initialize", function(...){ + super$initialize(typename="ys_ball") + params <- list(...) 
+ if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } + if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } + if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } + if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } +}) + +## validate ------ +YS_BallCover$set("public", "validate", function(filter){ + stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) + writeLines("\n(eps, num_sets, seed_index, seed_method):\n") + print(self$epsilon) + print(self$num_sets) + print(self$seed_index) + print(self$seed_method) +}) + +## format ---- +YS_BallCover$set("public", "format", function(...){ + titlecase <- function(x){ + s <- strsplit(x, " ")[[1]] + paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") + } + if (!is.null(self$num_sets)) { + sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) + } + if (!is.null(self$epsilon)) { + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + } +}) + +## construct_cover ------ +YS_BallCover$set("public", "construct_cover", function(filter, index=NULL){ + writeLines("Index:") + print(index) + print("constructing") + if (!requireNamespace("RANN", quietly = TRUE)){ + stop("Package \"RANN\" is needed for to use this cover.", call. 
= FALSE) + } + print("validating") + self$validate() + + print("filtering") + ## Get filter values + fv <- filter() + f_dim <- ncol(fv) + f_size <- nrow(fv) + + + ## Set the seed index if necessary + if(is.null(index)){ + if(all(self$seed_method == "RAND")) { + self$seed_index = sample(1:f_size, 1) + print(self$seed_index) + print("rand") + } + if(all(self$seed_method == "ECC")) { + #self$seed_index = sample(1:f_size, 1) + print(self$seed_index) + print("ecc") + } + + } + + + if (!is.null(self$num_sets)) { + print("seed") + print(self$seed_index) + print(self$num_sets) + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + print(eps_lm) + ## Get distance from each point to landmarks + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + + ## Construct the index set + the preimages + self$index_set <- as.character(eps_lm) + print(self$index_set) + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + print("done level sets") + }else{ + if (!is.null(self$epsilon)) { + print("eps") + + } + } + + + + + ## Construct the balls + # if spec, elif rand, elif ecc -> set self$seed_index + # then if epsilon elif num_sets using self$seed_index + # if (!is.null(self$seed_method)){ + # if (all(self$seed_method == "RAND")) { + # self$seed_index = sample(1:f_size, 1) + # print(self$seed_index) + # print("rand") + # eps_lm <- landmarks(fv, self$num_sets, seed_index=self$seed_index) + # + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } # TODO: does this make sense? 
we shouldnt be able to sepcify both eps and num_sets + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # writeLines("set index set") + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # writeLines("set level sets") + # } + # if (all(self$seed_method == "ECC")) { + # print("ecc") + # } + # }else{ + # if (!is.null(self$num_sets)){ + # print("seed") + # print(self$seed_index) + # print(self$num_sets) + # eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + # print(eps_lm) + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # + # orderedIndices = t(apply(dist_to_lm,1, sort)) + # max = which.max(orderedIndices[,1]) + # self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + # pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # print(self$index_set) + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # }else { + # writeLines("else") + # eps_lm <- landmarks(fv, 2L) + # + # ## Get distance from each point to landmarks + # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } + # + # ## Construct the index set + the preimages + # self$index_set <- as.character(eps_lm) + # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + # } + # } + + + # print("making balls") + # ## Construct the balls + # ball_cover <- RANN::nn2(fv, query = fv, searchtype = "radius", radius = self$epsilon) + # + # ## Assign the centers of the balls as the index set + # self$index_set <- as.character(ball_cover[["nn.idx"]][,1]) + # + # ## Calculate 
level (pullback) sets + # ls <- lapply(seq_len(nrow(ball_cover[["nn.idx"]])), function(i) ball_cover[["nn.idx"]][i,]) + # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) + # + # self$level_sets <- structure(ls, names=self$index_set) + print("test") + print(index) + if (!missing(index)){ + print("hello") + return(self$level_sets[[index]]) } + print("done") + + ## Always return self + invisible(self) +}) diff --git a/R/mapperRef.R b/R/mapperRef.R index 93c3d5a..a79ebe2 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -2,14 +2,14 @@ #' @name MapperRef #' @aliases Mapper #' @description `MapperRef` is an \link{R6} class built for parameterizing and computing mappers efficiently. -#' @section Details: To create a new \code{MapperRef} instance object, instantiate the class with the \code{\link[R6:R6Class]{R6 new}} operator. +#' @section Details: To create a new \code{MapperRef} instance object, instantiate the class with the \code{\link[R6:R6Class]{R6 new}} operator. #' Instantiation of a \code{MapperRef} objects requires a data matrix, or a function returning one. \cr -#' The primary output of the Mapper method is a simplicial complex. -#' \cr -#' The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +#' The primary output of the Mapper method is a simplicial complex. +#' \cr +#' The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods #' (e.g. \code{\link{construct_nerve}}, \code{\link{construct_k_skeleton}}, etc.). #' @section Usage: -#' \preformatted{m = MapperRef$new(X)} +#' \preformatted{m = MapperRef$new(X)} #' @section Arguments: #' \itemize{ #' \item{\strong{X}}: The data, either as a matrix, or a function that returns a matrix. @@ -17,15 +17,15 @@ #' @section Fields: #' \itemize{ #' \item{\strong{X}}: The data, as a function. Evaluation returns the data as a matrix. 
-#' \item{\strong{filter}}: The filter, as a function. Evaluation returns the filtered data as a matrix. -#' \item{\strong{cover}}: The cover (a \code{\link{CoverRef}} derived object). +#' \item{\strong{filter}}: The filter, as a function. Evaluation returns the filtered data as a matrix. +#' \item{\strong{cover}}: The cover (a \code{\link{CoverRef}} derived object). #' \item{\strong{clustering_algorithm}}: The clustering algorithm to use in the pullback. -#' \item{\strong{measure}}: Distance measure to use to compute distances in ambient space. See \code{use_distance_measure} for more details. -#' \item{\strong{pullback}}: Mapping between the sets in the cover (by index) and the vertices (by id). -#' \item{\strong{vertices}}: The mapper vertices. +#' \item{\strong{measure}}: Distance measure to use to compute distances in ambient space. See \code{use_distance_measure} for more details. +#' \item{\strong{pullback}}: Mapping between the sets in the cover (by index) and the vertices (by id). +#' \item{\strong{vertices}}: The mapper vertices. #' \item{\strong{simplicial_complex}}: A \code{\link[simplextree:simplextree]{simplex tree}} object. #' } -#' +#' #' @section Methods: #' \itemize{ #' \item{\code{\link{use_filter}}: Specifies the filter.} @@ -41,9 +41,9 @@ #' } #' @section More information: #' Full documentation available \href{https://peekxc.github.io/Mapper}{online}. -#' +#' #' @return \code{\link{MapperRef}} instance equipped with methods for building the mapper. 
-#' +#' #' @import methods #' @importFrom Rcpp sourceCpp #' @author Matt Piekenbrock, \email{matt.piekenbrock@@gmail.com} @@ -53,23 +53,23 @@ NULL #' @export -MapperRef <- R6::R6Class("MapperRef", +MapperRef <- R6::R6Class("MapperRef", private = list( - .X = NULL, + .X = NULL, .filter = NULL, - .cover = NULL, - .clustering_algorithm = NULL, - .measure = "euclidean", - .measure_opt = NULL, + .cover = NULL, + .clustering_algorithm = NULL, + .measure = "euclidean", + .measure_opt = NULL, .pullback=list(), .vertices = list(), .simplicial_complex = NULL ), lock_class = FALSE, ## Feel free to add your own members - lock_objects = FALSE ## Or change existing ones + lock_objects = FALSE ## Or change existing ones ) -## To add a public member function +## To add a public member function ## add function ---- MapperRef$set("public", "add_function", function(name, FUN) { self[[name]] <- FUN @@ -78,9 +78,9 @@ MapperRef$set("public", "add_function", function(name, FUN) { ## The set index -> (vertex) decomposition mapping. ## pullback ---- -MapperRef$set("active", "pullback", - function(value){ - if (missing(value)){ private$.pullback } +MapperRef$set("active", "pullback", + function(value){ + if (missing(value)){ private$.pullback } else { private$.pullback <- value } } ) @@ -88,16 +88,16 @@ MapperRef$set("active", "pullback", ## cover ---- ## @title Mapper cover ## @name cover -## @description Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as -## is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter -## space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr -## \cr -## The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the +## @description Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as +## is \code{cover} member field. 
In the context of Mapper, a cover is used to discretize the filter +## space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr +## \cr +## The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the ## requirements of being a valid \code{\link{CoverRef}} instance. -## @seealso \code{\link{CoverRef}} -MapperRef$set("active", "cover", +## @seealso \code{\link{CoverRef}} +MapperRef$set("active", "cover", function(value){ #function(fv, type = c("restrained rectangular"), ...) - if (missing(value)){ private$.cover } + if (missing(value)){ private$.cover } else { stopifnot(inherits(value, "CoverRef")) private$.cover <- value @@ -108,7 +108,7 @@ MapperRef$set("active", "cover", ## Mapper stores the vertices as a list ## vertices ---- -MapperRef$set("active", "vertices", +MapperRef$set("active", "vertices", function(value){ if(missing(value)){ private$.vertices @@ -116,32 +116,32 @@ MapperRef$set("active", "vertices", private$.vertices <- value # stop("`$vertices` is read-only. To update the vertex membership, use the 'compute_vertices' function.", call. = FALSE) } - } + } ) ## simplicial_complex ---- -## @title Simplicial Complex -## @name simplicial_complex -## @description The relational information of the Mapper construction. -## @details The primary output of the Mapper method is a simplicial complex. With \code{\link{MapperRef}} objects, -## the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. -## \cr -## The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +## @title Simplicial Complex +## @name simplicial_complex +## @description The relational information of the Mapper construction. +## @details The primary output of the Mapper method is a simplicial complex. 
With \code{\link{MapperRef}} objects, +## the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. +## \cr +## The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods ## (e.g. \code{\link{construct_nerve}}, \code{\link{construct_k_skeleton}}, etc.). -MapperRef$set("active", "simplicial_complex", +MapperRef$set("active", "simplicial_complex", function(value){ if (missing(value)){ private$.simplicial_complex } else { stopifnot(is(value, "Rcpp_SimplexTree")) - private$.simplicial_complex <- value + private$.simplicial_complex <- value # stop("`$simplicial_complex` is read-only. To change the complex, use the objects (simplex tree) methods directly.", call. = FALSE) } } ) ## clustering_algorithm ---- -## Clustering algorithm must be a function that takes as arguments 'pid' and 'idx' -MapperRef$set("active", "clustering_algorithm", +## Clustering algorithm must be a function that takes as arguments 'pid' and 'idx' +MapperRef$set("active", "clustering_algorithm", function(value){ if (missing(value)){ private$.clustering_algorithm } else { @@ -157,9 +157,9 @@ MapperRef$set("active", "data", function(value){ else { if (is.matrix(value)){ X_acc <- function(data_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx)){ return(data_matrix) } - return(data_matrix[idx,,drop=FALSE]) + return(data_matrix[idx,,drop=FALSE]) } } private$.X <- X_acc(value) @@ -170,19 +170,19 @@ MapperRef$set("active", "data", function(value){ } } }) - + ## X ---- ## The data should be held fixed -MapperRef$set("active", "X", +MapperRef$set("active", "X", function(value){ if (missing(value)){ return(private$.X) } else { if (is.matrix(value)){ X_acc <- function(data_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx)){ return(data_matrix) } - return(data_matrix[idx,,drop=FALSE]) + return(data_matrix[idx,,drop=FALSE]) } } private$.X <- X_acc(value) @@ -198,16 +198,16 @@ 
MapperRef$set("active", "X", ## filter ---- ## The filter function associated with the Mapper instance -MapperRef$set("active", "filter", +MapperRef$set("active", "filter", function(value){ if (missing(value)){ return(private$.filter) } else { # browser() if (is.matrix(value)){ filter_acc <- function(filter_matrix){ - function(idx=NULL){ + function(idx=NULL){ if (missing(idx) || is.null(idx)){ return(filter_matrix) } - return(filter_matrix[idx,,drop=FALSE]) + return(filter_matrix[idx,,drop=FALSE]) } } private$.filter <- filter_acc(value) @@ -236,7 +236,7 @@ MapperRef$set("active", "measure", ) ## initialize ---- -## Initialization method relies on active binding +## Initialization method relies on active binding MapperRef$set("public", "initialize", function(X = NULL){ private$.simplicial_complex <- simplextree::simplex_tree() self$use_distance_measure(measure = "euclidean") @@ -249,19 +249,19 @@ MapperRef$set("public", "initialize", function(X = NULL){ #' @name use_data #' @title Sets the data #' @description Sets the data matrix to associate with the mapper. -#' @param data either a matrix, a function, or a data set name. See details. +#' @param data either a matrix, a function, or a data set name. See details. #' @param ... additional parameters to pass to the filter function. -#' @details This function sets the data for the Mapper to work on. If \code{data} is a string, -#' it must be one of the (illustrative) data sets listed in \code{data(package="Mapper")}. -#' Otherwise, \code{data} must be either a matrix of coordinate values, a function that returns a matrix of +#' @details This function sets the data for the Mapper to work on. If \code{data} is a string, +#' it must be one of the (illustrative) data sets listed in \code{data(package="Mapper")}. +#' Otherwise, \code{data} must be either a matrix of coordinate values, a function that returns a matrix of #' coordinate values. 
MapperRef$set("public", "use_data", function(X){ if (is.character(X)){ stopifnot(X %in% c("noisy_circle", "wvs_us_wave6")) - self$X <- local({ + self$X <- local({ load(system.file(sprintf("data/%s.Rdata", X), package="Mapper")) X <- eval(parse(text=X)) - return(scale(as.matrix(X))) + return(scale(as.matrix(X))) }) } else { self$X <- X } return(invisible(self)) @@ -271,10 +271,10 @@ MapperRef$set("public", "use_data", function(X){ #' @name use_filter #' @title Sets the filter #' @description Sets the map, or \emph{filter}, to associate with the instance -#' @param filter either the filter name to use, a matrix, or a function. See details. +#' @param filter either the filter name to use, a matrix, or a function. See details. #' @param ... additional parameters to pass to the filter function. -#' @details \code{filter} must be either a matrix of coordinate values, a function that returns a matrix of -#' coordinate values, or one of the following predefined filters: +#' @details \code{filter} must be either a matrix of coordinate values, a function that returns a matrix of +#' coordinate values, or one of the following predefined filters: #' \itemize{ #' \item{\strong{PC}}{ Principle components (with \code{\link[stats:prcomp]{prcomp}})} #' \item{\strong{IC}}{ Independent components (with \code{\link[fastICA:fastICA]{fastICA}})} @@ -287,15 +287,15 @@ MapperRef$set("public", "use_data", function(X){ #' \item{\strong{UMAP}}{ Uniform Manifold Approximation and Projection (with \code{\link[umap:umap]{umap}})} #' } #' Nearly all the pre-configured filters essentially call functions in other packages with -#' somewhat reasonable default parameters to perform the mapping. Any parameters supplied to \code{...} -#' are passed to the corresponding package function linked above, which override any default parameters. -#' If the package needed to compute the filter is not installed, a prompt is given asking the user +#' somewhat reasonable default parameters to perform the mapping. 
Any parameters supplied to \code{...} +#' are passed to the corresponding package function linked above, which override any default parameters. +#' If the package needed to compute the filter is not installed, a prompt is given asking the user #' whether they would like to install it. \cr #' \cr -#' \strong{NOTE:} The predefined filters are meant to be used for exploratory or illustrative purposes only---this function +#' \strong{NOTE:} The predefined filters are meant to be used for exploratory or illustrative purposes only---this function #' is \emph{not} meant to act as a comprensive interface to the functions each filter corresponds too. MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE", "DTM", "MDS", "ISOMAP", "LE", "UMAP"), ...){ - if (is.function(filter)){ private$.filter <- filter } + if (is.function(filter)){ private$.filter <- filter } else if (is.matrix(filter)){ self$filter <- filter } else if (is.numeric(filter) && is.vector(filter)){ @@ -323,13 +323,13 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" } return(params) } - + ## Basic filters if (filter == "PC"){ default_params <- list(x = self$X(), scale. = TRUE, center = TRUE, rank. 
= 2L) params <- modifyList(default_params, given_params) res <- do.call(stats::prcomp, params) - self$filter <- matrix(res$x, ncol = params[["rank."]]) + self$filter <- matrix(res$x, ncol = params[["rank."]]) # c("mapper_filter", "function") } else if (filter == "IC"){ @@ -338,15 +338,15 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" params <- modifyList(default_params, given_params) res <- do.call(fastICA::fastICA, params) self$filter <- matrix(res$S, ncol = params[["n.comp"]]) ## S stores independent components - } + } else if (filter == "ECC"){ ## eccentricity has_pd <- requireNamespace("parallelDist", quietly = TRUE) dist_f <- ifelse(has_pd, parallelDist::parallelDist, stats::dist) params <- modifyList(list(p=1), given_params) p_str <- c("manhattan", "euclidean", "maximum")[match(params$p, list(1, 2, Inf))] - if (params$p != Inf) { + if (params$p != Inf) { self$filter <- matrix(colMeans(as.matrix(dist_f(self$X())^(1/params$p))), ncol = 1) - } else { self$filter <- matrix(apply(as.matrix(dist_f(self$X())), 2, max), ncol = 1) } + } else { self$filter <- matrix(apply(as.matrix(dist_f(self$X())), 2, max), ncol = 1) } } else if (filter == "KDE"){ require_but_ask("ks") @@ -403,39 +403,39 @@ MapperRef$set("public", "use_filter", function(filter=c("PC", "IC", "ECC", "KDE" ## use_distance_measure ---- #' @name use_distance_measure #' @title Assign a distance measure -#' @description Assigns a distance measure to the \code{\link{MapperRef}} instance to use in the clustering algorithm. +#' @description Assigns a distance measure to the \code{\link{MapperRef}} instance to use in the clustering algorithm. #' @section Usage: #' \preformatted{ $use_distance_measure(measure, ...) } -#' @section Arguments: +#' @section Arguments: #' \describe{ #' \item{\code{measure}}{The distance measure to use (string).} #' \item{\code{...}}{Extra parameters passed to the distance function. See details. 
} #' } -#' @section Details: Unless the \code{\link[Mapper:use_clustering_algorithm]{clustering_algorithm}} has been replaced by the user, -#' by default, Mapper requires a notion of distance between objects to be defined in creating the vertices of the construction. -#' +#' @section Details: Unless the \code{\link[Mapper:use_clustering_algorithm]{clustering_algorithm}} has been replaced by the user, +#' by default, Mapper requires a notion of distance between objects to be defined in creating the vertices of the construction. +#' #' The distance function is determined based on the supplied \code{measure}. \code{measure} must be one of: #' \preformatted{["euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"]} #' or, if the \pkg{proxy} and \code{\link[parallelDist]{parallelDist}} packages are installed, any name in \code{proxy::pr_DB$get_entry_names()}.\cr #' \cr -#' Additional parameters passed via \code{...} are passed to either \code{\link[stats]{dist}} (or -#' \code{\link[parallelDist]{parallelDist}} if installed). +#' Additional parameters passed via \code{...} are passed to either \code{\link[stats]{dist}} (or +#' \code{\link[parallelDist]{parallelDist}} if installed). #' @section Value: -#' The mapper instance, with the measure field assigned. -#' @examples +#' The mapper instance, with the measure field assigned. 
+#' @examples #' data(noisy_circle) #' m <- MapperRef$new(noisy_circle) #' m$use_filter(noisy_circle[,1]) #' m$use_cover("fixed interval", number_intervals = 5, percent_overlap = 25) -#' +#' #' ## Constructs clusters with euclidean metric (default) #' m$use_distance_measure("euclidean") -#' m$construct_pullback() -#' +#' m$construct_pullback() +#' #' ## Constructs clusters with p-norm (p = 1) #' m$use_distance_measure("Minkowski", p = 1L) -#' m$construct_pullback() -#' +#' m$construct_pullback() +#' #' \dontrun{ #' ## To see list of available measures, use: #' proxy::pr_DB$get_entry_names() @@ -448,26 +448,26 @@ MapperRef$set("public", "use_distance_measure", function(measure, ...){ }) ## use_cover ---- -#' @name use_cover -#' @title Selects one of the available covering methods to use. -#' @description -#' Convenience method to select, parameterize, and construct a cover and associate it with the calling objects \code{cover} field. -#' @param cover Either a pre-configured cover, or name of the cover to use. -#' @param ... Additional parameter values to pass to the covering generators initialize method. -#' @details Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as -#' is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter -#' space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr -#' \cr -#' The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the -#' requirements of being a valid \code{\link{CoverRef}} instance (e.g. a \code{\link{FixedIntervalCover}}), -#' or one of the cover typenames listed in the \code{\link{covers_available}()}. -#' If a typename is given, the cover is automatically constructed before being assigned to the \code{$cover} field. -#' @examples +#' @name use_cover +#' @title Selects one of the available covering methods to use. 
+#' @description +#' Convenience method to select, parameterize, and construct a cover and associate it with the calling objects \code{cover} field. +#' @param cover Either a pre-configured cover, or name of the cover to use. +#' @param ... Additional parameter values to pass to the covering generators initialize method. +#' @details Every \code{\link{MapperRef}} object requires a \code{\link{CoverRef}} object as +#' is \code{cover} member field. In the context of Mapper, a cover is used to discretize the filter +#' space into a partition, which is then used via a \emph{pullback} operation to construct the vertices. \cr +#' \cr +#' The \code{\link{MapperRef}} class makes no restrictions on the cover that is used; only that it fulfills the +#' requirements of being a valid \code{\link{CoverRef}} instance (e.g. a \code{\link{FixedIntervalCover}}), +#' or one of the cover typenames listed in the \code{\link{covers_available}()}. +#' If a typename is given, the cover is automatically constructed before being assigned to the \code{$cover} field. 
+#' @examples #' data(noisy_circle) #' m <- MapperRef$new(noisy_circle) #' m$use_filter(noisy_circle[,1]) #' m$use_cover("fixed interval", number_intervals = 5, percent_overlap = 25) -#' +#' #' ## Alternative way to specify (and construct) the cover #' cover <- FixedIntervalCover$new(number_intervals = 5, percent_overlap = 25) #' cover$construct_cover(filter = m$filter) @@ -475,73 +475,75 @@ MapperRef$set("public", "use_distance_measure", function(measure, ...){ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ stopifnot(!is.null(self$filter)) if (missing(cover)){ cover <- "fixed interval"} - self$cover <- switch(cover, - "fixed interval"=FixedIntervalCover$new(...)$construct_cover(self$filter), + self$cover <- switch(cover, + "fixed interval"=FixedIntervalCover$new(...)$construct_cover(self$filter), "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), + "ys_ball"=YS_BallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) + print("leaving use cover") invisible(self) }) -#@param filter_values (n x d) numeric matrix of values giving the results of the map. +#@param filter_values (n x d) numeric matrix of values giving the results of the map. ## use_clustering_algorithm ---- -#' @name use_clustering_algorithm -#' @title Sets the clustering algorithm +#' @name use_clustering_algorithm +#' @title Sets the clustering algorithm #' @description Sets the clustering algorithm used to construct the connected components in the pullback cover. #' @param cl Either one of the link criteria used in the \code{\link[stats]{hclust}} function (string) or a function. See details. #' @param cutoff_method Type of heuristic to determine cut value. See details. Ignored is \code{cl} is a function. 
-#' @param ... Additional parameters passed as defaults to the cutoff method. See details. -#' @details If \code{cl} is a linkage criterion, a standard hierarchical clustering algorithm is used with suitable default parameters. -#' If it is a function, it must have a signature compatible with \code{function(pid, idx, self, ...)} where -#' \code{pid} is the index of the (pullback) set to cluster (see \code{CoverRef}), +#' @param ... Additional parameters passed as defaults to the cutoff method. See details. +#' @details If \code{cl} is a linkage criterion, a standard hierarchical clustering algorithm is used with suitable default parameters. +#' If it is a function, it must have a signature compatible with \code{function(pid, idx, self, ...)} where +#' \code{pid} is the index of the (pullback) set to cluster (see \code{CoverRef}), #' \code{idx} are the indices of the points in \code{X} to cluster on, and -#' \code{self} is the \code{MapperRef} instance environment. +#' \code{self} is the \code{MapperRef} instance environment. #' \cr #' The cutoff method may be one of either c("continuous", "histogram"). See \code{\link{cutoff_first_bin}} and \code{\link{cutoff_first_threshold}} -#' for what they each correspond to, respectively. Additional named parameters passed via \code{...} act as defaults to the cutting method chosen. -#' If not chosen, reasonable defaults are used. \cr +#' for what they each correspond to, respectively. Additional named parameters passed via \code{...} act as defaults to the cutting method chosen. +#' If not chosen, reasonable defaults are used. \cr #' \cr -#' Additional named parameters passed via the dots in \code{$clustering_algorithm} -#' will taken precedence and override all previously set default settings. 
-MapperRef$set("public", "use_clustering_algorithm", - function(cl = c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid"), +#' Additional named parameters passed via the dots in \code{$clustering_algorithm} +#' will taken precedence and override all previously set default settings. +MapperRef$set("public", "use_clustering_algorithm", + function(cl = c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid"), cutoff_method = c("continuous", "histogram"), ...){ ## Use a linkage criterion + cutting rule default_cutting_params <- list(...) if (missing(cl)) { cl <- "single" } - if (missing(cutoff_method)){ cutoff_method <- "continuous" } + if (missing(cutoff_method)){ cutoff_method <- "continuous" } if (class(cl) == "character"){ hclust_opts <- c("single", "ward.D", "ward.D2", "complete", "average", "mcquitty", "median", "centroid") if (!cl %in% hclust_opts){ stop(sprint("Unknown linkage method passed. Please use one of (%s). See ?hclust for details.", paste0(hclust_opts, collapse = ", "))) } stopifnot(is.character(self$measure)) - ## Closured representation to substitute default parameters. + ## Closured representation to substitute default parameters. create_cl <- function(cl, cutoff_method, cut_defaults){ cutoff_f <- switch(cutoff_method, "histogram"=cutoff_first_bin, "continuous"=cutoff_first_threshold) function(pid, idx, self, ...){ if (is.null(idx) || length(idx) == 0){ return(integer(0L)) } if (length(idx) <= 2L){ return(rep(1L, length(idx))); } override_params <- list(...) 
- + ## Use parallelDist package if available has_pd <- requireNamespace("parallelDist", quietly = TRUE) dist_f <- ifelse(has_pd, parallelDist::parallelDist, stats::dist) dist_params <- list(x=self$X(idx), method=tolower(self$measure)) if (!is.null(private$.measure_opt)){ dist_params <- append(dist_params, private$.measure_opt) } dist_x <- do.call(dist_f, dist_params) - - ## Use fastcluster if available + + ## Use fastcluster if available has_fc <- requireNamespace("fastcluster", quietly = TRUE) cl_f <- ifelse(has_fc, fastcluster::hclust, stats::hclust) hcl <- do.call(cl_f, list(d=dist_x, method=cl)) - - cutoff_params <- if (cutoff_method == "histogram"){ - list(hcl = hcl, num_bins = 10L) + + cutoff_params <- if (cutoff_method == "histogram"){ + list(hcl = hcl, num_bins = 10L) } else { list(hcl = hcl, threshold = 0.0) } cutoff_params <- modifyList(modifyList(cutoff_params, cut_defaults), override_params) - + ## Use heuristic cutting value to produce the partitioning eps <- do.call(cutoff_f, cutoff_params) return(cutree(hcl, h = eps)) @@ -556,18 +558,18 @@ MapperRef$set("public", "use_clustering_algorithm", ) ## construct_pullback ---- -#' @name construct_pullback -#' @title Constructs the (decomposed) pullback cover. -#' @description Executes the clustering algorithm on subsets of \code{X}, +#' @name construct_pullback +#' @title Constructs the (decomposed) pullback cover. +#' @description Executes the clustering algorithm on subsets of \code{X}, #' @param pullback_ids indices of the \code{\link[Mapper:CoverRef]{covers}} \code{index_set}, or \code{NULL}. #' @param ... additional parameters to pass to the \code{clustering_algorithm}. -#' @details This methods uses the function given by \code{clustering_algorithm} field to -#' decompose the preimages returned by the \code{cover} member into connected components, which are -#' stored as \code{vertices}. 
Indices may be passed to limit which sets are decomposed, otherwise +#' @details This methods uses the function given by \code{clustering_algorithm} field to +#' decompose the preimages returned by the \code{cover} member into connected components, which are +#' stored as \code{vertices}. Indices may be passed to limit which sets are decomposed, otherwise #' the sets in the cover all considered. #' \cr -#' Note that this method removes the \code{vertices} associated with \code{pullback_ids}, but does not -#' modify the simplicial complex. +#' Note that this method removes the \code{vertices} associated with \code{pullback_ids}, but does not +#' modify the simplicial complex. #' @seealso \code{\link{construct_k_skeleton}} \code{\link{construct_nerve}} \code{\link{use_clustering_algorithm}} MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ stopifnot(!is.null(self$cover$level_sets)) @@ -575,7 +577,7 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ pids_supplied <- (!missing(pullback_ids) && !is.null(pullback_ids)) if (pids_supplied){ stopifnot(all(pullback_ids %in% self$cover$index_set)) } pullback_ids <- if (pids_supplied){ pullback_ids } else { self$cover$index_set } - + print("constructing pullback") ## If specific pullback ids not given, then resist the pullback mapping if (!pids_supplied || length(private$.pullback) == 0){ n_sets <- length(self$cover$index_set) @@ -583,14 +585,16 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ self$vertices <- list() private$.pullback <- structure(replicate(n_sets, integer(0)), names = self$cover$index_set) } - - ## Update the vertices for the given level sets. This requires updating both the simplex tree's - ## internal representation as well as the outward-facing vertex list. The new vertices are returned - ## to replace the current list. The pullback map is also updated. 
+ print("after if") + ## Update the vertices for the given level sets. This requires updating both the simplex tree's + ## internal representation as well as the outward-facing vertex list. The new vertices are returned + ## to replace the current list. The pullback map is also updated. calc_preimage <- function(){ + print("begin preimage") function(index){ self$cover$construct_cover(self$filter, index) } } partial_cluster <- function(...){ + print("begin cluster") extra <- list(...) function(pid, idx){ do.call(self$clustering_algorithm, append(list(pid, idx, self), extra)) } } @@ -603,29 +607,29 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # private$.simplicial_complex$insert(as.list(ids)) # return(ids) # } - + print("decomposing") ## Do the decomposition private$.vertices <- Mapper:::decompose_preimages( pullback_ids = as.character(pullback_ids), - cluster_f = partial_cluster(...), - level_set_f = calc_preimage(), + cluster_f = partial_cluster(...), + level_set_f = calc_preimage(), vertices = private$.vertices, # id_generator = id_gen, pullback = private$.pullback ) - + print("made pullback") ## Return self invisible(self) }) ## construct_nerve ---- -#' @name construct_nerve -#' @title Compute the nerve of the cover. -#' @description Computes (or updates) the k-simplices composing the Mapper, where k >= 0. -#' @param k The order of the simplices to construct. See details. +#' @name construct_nerve +#' @title Compute the nerve of the cover. +#' @description Computes (or updates) the k-simplices composing the Mapper, where k >= 0. +#' @param k The order of the simplices to construct. See details. #' @param indices (n x k) matrix of indices of the covers index set to update. #' @param min_weight minimum intersection size to consider as a simplex. Defaults to 1. 
-#' @details Compared to \code{construct_k_skeleton}, this method \emph{only} intersections +#' @details Compared to \code{construct_k_skeleton}, this method \emph{only} intersections #' between (k-1) simplices in the complex. MapperRef$set("public", "construct_nerve", function(k, indices = NULL, min_weight=1L, modify=TRUE){ stopifnot(length(private$.vertices) > 0) @@ -639,49 +643,49 @@ MapperRef$set("public", "construct_nerve", function(k, indices = NULL, min_weigh ## If k==0 is specified, just build the vertices stree_ptr <- private$.simplicial_complex$as_XPtr() - if (k == 0){ - if (!modify){ return(as.integer(names(self$vertices))) } + if (k == 0){ + if (!modify){ return(as.integer(names(self$vertices))) } build_0_skeleton(as.integer(names(self$vertices)), stree_ptr) return(invisible(self)) } - - ## Retrieve the valid level set index pairs to compare. In the worst case, with no cover-specific optimization, + + ## Retrieve the valid level set index pairs to compare. In the worst case, with no cover-specific optimization, ## this may just be all pairwise combinations of LSFI's for the full simplicial complex. indices <- if (idx_specified){ indices } else { self$cover$neighborhood(self$filter, k) } - + ## If no indices to compute, nothing to do for k > 1. return self invisibly. 
if (is.null(indices)){ return(invisible(self)) } - + ## Build the parameter list params <- list(pullback_ids = indices, vertices = self$vertices, pullback = private$.pullback, stree = stree_ptr, modify = modify) - if (k == 1){ - params <- modifyList(params, list(min_sz = min_weight)) - if (!params$modify){ return(do.call(rbind, do.call(build_1_skeleton, params)) ) } + if (k == 1){ + params <- modifyList(params, list(min_sz = min_weight)) + if (!params$modify){ return(do.call(rbind, do.call(build_1_skeleton, params)) ) } do.call(build_1_skeleton, params) return(invisible(self)) - } else { - params <- modifyList(params, list(k = k)) - if (!params$modify){ return(do.call(rbind, do.call(build_k_skeleton, params)) ) } + } else { + params <- modifyList(params, list(k = k)) + if (!params$modify){ return(do.call(rbind, do.call(build_k_skeleton, params)) ) } do.call(build_k_skeleton, params) return(invisible(self)) - } + } }) ## construct_k_skeleton ---- -#' @name construct_k_skeleton +#' @name construct_k_skeleton #' @title Constructs the k-skeleton #' @param k the maximal dimension to consider. #' @description Computes the k-skeleton of the mapper by computing the nerve of the pullback of \code{cover} member. -#' @details -#' The primary output of the Mapper method is a simplicial complex. With \code{\link{MapperRef}} objects, -#' the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods -#' (e.g. this method, \code{\link{construct_nerve}}, etc.). \cr -#' \cr -#' This function computes the k-skeleton inductively, e.g. by first computing the vertices, -#' then the edges, etc. up to the dimension specified. A check is performed to ensure the pullback has been decomposed, and +#' @details +#' The primary output of the Mapper method is a simplicial complex. 
With \code{\link{MapperRef}} objects, +#' the simplicial complex is stored as a \code{\link[simplextree]{simplextree}}. The underlying complex does not need to be modified by the user, i.e. is completely maintained by \code{\link{MapperRef}} methods +#' (e.g. this method, \code{\link{construct_nerve}}, etc.). \cr +#' \cr +#' This function computes the k-skeleton inductively, e.g. by first computing the vertices, +#' then the edges, etc. up to the dimension specified. A check is performed to ensure the pullback has been decomposed, and #' if not, then \code{\link{construct_pullback}} is called. \cr #' \cr -#' For an algorithmic description of this process, see Singh et. al, section 3.2. +#' For an algorithmic description of this process, see Singh et. al, section 3.2. #' @references Gurjeet Singh, Facundo Mémoli, and Gunnar Carlsson. "Topological methods for the analysis of high dimensional data sets and 3d object recognition." SPBG. 2007. MapperRef$set("public", "construct_k_skeleton", function(k=1L){ stopifnot(k >= 0) @@ -705,19 +709,19 @@ MapperRef$set("public", "format", function(...){ }) ## as_igraph ---- -#' @name as_igraph +#' @name as_igraph #' @title Exports Mapper as an igraph object. #' @description Exports the 1-skeleton to a graph using the igraph library. -#' @param vertex_scale scaling function for the vertex sizes. -#' @param vertex_min minimum vertex size. -#' @param vertex_min maximum vertex size. -#' @param col_pal color palette to color the vertices by. -#' @details This method converts the 1-skeleton of the Mapper to an igraph object, and assigns some -#' default visual properties. Namely, the vertex attributes "color", "size", and "label" and the -#' edge attribute "color" are assigned. -#' The vertex colors are colored according on the given color palette (default is rainbow) according -#' to their mean filter value (see \code{\link{bin_color}}). 
The vertex sizes are scaled according -#' to the number of points they contain, scaled by \code{vertex_scale}, and bounded between +#' @param vertex_scale scaling function for the vertex sizes. +#' @param vertex_min minimum vertex size. +#' @param vertex_min maximum vertex size. +#' @param col_pal color palette to color the vertices by. +#' @details This method converts the 1-skeleton of the Mapper to an igraph object, and assigns some +#' default visual properties. Namely, the vertex attributes "color", "size", and "label" and the +#' edge attribute "color" are assigned. +#' The vertex colors are colored according on the given color palette (default is rainbow) according +#' to their mean filter value (see \code{\link{bin_color}}). The vertex sizes are scaled according +#' to the number of points they contain, scaled by \code{vertex_scale}, and bounded between #' (\code{vertex_min}, \code{vertex_max}). The vertex labels are in the format ":".\cr #' \cr #' The edges are colored similarly by the average filter value of the points intersecting @@ -728,15 +732,15 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v am <- private$.simplicial_complex$as_adjacency_matrix() colnames(am) <- as.character(private$.simplicial_complex$vertices) G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NULL) ## NULL makes named vertices - + ## Coloring + aggregation functions - agg_val <- function(lst) { sapply(sapply(lst, function(idx){ rowMeans(self$filter(idx)) }), mean) } - + agg_val <- function(lst) { sapply(sapply(lst, function(idx){ rowMeans(self$filter(idx)) }), mean) } + ## Aggregate node filter values v_idx <- match(private$.simplicial_complex$vertices, as.integer(names(self$vertices))) agg_node_val <- agg_val(private$.vertices) igraph::vertex_attr(G, name = "color") <- bin_color(agg_node_val[v_idx], col_pal = col_pal) - + ## Extract indices in the edges edges <- igraph::as_edgelist(G) edge_idx <- 
lapply(seq(nrow(edges)), function(i){ @@ -745,9 +749,9 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v }) agg_edge_val <- agg_val(edge_idx) igraph::edge_attr(G, name = "color") <- bin_color(agg_edge_val, col_pal = col_pal) - + ## Normalize between 0-1, unless all the same - normalize <- function(x) { + normalize <- function(x) { if (all(x == x[1])){ return(rep(1, length(x))) } else { (x - min(x))/(max(x) - min(x)) } } @@ -784,75 +788,75 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # top_vids <- names(m$vertices)[order(v_len, decreasing = TRUE)][1:v_idx] # avg_v_f <- sapply(vids, function(vid) { mean(rowMeans(m$filter(m$vertices[[vid]]))) }) # row_col <- bin_color(avg_v_f)[match(top_vids, vids)] -# # list("matrix_rows, colors = c(Boston = "green", NYC = "navy", LA = "purple"), +# # list("matrix_rows, colors = c(Boston = "green", NYC = "navy", LA = "purple"), # # alpha = 0.5))) # vids_int <- as.integer(top_vids) -# -# +# +# # ## Color intersections by f # res_f <- m$simplicial_complex$ltraverse(empty_face, function(simplex){ # if (all(simplex %in% vids_int)){ -# list(simplex = simplex, f_val = mean(avg_v_f[match(simplex, vids)])) +# list(simplex = simplex, f_val = mean(avg_v_f[match(simplex, vids)])) # } -# }, type = "dfs") +# }, type = "dfs") # res_f <- Filter(function(x) { !is.null(x) }, res_f) -# params <- lapply(res_f, function(el){ -# list(query = intersects, +# params <- lapply(res_f, function(el){ +# list(query = intersects, # params = as.list(paste0("v", as.character(el$simplex))), -# color = bin_color(avg_v_f)[match(el$simplex, vids)], +# color = bin_color(avg_v_f)[match(el$simplex, vids)], # active = FALSE # ) # }) # sapply(params, function(p) unlist(p[[2]])) -# +# # # queries = list(list(query = intersects, params = list("Drama"), color = "red", active = F) # us_df <- as.data.frame(x_groups) # meta_data <- data.frame(sets=as.factor(paste0("v", top_vids))) # meta_data$cat_id <- 
as.character(paste0("v", top_vids)) # id_color_map <- structure(row_col, names=as.character(meta_data$cat_id)) -# UpSetR::upset(data = us_df, nsets = v_idx, -# sets.x.label = "Node size", -# scale.intersections = "identity", +# UpSetR::upset(data = us_df, nsets = v_idx, +# sets.x.label = "Node size", +# scale.intersections = "identity", # set.metadata = list( # data=meta_data, # plots=list(list(type="matrix_rows", column="cat_id", colors=id_color_map, alpha=0.5)) # ) # ) -# -# # queries = wut, +# +# # queries = wut, # # queries = list(list(query = intersects, params = list("v43"), color = "red", active = F)), -# #queries = list(list(query = intersects, params = list("v43", "v37"), color = "red", active = F)), +# #queries = list(list(query = intersects, params = list("v43", "v37"), color = "red", active = F)), # mb.ratio = c(0.35, 0.65)) # sets.bar.color = row_col -# -# -# -# +# +# +# +# # }) # MapperRef$set("public", "as_grapher", function(construct_widget=TRUE, ...){ # requireNamespace("grapher", quietly = TRUE) -# -# ## Make the igraph +# +# ## Make the igraph # am <- private$.simplicial_complex$as_adjacency_matrix() -# G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NA) +# G <- igraph::graph_from_adjacency_matrix(am, mode = "undirected", add.colnames = NA) # json_config <- grapher::getDefaultJsonConfig(network=G) -# +# # ## Color nodes and edges by a default rainbow palette # rbw_pal <- rev(rainbow(100, start = 0, end = 4/6)) -# agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ +# agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ # apply(as.matrix(self$cover$filter_values[v_idx,]), 1, mean) # }), mean) # binned_idx <- cut(agg_node_val, breaks = 100, labels = F) # vertex_colors <- rbw_pal[binned_idx] -# vertex_sizes <- sapply(private$.vertices, length) -# +# vertex_sizes <- sapply(private$.vertices, length) +# # ## Normalize between 0-1, unless all the same -# normalize <- function(x) { +# normalize 
<- function(x) { # if (all(x == x[1])){ return(rep(1, length(x))) } # else { (x - min(x))/(max(x) - min(x)) } # } -# +# # ## Create the vertices. By default, color them on a rainbow palette according to their mean filter value. # if (length(igraph::V(G)) > 0){ # vertex_radii <- (7L - 2L)*normalize(vertex_sizes) + 2L @@ -865,7 +869,7 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # } else { # json_config$nodes <- integer(length = 0L) # } -# +# # ## Create the edges w/ a similar coloring scheme. # if (length(igraph::E(G)) > 0){ # el <- igraph::as_edgelist(G, names = FALSE) @@ -873,14 +877,14 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v # edge_links <- matrix(apply(el, 2, as.integer) - 1L, ncol = 2) # json_config$links$from <- edge_links[,1] # json_config$links$to <- edge_links[,2] -# json_config$links$color <- substr(rbw_pal[edge_binned_idx], start = 0, stop = 7) +# json_config$links$color <- substr(rbw_pal[edge_binned_idx], start = 0, stop = 7) # } else { # json_config$links <- integer(length = 0L) # } -# -# ## Return the grapher instance or just the json config if requested +# +# ## Return the grapher instance or just the json config if requested # if (construct_widget){ -# grapher::grapher(json_config) +# grapher::grapher(json_config) # } else { # return(json_config) # } @@ -889,22 +893,22 @@ MapperRef$set("public", "as_igraph", function(vertex_scale=c("linear", "log"), v ## as_pixiplex ---- #' @name as_pixiplex #' @title Exports the complex as a pixiplex object. -#' @description Uses the pixiplex library. +#' @description Uses the pixiplex library. 
MapperRef$set("public", "as_pixiplex", function(...){ requireNamespace("pixiplex", quietly = TRUE) pp <- pixiplex::pixiplex(private$.simplicial_complex) - + ## Default styling rbw_pal <- rev(rainbow(100, start = 0, end = 4/6)) - agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ + agg_node_val <- sapply(sapply(private$.vertices, function(v_idx){ rowMeans(self$filter(v_idx)) }), mean) binned_idx <- cut(agg_node_val, breaks = 100, labels = F) vertex_colors <- rbw_pal[binned_idx] - vertex_sizes <- sapply(private$.vertices, length) - + vertex_sizes <- sapply(private$.vertices, length) + ## Normalize between 0-1, unless all the same - normalize <- function(x) { + normalize <- function(x) { if (all(x == x[1])){ return(rep(1, length(x))) } else { (x - min(x))/(max(x) - min(x)) } } @@ -919,20 +923,20 @@ MapperRef$set("public", "as_pixiplex", function(...){ #' @title Exports minimal information about the Mapper #' @description This function exports a few core characteristics about the mapper complex as a list. #' @param graph_type export preference on the structure the graph output. -#' @return List with the following members: +#' @return List with the following members: #' \itemize{ #' \item \emph{vertices} list of the indices of the original data the current vertex intersects with. #' \item \emph{graph} some adjacency representation of the mapper graph. #' \item \emph{pullback} map connecting the index set of the cover and the vertices. #' } -#' @details \code{graph_type} must be one of 'adjacency_matrix', 'adjacency_list', or 'edgelist'. +#' @details \code{graph_type} must be one of 'adjacency_matrix', 'adjacency_list', or 'edgelist'. 
MapperRef$set("public", "exportMapper", function(graph_type=c("adjacency_matrix", "adjacency_list", "edgelist")){ result <- list(pullback = private$.pullback, vertices = private$.vertices) if (missing(graph_type)){ graph_type <- "adjacency_matrix" } - result$graph <- switch(graph_type, + result$graph <- switch(graph_type, "adjacency_matrix"=self$simplicial_complex$as_adjacency_matrix(), - "adjacency_list"=self$simplicial_complex$as_adjacency_list(), - "edgelist"=self$simplicial_complex$as_edge_list(), + "adjacency_list"=self$simplicial_complex$as_adjacency_list(), + "edgelist"=self$simplicial_complex$as_edge_list(), stop("'graph_type' must be one of: 'adjacency_matrix', 'adjacency_list', 'edgelist'")) n_simplices <- self$simplicial_complex$n_simplices x_dim <- ncol(self$X()) @@ -944,4 +948,3 @@ MapperRef$set("public", "exportMapper", function(graph_type=c("adjacency_matrix" class(result) <- "Mapper" return(result) }) - diff --git a/src/landmarks.cpp b/src/landmarks.cpp index 19e1588..b414b17 100644 --- a/src/landmarks.cpp +++ b/src/landmarks.cpp @@ -12,7 +12,7 @@ inline double sq_dist(const NumericVector& x, const NumericVector& y){ } // Eccentricity -// Computes a distance measure from the barycenter of the data, or from a given center, if provided. +// Computes a distance measure from the barycenter of the data, or from a given center, if provided. // [[Rcpp::export]] NumericVector eccentricity(const NumericMatrix& from, const NumericMatrix& x, const int type = 1) { if (from.ncol() != x.ncol()){ stop("Matrices must have identical number of columns."); } @@ -29,25 +29,26 @@ NumericVector eccentricity(const NumericMatrix& from, const NumericMatrix& x, co return(wrap(pt_ecc)); } -// Uses the euclidean maxmin procedure to choose n landmarks. +// Uses the euclidean maxmin procedure to choose n landmarks. 
// [[Rcpp::export]] IntegerVector landmark_maxmin(const NumericMatrix& x, const int n, const int seed = 0) { const size_t n_pts = x.nrow(); std::vector< double > lm_dist(n_pts, std::numeric_limits::infinity()); IntegerVector landmark_idx = no_init_vector(n); - landmark_idx[seed] = 0; + //landmark_idx[seed] = 0; + landmark_idx[0] = seed; IntegerVector::iterator c_landmark = landmark_idx.begin(); double new_min_dist; std::generate(landmark_idx.begin()+1, landmark_idx.end(), [&](){ size_t i = 0; new_min_dist = std::numeric_limits::infinity(); - + // Replace nearest landmark distances std::replace_if(lm_dist.begin(), lm_dist.end(), [&i, &x, &c_landmark, &new_min_dist](const double c_dist){ new_min_dist = sq_dist(x.row(*c_landmark), x.row(i++)); return(new_min_dist < c_dist); }, new_min_dist); - + // Find the point that maximizes said distances, move to next landmark c_landmark++; auto max_landmark = std::max_element(lm_dist.begin(), lm_dist.end()); From 4332a690b1b1fb8cc0b603a9573152e0fbad672a Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:45:42 -0500 Subject: [PATCH 02/25] chore: add Mapper.dll to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 097b3de..7bb888c 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ ignore/ doc Meta *cache +src/Mapper.dll From 05b143f033e5bb4f4bdd356fe335750bc0e4eb19 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 21:52:05 -0500 Subject: [PATCH 03/25] refactor(LandmarkBallCover): rename YS_BallCover to LandmarkBallCover --- R/{YS_BallCover.R => LandmarkBallCover.R} | 12 ++++++------ R/mapperRef.R | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) rename R/{YS_BallCover.R => LandmarkBallCover.R} (95%) diff --git a/R/YS_BallCover.R b/R/LandmarkBallCover.R similarity index 95% rename from R/YS_BallCover.R rename to R/LandmarkBallCover.R index 
b845746..d4f8fcc 100644 --- a/R/YS_BallCover.R +++ b/R/LandmarkBallCover.R @@ -15,14 +15,14 @@ library(proxy) # TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed # Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) -YS_BallCover <- R6::R6Class( - classname = "YS_BallCover", +LandmarkBallCover <- R6::R6Class( + classname = "LandmarkBallCover", inherit = CoverRef, public = list(epsilon=NULL, num_sets=NULL, seed_index=1, seed_method="SPEC") ) ## initialize ------ -YS_BallCover$set("public", "initialize", function(...){ +LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="ys_ball") params <- list(...) if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } @@ -32,7 +32,7 @@ YS_BallCover$set("public", "initialize", function(...){ }) ## validate ------ -YS_BallCover$set("public", "validate", function(filter){ +LandmarkBallCover$set("public", "validate", function(filter){ stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) writeLines("\n(eps, num_sets, seed_index, seed_method):\n") print(self$epsilon) @@ -42,7 +42,7 @@ YS_BallCover$set("public", "validate", function(filter){ }) ## format ---- -YS_BallCover$set("public", "format", function(...){ +LandmarkBallCover$set("public", "format", function(...){ titlecase <- function(x){ s <- strsplit(x, " ")[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") @@ -56,7 +56,7 @@ YS_BallCover$set("public", "format", function(...){ }) ## construct_cover ------ -YS_BallCover$set("public", "construct_cover", function(filter, index=NULL){ +LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ writeLines("Index:") print(index) print("constructing") diff --git a/R/mapperRef.R b/R/mapperRef.R index a79ebe2..7bae5df 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -480,7 +480,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed 
interval", ...){ "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), - "ys_ball"=YS_BallCover$new(...)$construct_cover(self$filter), + "ys_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) print("leaving use cover") From 08397866b6959cf259e3605da14022ffa058ccae Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 22:04:15 -0500 Subject: [PATCH 04/25] refactor(LandmarkBallCover): rename typename ys_ball to landmark_ball --- R/LandmarkBallCover.R | 2 +- R/mapperRef.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index d4f8fcc..1d94a69 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -23,7 +23,7 @@ LandmarkBallCover <- R6::R6Class( ## initialize ------ LandmarkBallCover$set("public", "initialize", function(...){ - super$initialize(typename="ys_ball") + super$initialize(typename="landmark_ball") params <- list(...) 
if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } diff --git a/R/mapperRef.R b/R/mapperRef.R index 7bae5df..235e849 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -480,7 +480,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ "restrained interval"=RestrainedIntervalCover$new(...)$construct_cover(self$filter), # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), - "ys_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), + "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) print("leaving use cover") From 5568886d52013a623a2e965a45d75f182a8f5306 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Sun, 23 Feb 2020 22:18:39 -0500 Subject: [PATCH 05/25] refactor(LandmarkBallCover): remove print statements --- R/LandmarkBallCover.R | 29 ++--------------------------- R/mapperRef.R | 7 ------- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 1d94a69..55d8c38 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -34,11 +34,6 @@ LandmarkBallCover$set("public", "initialize", function(...){ ## validate ------ LandmarkBallCover$set("public", "validate", function(filter){ stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) - writeLines("\n(eps, num_sets, seed_index, seed_method):\n") - print(self$epsilon) - print(self$num_sets) - print(self$seed_index) - print(self$seed_method) }) ## format ---- @@ -57,16 +52,11 @@ LandmarkBallCover$set("public", "format", function(...){ ## construct_cover ------ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ - writeLines("Index:") - print(index) - 
print("constructing") if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } - print("validating") self$validate() - print("filtering") ## Get filter values fv <- filter() f_dim <- ncol(fv) @@ -77,24 +67,16 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(is.null(index)){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) - print(self$seed_index) - print("rand") } if(all(self$seed_method == "ECC")) { - #self$seed_index = sample(1:f_size, 1) - print(self$seed_index) - print("ecc") } } if (!is.null(self$num_sets)) { - print("seed") - print(self$seed_index) - print(self$num_sets) eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - print(eps_lm) + ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) @@ -105,13 +87,9 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Construct the index set + the preimages self$index_set <- as.character(eps_lm) - print(self$index_set) self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - print("done level sets") }else{ if (!is.null(self$epsilon)) { - print("eps") - } } @@ -187,12 +165,9 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) # # self$level_sets <- structure(ls, names=self$index_set) - print("test") - print(index) + if (!missing(index)){ - print("hello") return(self$level_sets[[index]]) } - print("done") ## Always return self invisible(self) diff --git a/R/mapperRef.R b/R/mapperRef.R index 235e849..fd9eea2 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -483,7 +483,6 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: 
%s, please specify a cover typename listed in `covers_available()`", cover)) ) - print("leaving use cover") invisible(self) }) #@param filter_values (n x d) numeric matrix of values giving the results of the map. @@ -577,7 +576,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ pids_supplied <- (!missing(pullback_ids) && !is.null(pullback_ids)) if (pids_supplied){ stopifnot(all(pullback_ids %in% self$cover$index_set)) } pullback_ids <- if (pids_supplied){ pullback_ids } else { self$cover$index_set } - print("constructing pullback") ## If specific pullback ids not given, then resist the pullback mapping if (!pids_supplied || length(private$.pullback) == 0){ n_sets <- length(self$cover$index_set) @@ -585,16 +583,13 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ self$vertices <- list() private$.pullback <- structure(replicate(n_sets, integer(0)), names = self$cover$index_set) } - print("after if") ## Update the vertices for the given level sets. This requires updating both the simplex tree's ## internal representation as well as the outward-facing vertex list. The new vertices are returned ## to replace the current list. The pullback map is also updated. calc_preimage <- function(){ - print("begin preimage") function(index){ self$cover$construct_cover(self$filter, index) } } partial_cluster <- function(...){ - print("begin cluster") extra <- list(...) 
function(pid, idx){ do.call(self$clustering_algorithm, append(list(pid, idx, self), extra)) } } @@ -607,7 +602,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # private$.simplicial_complex$insert(as.list(ids)) # return(ids) # } - print("decomposing") ## Do the decomposition private$.vertices <- Mapper:::decompose_preimages( pullback_ids = as.character(pullback_ids), @@ -617,7 +611,6 @@ MapperRef$set("public", "construct_pullback", function(pullback_ids=NULL, ...){ # id_generator = id_gen, pullback = private$.pullback ) - print("made pullback") ## Return self invisible(self) }) From 5275dfa98e87aff02145e7e8c15fd5a61d0cad93 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 4 Mar 2020 22:28:50 -0500 Subject: [PATCH 06/25] feat(LandmarkBallCover): add fixed epsilon width balls Addition of ability to specify a radius rather than a number of balls to construct a LandmarkBallCover. Modified LandmarkBallCover.R to support use of epsilon parameter, added new landmark function to landmarks.R to find landmarks by radius rather than number. --- R/LandmarkBallCover.R | 125 ++++++++++++------------------------------ R/landmarks.R | 96 +++++++++++++++++++++++--------- 2 files changed, 105 insertions(+), 116 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 55d8c38..2bee8d2 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -5,7 +5,7 @@ #' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes #' the filter space endowed with the euclidean metric. #' -#' This differs from the DisjointBallCover in that it does NOT union intersecting cover sets. +#' This differs from the BallCover in that it does NOT union intersecting cover sets. 
#' #' @field epsilon := radius of the ball to form around each point #' @author Cory Brunsion, Yara Skaf @@ -13,8 +13,8 @@ library(proxy) -# TODO: make default spec with default seed 1l -> user can use default 1 or specify other seed -# Seed methods: SPEC, RAND, ECC (or specify seed_index, unspecified uses the first index) +# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) +# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", inherit = CoverRef, @@ -44,9 +44,10 @@ LandmarkBallCover$set("public", "format", function(...){ } if (!is.null(self$num_sets)) { sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) - } - if (!is.null(self$epsilon)) { - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + }else{ + if (!is.null(self$epsilon)) { + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + } } }) @@ -59,112 +60,56 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Get filter values fv <- filter() - f_dim <- ncol(fv) f_size <- nrow(fv) - ## Set the seed index if necessary if(is.null(index)){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { + # todo } - } - if (!is.null(self$num_sets)) { - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) # compute landmark set ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point 
from the landmark set so that all points are in at least one ball pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - ## Construct the index set + the preimages + ## Construct the index set self$index_set <- as.character(eps_lm) - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - }else{ - if (!is.null(self$epsilon)) { - } - } + ## Construct the preimages + if(length(eps_lm) == 1){ + orderedIndices = apply(dist_to_lm,1, sort) + max = which.max(orderedIndices) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + } + }else if (!is.null(self$epsilon)) { + eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + ## Construct the index set + self$index_set <- as.character(eps_lm) - ## Construct the balls - # if spec, elif rand, elif ecc -> set self$seed_index - # then if epsilon elif num_sets using self$seed_index - # if (!is.null(self$seed_method)){ - # if (all(self$seed_method == "RAND")) { - # self$seed_index = sample(1:f_size, 1) - # print(self$seed_index) - # print("rand") - # eps_lm <- landmarks(fv, self$num_sets, seed_index=self$seed_index) - # - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, 
fv[eps_lm,,drop=FALSE]) - # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } # TODO: does this make sense? we shouldnt be able to sepcify both eps and num_sets - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # writeLines("set index set") - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # writeLines("set level sets") - # } - # if (all(self$seed_method == "ECC")) { - # print("ecc") - # } - # }else{ - # if (!is.null(self$num_sets)){ - # print("seed") - # print(self$seed_index) - # print(self$num_sets) - # eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - # print(eps_lm) - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - # - # orderedIndices = t(apply(dist_to_lm,1, sort)) - # max = which.max(orderedIndices[,1]) - # self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball - # pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # print(self$index_set) - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # }else { - # writeLines("else") - # eps_lm <- landmarks(fv, 2L) - # - # ## Get distance from each point to landmarks - # dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - # pts_within_eps <- function(lm_dist){ which(lm_dist < self$epsilon) } - # - # ## Construct the index set + the preimages - # self$index_set <- as.character(eps_lm) - # self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) - # } - # } - - - # print("making balls") - # ## Construct the balls - # ball_cover <- RANN::nn2(fv, query = fv, searchtype = "radius", radius = self$epsilon) - # - # ## Assign 
the centers of the balls as the index set - # self$index_set <- as.character(ball_cover[["nn.idx"]][,1]) - # - # ## Calculate level (pullback) sets - # ls <- lapply(seq_len(nrow(ball_cover[["nn.idx"]])), function(i) ball_cover[["nn.idx"]][i,]) - # ls <- lapply(ls, function(i) Filter(function(x) any(x != 0), i)) - # - # self$level_sets <- structure(ls, names=self$index_set) + ## Construct the preimages + if(length(eps_lm) == 1){ + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + } + } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/landmarks.R b/R/landmarks.R index 4791643..7005b2f 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -1,39 +1,83 @@ #' landmarks #' @description Computes a set of \code{n} landmarks of a point cloud. -#' @details This function uses the greedy maxmin procedure to produce a set of \code{n} evenly spaced landmark points in the data set -#' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. -#' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, -#' the algorithm is performed in R. -#' @param x a data matrix. -#' @param n the number of landmarks requested. +#' @details This function uses the greedy maxmin procedure to produce a set of \code{n} evenly spaced landmark points in the data set +#' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. +#' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, +#' the algorithm is performed in R. +#' @param x a data matrix. +#' @param n the number of landmarks requested. #' @param dist_method the distance metric to use. 
Any distance measure in the \code{proxy} package is supported. -#' @param seed_index the first landmark to seed the algorithm. +#' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. #' @export -landmarks <- function(x, n, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) - stopifnot(seed_index >= 1L && seed_index < nrow(x)) + stopifnot(seed_index >= 1L && seed_index <= nrow(x)) + stopifnot(!is.null(n) || !is.null(eps)) # must specify either a number of balls or a radius + shuffle_idx <- NA - if (shuffle_data){ + if (shuffle_data){ shuffle_idx <- sample(seq(nrow(x))) - x <- x[,,drop=FALSE] + x <- x[,,drop=FALSE] } - if (missing(dist_method) || toupper(dist_method) == "EUCLIDEAN"){ - landmark_idx <- landmark_maxmin(x, n, seed_index-1L) - } else if (requireNamespace("proxy", quietly = TRUE)){ - stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) - landmark_idx <- vector(mode="integer", n) - landmark_idx[1L] <- seed_index - landmark_min_dist <- rep(Inf, nrow(x)) - for (i in 2L:n){ - landmark_dist <- proxy::dist(x, x[landmark_idx[i-1L],,drop=FALSE], method = dist_method) - landmark_min_dist <- pmin(landmark_dist, landmark_min_dist) - potential_idx <- setdiff(seq(nrow(x)), landmark_idx[c(1:i)]) - landmark_idx[i] <- potential_idx[which.max(landmark_min_dist[potential_idx])] + + if(!is.null(n)){ + if (missing(dist_method) || toupper(dist_method) == "EUCLIDEAN"){ + landmark_idx <- landmark_maxmin(x, n, seed_index-1L) + } else if (requireNamespace("proxy", quietly = TRUE)){ + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + landmark_idx <- vector(mode="integer", n) + 
landmark_idx[1L] <- seed_index + landmark_min_dist <- rep(Inf, nrow(x)) + for (i in 2L:n){ + landmark_dist <- proxy::dist(x, x[landmark_idx[i-1L],,drop=FALSE], method = dist_method) + landmark_min_dist <- pmin(landmark_dist, landmark_min_dist) + potential_idx <- setdiff(seq(nrow(x)), landmark_idx[c(1:i)]) + landmark_idx[i] <- potential_idx[which.max(landmark_min_dist[potential_idx])] + } + } else { + stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) } - } else { - stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) + } else if(!is.null(eps)){ + # TODO: Currently written for euclidean distance in 1-dimensional lens space + stopifnot(missing(dist_method) || toupper(dist_method) == "EUCLIDEAN") + + # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks + C = list(seed_index) + f_C = list(x[seed_index]) + + # STEP 2: Compute distance between landmark set and each point in the space + dists = sapply(f_C, function(c) { + abs(c-x) + }) + max = which.max(dists) + d = dists[max] + + # Continue if distance is greater than epsilon + if(d >= eps){ + C = append(C, max) + f_C = append(f_C, x[max]) + + # STEP 2: Compute distance between landmark set and each point in the space + while(TRUE){ + dists = sapply(f_C, function(c) { + abs(c-x) + }) + + orderedIndices = t(apply(dists,1, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] + + # Continue until distance is less than epsilon (i.e. 
stop when all points are contained within an epsilon-ball) + if(d >= eps){ + C = append(C,max) + f_C = append(f_C,x[max]) + } else{ break } + } + } + landmark_idx = unlist(C) } + if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } -} \ No newline at end of file +} From 848787f23c6df2dc03e27ebc017667c06f6cbb03 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 4 Mar 2020 22:51:41 -0500 Subject: [PATCH 07/25] feat(LandmarkBallCover): add option to use filtered value of maximum eccentricity as seed for landmark selection Modified construct_cover function in LandmarkBallCover.R to include selection of seed based on maximum eccentricity. --- R/LandmarkBallCover.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 2bee8d2..72a6ced 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -68,7 +68,8 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { - # todo + ecc = eccentricity(from=fv, x=fv) + self$seed_index = which.max(ecc) } } From c02ad6222dfd4d851744d93d07b06dde6cd060e0 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 12:47:02 -0500 Subject: [PATCH 08/25] fix(LandmarkBallCover): Fix bug with apply when all balls have same number of points When every ball has the same number of points, apply() returns a matrix rather than a list of lists, causing a crash. Replaced this function with splitting the matrix by column in this case. 
--- R/LandmarkBallCover.R | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 72a6ced..e7c39b7 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -93,7 +93,16 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ orderedIndices = t(apply(dist_to_lm,1, sort)) max = which.max(orderedIndices[,1]) self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(x), names=self$index_set) + } + } }else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set @@ -108,7 +117,14 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(length(eps_lm) == 1){ self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) }else{ - self$level_sets <- structure(as.list(apply(dist_to_lm, 2, pts_within_eps)), names=self$index_set) + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + }else{ + self$level_sets <- structure(as.list(x), names=self$index_set) + } } } From 
4863777ebd40cee4d548e5a29e6315deaa3ae4fb Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 14:39:34 -0500 Subject: [PATCH 09/25] feat(Landmarks): add ability to use any distance metric in pr_DB to compute eps-landmark set Replaced 1D euclidean distance with proxy::dist(), allowing arbitrary dist_method from proxy::pr_DB --- R/landmarks.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index 7005b2f..ecdbddc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -40,8 +40,7 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index stop(sprintf("Unsupported distance method passed: %s\n", dist_method)) } } else if(!is.null(eps)){ - # TODO: Currently written for euclidean distance in 1-dimensional lens space - stopifnot(missing(dist_method) || toupper(dist_method) == "EUCLIDEAN") + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) @@ -49,7 +48,7 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index # STEP 2: Compute distance between landmark set and each point in the space dists = sapply(f_C, function(c) { - abs(c-x) + proxy::dist(c, x, method = dist_method) }) max = which.max(dists) d = dists[max] @@ -62,9 +61,8 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ dists = sapply(f_C, function(c) { - abs(c-x) + proxy::dist(c, x, method = dist_method) }) - orderedIndices = t(apply(dists,1, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] From 984f1fde82454f6431710d19bbc9b9b7c6941058 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 5 Mar 2020 15:04:33 -0500 Subject: [PATCH 10/25] 
feat(LandmarkBallCover): add appropriate validation for cover parameters Updated validate() method in LandmarkBallCover.R to ensure appropriate values of parameters for cover construction. --- R/LandmarkBallCover.R | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index e7c39b7..16cccb0 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -33,7 +33,16 @@ LandmarkBallCover$set("public", "initialize", function(...){ ## validate ------ LandmarkBallCover$set("public", "validate", function(filter){ - stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## validate parameters + stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) # require either radius or # balls + stopifnot(is.null(self$num_sets) || (self$num_sets <= f_size && self$num_sets > 0)) # cannot have more cover sets than data points + stopifnot(is.null(self$epsilon) || self$epsilon >= 0) # radius must be positive + stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices + stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods }) ## format ---- @@ -56,7 +65,7 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } - self$validate() + self$validate(filter) ## Get filter values fv <- filter() From 84dd145d6a7b8d4d8155edb97726887571961ae5 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 9 Mar 2020 14:45:42 -0400 Subject: [PATCH 11/25] feat(NeighborhoodCover): initial commit Implementation of cover by k-nearest neighbor sets. 
--- R/CoverRef.R | 67 +++++++++++++++++---------------- R/NeighborhoodCover.R | 87 +++++++++++++++++++++++++++++++++++++++++++ R/landmarks.R | 46 ++++++++++++++++++++++- R/mapperRef.R | 1 + 4 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 R/NeighborhoodCover.R diff --git a/R/CoverRef.R b/R/CoverRef.R index 9e62ec7..416543f 100644 --- a/R/CoverRef.R +++ b/R/CoverRef.R @@ -1,17 +1,17 @@ #' Cover abstract class #' @aliases cover -#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other +#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other #' types of covering generators with. Minimally, a derived covering class must implement the -#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters -#' that the derived class requires. -#' -#' Additional methods may also be added to improve the efficiency of the cover. -#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom -#' cover for details. -#' +#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters +#' that the derived class requires. +#' +#' Additional methods may also be added to improve the efficiency of the cover. +#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom +#' cover for details. +#' #' @section Fields: -#' The following is a list of the fields available for derived classes. Each may be accessed -#' by the \code{self} environment. See \code{?R6} for more details. +#' The following is a list of the fields available for derived classes. Each may be accessed +#' by the \code{self} environment. See \code{?R6} for more details. 
#' \itemize{ #' \item{\emph{level_sets}:}{ named list, indexed by \code{index_set}, whose values represent indices in the original data set to cluster over.} #' \item{\emph{index_set}:}{ character vector of keys that uniquely index the open sets of the cover.} @@ -20,14 +20,14 @@ #' @format An \code{\link{R6Class}} generator object #' @author Matt Piekenbrock #' @export CoverRef -CoverRef <- R6::R6Class("CoverRef", +CoverRef <- R6::R6Class("CoverRef", private = list( .level_sets = NULL, - .index_set = NULL, + .index_set = NULL, .typename = character(0) - ), + ), lock_class = FALSE, ## Feel free to add your own members - lock_objects = FALSE ## Or change existing ones + lock_objects = FALSE ## Or change existing ones ) ## Cover initialization @@ -43,7 +43,7 @@ CoverRef$set("public", "format", function(...){ ## Typename field ## typename ---- -CoverRef$set("active", "typename", +CoverRef$set("active", "typename", function(value){ if (missing(value)){ private$.typename } else { stop("Cover 'typename' member is read-only.") @@ -53,7 +53,7 @@ CoverRef$set("active", "typename", ## The index set may be composed of any data type, but the collection of indices must uniquely ## index the level sets list via the `[[` operator. ## index_set ---- -CoverRef$set("active", "index_set", +CoverRef$set("active", "index_set", function(value){ if (missing(value)){ private$.index_set @@ -65,10 +65,10 @@ CoverRef$set("active", "index_set", } }) -## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the -## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct. +## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the +## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct. 
## level_sets ---- -CoverRef$set("active", "level_sets", +CoverRef$set("active", "level_sets", function(value){ if (missing(value)){ private$.level_sets @@ -80,23 +80,23 @@ CoverRef$set("active", "level_sets", } ) -## Default cover +## Default cover ## construct_cover ---- CoverRef$set("public", "construct_cover", function(index=NULL){ stop("Base class cover construction called. This method must be overridden to be used.") }) -## Given an index in the index set, returns the indices of point intersect the image -## of f in the cover. The default method relies on the construct_cover method. +## Given an index in the index set, returns the indices of point intersect the image +## of f in the cover. The default method relies on the construct_cover method. # CoverRef$set("public", "construct_pullback", function(index){ -# +# # }) -## Which indices of the index set should be compared in constructing the k-simplices? -## This can be customized based on the cover to (dramatically) reduce -## the number of intersection checks needed to generate the k-skeletons, where k >= 1. -## Defaults to every pairwise combination of level sets. +## Which indices of the index set should be compared in constructing the k-simplices? +## This can be customized based on the cover to (dramatically) reduce +## the number of intersection checks needed to generate the k-skeletons, where k >= 1. +## Defaults to every pairwise combination of level sets. ## neighborhood ---- CoverRef$set("public", "neighborhood", function(filter, k=1){ if (length(private$.index_set) <= 1L){ return(NULL) } @@ -104,7 +104,7 @@ CoverRef$set("public", "neighborhood", function(filter, k=1){ relist(private$.index_set[k_combs], k_combs) }) -## Validates that the constructed cover is indeed a valid cover. +## Validates that the constructed cover is indeed a valid cover. 
## validate ---- CoverRef$set("public", "validate", function(){ if ( length(private$.index_set) != length(private$.level_sets) ){ @@ -129,13 +129,14 @@ covers_available <- function(){ line_format <- " %-28s %-34s %-15s" writeLines(c( sprintf("Typename:%-20sGenerator:%-25sParameters:%-26s", "", "", ""), - sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), + sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), sprintf(line_format, "restrained interval", "RestrainedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")), # sprintf(line_format, "adaptive", "AdaptiveCover", paste0(c("number_intervals", "percent_overlap", "quantile_method"), collapse = ", ")), - sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", ")) + sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", ")), + sprintf(line_format, "landmark_ball", "LandmarkBallCover", paste0(c("epsilon", "num_sets", "seed_index", "seed_method"), collapse = ", ")), + sprintf(line_format, "neighborhood", "NeighborhoodCover", paste0(c("k", "seed_index", "seed_method"), collapse = ", ")) )) } -# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or -# move the filter points to the MapperRef object - +# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or +# move the filter points to the MapperRef object diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R new file mode 100644 index 0000000..88165f0 --- /dev/null +++ b/R/NeighborhoodCover.R @@ -0,0 +1,87 @@ +#' Ball Cover +#' +#' @docType class +#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered +#' about each point. 
Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. +#' +#' This differs from the BallCover in that it does NOT union intersecting cover sets. +#' +#' @field epsilon := radius of the ball to form around each point +#' @author Cory Brunsion, Yara Skaf +#' @export + +library(proxy) + +# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) +# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +NeighborhoodCover <- R6::R6Class( + classname = "NeighborhoodCover", + inherit = CoverRef, + public = list(k=NULL, seed_index=1, seed_method="SPEC") +) + +## initialize ------ +NeighborhoodCover$set("public", "initialize", function(...){ + super$initialize(typename="neighborhood") + params <- list(...) + if ("k" %in% names(params)){ self$k <- params[["k"]] } + if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } + if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } +}) + +## validate ------ +NeighborhoodCover$set("public", "validate", function(filter){ + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## validate parameters + stopifnot(!is.null(self$k)) # require nieghborhood size + stopifnot(self$k >= 1 && self$k <= f_size) # radius must be at least 1 (ball around every point) + stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices + stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods +}) + +## format ---- +NeighborhoodCover$set("public", "format", function(...){ + titlecase <- function(x){ + s <- strsplit(x, " ")[[1]] + paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") + } + sprintf("%s Cover: (k = %s, seed_index = %s)", 
titlecase(private$.typename), self$k, self$seed_index) +}) + +## construct_cover ------ +NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ + if (!requireNamespace("RANN", quietly = TRUE)){ + stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) + } + self$validate(filter) + + ## Get filter values + fv <- filter() + f_size <- nrow(fv) + + ## Set the seed index if necessary + if(is.null(index)){ + if(all(self$seed_method == "RAND")) { + self$seed_index = sample(1:f_size, 1) + } + if(all(self$seed_method == "ECC")) { + ecc = eccentricity(from=fv, x=fv) + self$seed_index = which.max(ecc) + } + } + + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) # compute landmark set + + ## Construct the index set + self$index_set <- as.character(attr(eps_lm,"names")) + self$level_sets <- eps_lm + + if (!missing(index)){ return(self$level_sets[[index]]) } + + ## Always return self + invisible(self) +}) diff --git a/R/landmarks.R b/R/landmarks.R index ecdbddc..18a5c83 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -11,10 +11,10 @@ #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. 
#' @export -landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) stopifnot(seed_index >= 1L && seed_index <= nrow(x)) - stopifnot(!is.null(n) || !is.null(eps)) # must specify either a number of balls or a radius + stopifnot(!is.null(n) || !is.null(eps) || !is.null(k)) # must specify a number of balls, a radius, or k-neighborhood shuffle_idx <- NA if (shuffle_data){ @@ -75,6 +75,48 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index } } landmark_idx = unlist(C) + } else if (!is.null(k)){ + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + + # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks + C = list(seed_index) + f_C = list(x[seed_index]) + + # STEP 2: Compute distance between landmark set and each point in the space + dists = sapply(f_C, function(c) { + proxy::dist(c, x, method = dist_method) + }) + max = which.max(dists) + d = dists[max] + + k_nhds = list(apply(dists,2,order)[1:k]) + + + # Continue if distance is greater than epsilon + if(!(max %in% k_nhds[[1]])){ + C = append(C, max) + f_C = append(f_C, x[max]) + + # STEP 2: Compute distance between landmark set and each point in the space + while(TRUE){ + dists = sapply(f_C, function(c) { + proxy::dist(c, x, method = dist_method) + }) + orderedIndices = t(apply(dists,1, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] + + k_nhds = append(k_nhds, list(apply(dists,2,order)[1:k,ncol(dists)])) + + # Continue until all points are within a k-neighborhood + if(!(any(sapply(k_nhds,function(x) x==max)))){ + C = append(C,max) + f_C = append(f_C,x[max]) + } else{ break } + } + } + idx_list = as.character(unlist(C)) + landmark_idx = structure(k_nhds, names=idx_list) } if (is.na(shuffle_idx)){ landmark_idx } else { 
shuffle_idx[landmark_idx] } diff --git a/R/mapperRef.R b/R/mapperRef.R index fd9eea2..765ed4e 100644 --- a/R/mapperRef.R +++ b/R/mapperRef.R @@ -481,6 +481,7 @@ MapperRef$set("public", "use_cover", function(cover="fixed interval", ...){ # "adaptive"=AdaptiveCover$new(...)$construct_cover(self$filter), "ball"=BallCover$new(...)$construct_cover(self$filter), "landmark_ball"=LandmarkBallCover$new(...)$construct_cover(self$filter), + "neighborhood"=NeighborhoodCover$new(...)$construct_cover(self$filter), stop(sprintf("Unknown cover type: %s, please specify a cover typename listed in `covers_available()`", cover)) ) invisible(self) From 187899c6d3ad048a7da05793907501e921254ec3 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 9 Mar 2020 18:39:58 -0400 Subject: [PATCH 12/25] fix(NeighborhoodCover): fix bug where not all points are included in a cover set Fixed bug where some points are excluded from cover sets. --- R/NeighborhoodCover.R | 4 +++- R/landmarks.R | 19 ++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 88165f0..18b315d 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -15,6 +15,7 @@ library(proxy) # Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) # Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +#' @export NeighborhoodCover <- R6::R6Class( classname = "NeighborhoodCover", inherit = CoverRef, @@ -22,6 +23,7 @@ NeighborhoodCover <- R6::R6Class( ) ## initialize ------ +#' @export NeighborhoodCover$set("public", "initialize", function(...){ super$initialize(typename="neighborhood") params <- list(...) 
@@ -38,7 +40,7 @@ NeighborhoodCover$set("public", "validate", function(filter){ ## validate parameters stopifnot(!is.null(self$k)) # require nieghborhood size - stopifnot(self$k >= 1 && self$k <= f_size) # radius must be at least 1 (ball around every point) + stopifnot(self$k >= 2 && self$k <= f_size) # size must be at least 2 points stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods }) diff --git a/R/landmarks.R b/R/landmarks.R index 18a5c83..f6a9ee8 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -90,28 +90,25 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se d = dists[max] k_nhds = list(apply(dists,2,order)[1:k]) - + pt_list = k_nhds[[1]] # Continue if distance is greater than epsilon - if(!(max %in% k_nhds[[1]])){ - C = append(C, max) - f_C = append(f_C, x[max]) - + if(!(max %in% pt_list)){ # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) + dists = sapply(pt_list, function(c) { + proxy::dist(x[c], x, method = dist_method) }) orderedIndices = t(apply(dists,1, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] - k_nhds = append(k_nhds, list(apply(dists,2,order)[1:k,ncol(dists)])) - # Continue until all points are within a k-neighborhood - if(!(any(sapply(k_nhds,function(x) x==max)))){ + if(!(max %in% pt_list)){ C = append(C,max) - f_C = append(f_C,x[max]) + new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] + k_nhds = append(k_nhds, list(new_pts)) + pt_list = append(pt_list, new_pts) } else{ break } } } From 3e666ecd5cbe731062fce2f24c16286aa4084011 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 11 Mar 2020 18:53:44 
-0400 Subject: [PATCH 13/25] fix(NeighborhoodCover): fix bug with >k repeated lensed values Allow neighborhoods of size greater than k when >k points have the same lens value. Now all points with the same lensed value end up in the same pullback set. --- R/landmarks.R | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index f6a9ee8..280d2a6 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -80,38 +80,42 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) - f_C = list(x[seed_index]) # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) - max = which.max(dists) - d = dists[max] + dists = proxy::dist(x[seed_index], x, method = dist_method) + equivalent_pts = which(dists == 0) - k_nhds = list(apply(dists,2,order)[1:k]) + if(length(equivalent_pts) > k){ + k_nhds = list(equivalent_pts) + } else{ + k_nhds = list(order(dists)[1:k]) + } pt_list = k_nhds[[1]] - # Continue if distance is greater than epsilon - if(!(max %in% pt_list)){ + while(TRUE){ # STEP 2: Compute distance between landmark set and each point in the space - while(TRUE){ - dists = sapply(pt_list, function(c) { - proxy::dist(x[c], x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] + dists = sapply(pt_list, function(pt) { + proxy::dist(x[pt], x, method = dist_method) + }) + orderedIndices = t(apply(dists,1, sort)) - # Continue until all points are within a k-neighborhood - if(!(max %in% pt_list)){ - C = append(C,max) - new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] - k_nhds = append(k_nhds, list(new_pts)) - pt_list = append(pt_list, new_pts) - } else{ break } + 
max_vector = which(orderedIndices[,1] == max(orderedIndices[,1])) + max = max_vector[1] + + if(length(max_vector) > k){ + new_pts = max_vector + } else{ + new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] } + + # Continue until all points are within a k-neighborhood + if(!(max %in% pt_list)){ + C = append(C,max) + k_nhds = append(k_nhds, list(new_pts)) + pt_list = append(pt_list, new_pts) + } else{ break } } + idx_list = as.character(unlist(C)) landmark_idx = structure(k_nhds, names=idx_list) } From d371c4897a50d482c54c718de4d77f502609fa59 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 17:40:57 -0400 Subject: [PATCH 14/25] docs: Update documentation for NeighborhoodCover.R and LandmarkBallCover.R Update headers, add reference to Dlotko paper. --- R/LandmarkBallCover.R | 31 +++++++++++++++++++------------ R/NeighborhoodCover.R | 16 +++++++++------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 16cccb0..e23e333 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -1,20 +1,30 @@ -#' Ball Cover +#' Landmark Ball Cover #' #' @docType class -#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered -#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes -#' the filter space endowed with the euclidean metric. +#' @description This class provides a cover whose open sets are formed by balls centered about each +#' point in a landmark set. Given a radius \deqn{\epsilon}, choose a set of landmark points via the +#' algorithm presented in Dłotko to produce a cover by balls of radius \deqn{\epsilon}. Alternatively, +#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. +#' +#' This differs from BallCover.R in that it does NOT union intersecting cover sets. 
#' -#' This differs from the BallCover in that it does NOT union intersecting cover sets. +#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' the filter space endowed with the euclidean metric. #' -#' @field epsilon := radius of the ball to form around each point -#' @author Cory Brunsion, Yara Skaf +#' @field epsilon := radius of the ball to form around each landmark point +#' @field num_sets := desired number of balls/cover sets +#' @field seed_index := index of data point to use as the seed for landmark set calculation +#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @author Yara Skaf, Cory Brunsion +#' @family cover +#' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. #' @export library(proxy) # Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) # Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) +#' @export LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", inherit = CoverRef, @@ -53,10 +63,8 @@ LandmarkBallCover$set("public", "format", function(...){ } if (!is.null(self$num_sets)) { sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) - }else{ - if (!is.null(self$epsilon)) { - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) - } + }else if (!is.null(self$epsilon)){ + sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } }) @@ -111,7 +119,6 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ }else{ self$level_sets <- structure(as.list(x), names=self$index_set) } - } }else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # 
compute landmark set diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 18b315d..e7127cc 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -1,14 +1,16 @@ -#' Ball Cover +#' Neighborhood Cover #' #' @docType class -#' @description This class provides a cover whose open sets are formed by \deqn{\epsilon}-balls centered -#' about each point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes +#' @description This class provides a cover whose open sets are formed by \code{k}-neighborhoods about a landmark +#' set. Cover sets may contain more than \code{k} points if there are more than \code{k} points equidistant from the +#' central point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes #' the filter space endowed with the euclidean metric. #' -#' This differs from the BallCover in that it does NOT union intersecting cover sets. -#' -#' @field epsilon := radius of the ball to form around each point -#' @author Cory Brunsion, Yara Skaf +#' @field k := desired number of neighbord to include in a cover set +#' @field seed_index := index of data point to use as the seed for landmark set calculation +#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @author Yara Skaf, Cory Brunsion +#' @family cover #' @export library(proxy) From 8f41462dfdbf110d2e6ae890e0ca525684b4f834 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:10:42 -0400 Subject: [PATCH 15/25] docs: Update documentation for NeighborhoodCover.R and LandmarkBallCover.R --- R/LandmarkBallCover.R | 54 ++++++++++++++++++++++--------------------- R/NeighborhoodCover.R | 17 +++++++------- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index e23e333..865fc50 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -4,7 +4,8 
@@ #' @description This class provides a cover whose open sets are formed by balls centered about each #' point in a landmark set. Given a radius \deqn{\epsilon}, choose a set of landmark points via the #' algorithm presented in Dłotko to produce a cover by balls of radius \deqn{\epsilon}. Alternatively, -#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. +#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. If no seed +#' or seed_method is specified, default behavior uses the first data point as the seed. #' #' This differs from BallCover.R in that it does NOT union intersecting cover sets. #' @@ -14,16 +15,14 @@ #' @field epsilon := radius of the ball to form around each landmark point #' @field num_sets := desired number of balls/cover sets #' @field seed_index := index of data point to use as the seed for landmark set calculation -#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index +#' | "ECC" : point with highest eccentricity in the filter space) #' @author Yara Skaf, Cory Brunsion #' @family cover #' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. -#' @export library(proxy) -# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) -# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) #' @export LandmarkBallCover <- R6::R6Class( classname = "LandmarkBallCover", @@ -32,6 +31,7 @@ LandmarkBallCover <- R6::R6Class( ) ## initialize ------ +#' @export LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="landmark_ball") params <- list(...) 
@@ -62,9 +62,9 @@ LandmarkBallCover$set("public", "format", function(...){ paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") } if (!is.null(self$num_sets)) { - sprintf("%s Cover: (num_sets = %s, seed_index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) + sprintf("%s Cover: (number of sets = %s, seed index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) }else if (!is.null(self$epsilon)){ - sprintf("%s Cover: (epsilon = %.2f, seed_index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + sprintf("%s Cover: (epsilon = %.2f, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } }) @@ -90,8 +90,10 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ } } + ## Construct the balls if (!is.null(self$num_sets)) { - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) # compute landmark set + ## Compute the landmark set + eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) ## Get distance from each point to landmarks dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) @@ -121,31 +123,31 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ } } }else if (!is.null(self$epsilon)) { - eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks + ## Compute the landmark set + eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - ## Construct the index set - self$index_set <- as.character(eps_lm) + ## Construct the index set + self$index_set <- 
as.character(eps_lm) - ## Construct the preimages - if(length(eps_lm) == 1){ - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + ## Construct the preimages + if(length(eps_lm) == 1){ + self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) + }else{ + x = apply(dist_to_lm, 2, pts_within_eps) + # if all level sets contain the same number of points, apply returns a matrix and gives an error + # -> need to split columns into list elements in this case + if(is.matrix(x)){ + self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) }else{ - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) - } + self$level_sets <- structure(as.list(x), names=self$index_set) } } + } - if (!missing(index)){ - return(self$level_sets[[index]]) } + if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self invisible(self) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index e7127cc..d622f81 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -2,21 +2,21 @@ #' #' @docType class #' @description This class provides a cover whose open sets are formed by \code{k}-neighborhoods about a landmark -#' set. Cover sets may contain more than \code{k} points if there are more than \code{k} points equidistant from the -#' central point. Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes -#' the filter space endowed with the euclidean metric. +#' set. 
If no seed or seed_method is specified, default behavior uses the first data point as the seed. Cover sets +#' may contain more than \code{k} points if there are more than \code{k} points equidistant from the central point. +#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes the filter space +#' endowed with the euclidean metric. #' #' @field k := desired number of neighbord to include in a cover set #' @field seed_index := index of data point to use as the seed for landmark set calculation -#' @field seed_method := method to select a seed (user specified, random, highest eccentricity) +#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index +#' | "ECC" : point with highest eccentricity in the filter space) #' @author Yara Skaf, Cory Brunsion #' @family cover #' @export library(proxy) -# Seed methods: SPEC (specify index), RAND (random index), ECC (seed with highest eccentricity data point) -# Default: specified index using first data point (seed_method = "SPEC", seed_index = 1) #' @export NeighborhoodCover <- R6::R6Class( classname = "NeighborhoodCover", @@ -53,7 +53,7 @@ NeighborhoodCover$set("public", "format", function(...){ s <- strsplit(x, " ")[[1]] paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ") } - sprintf("%s Cover: (k = %s, seed_index = %s)", titlecase(private$.typename), self$k, self$seed_index) + sprintf("%s Cover: (k = %s, seed index = %s)", titlecase(private$.typename), self$k, self$seed_index) }) ## construct_cover ------ @@ -78,7 +78,8 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ } } - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) # compute landmark set + ## Compute the landmark set + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) ## Construct the index set self$index_set <- as.character(attr(eps_lm,"names")) From f1e79832e41365e2d1e7cdf15ef5a680a59e2571 
Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:03:00 -0400 Subject: [PATCH 16/25] refactor(LandmarkBallCover): cleaned up code in construct_cover --- R/LandmarkBallCover.R | 75 +++++++++++++------------------------------ 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 865fc50..b0664ff 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -79,74 +79,45 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ fv <- filter() f_size <- nrow(fv) - ## Set the seed index if necessary + ## Construct the balls if(is.null(index)){ - if(all(self$seed_method == "RAND")) { - self$seed_index = sample(1:f_size, 1) - } - if(all(self$seed_method == "ECC")) { - ecc = eccentricity(from=fv, x=fv) - self$seed_index = which.max(ecc) - } - } + ## Set the seed index if necessary + if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } + if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } - ## Construct the balls - if (!is.null(self$num_sets)) { ## Compute the landmark set - eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) - - ## Get distance from each point to landmarks - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } + if (!is.null(self$num_sets)) { eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + } else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) } ## Construct the index set self$index_set <- as.character(eps_lm) - ## Construct the preimages - if(length(eps_lm) == 1){ - orderedIndices = apply(dist_to_lm,1, sort) - max = which.max(orderedIndices) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points 
are in at least one ball - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) - }else{ - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + ## Get distance from each point to landmarks + dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) + pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) + ## Calculate an epsilon if one was not given + if (!is.null(self$num_sets)) { + if(length(eps_lm) == 1){ + orderedIndices = apply(dist_to_lm,1, sort) + max = which.max(orderedIndices) + } else { + orderedIndices = t(apply(dist_to_lm,1, sort)) + max = which.max(orderedIndices[,1]) } + self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball } - }else if (!is.null(self$epsilon)) { - ## Compute the landmark set - eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) # compute landmark set - dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE]) # Get distance from each point to landmarks - - pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) } - - ## Construct the index set - self$index_set <- as.character(eps_lm) ## Construct the preimages - if(length(eps_lm) == 1){ + if(length(eps_lm) == 1) { self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) 
- }else{ + } else { x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix and gives an error - # -> need to split columns into list elements in this case - if(is.matrix(x)){ - self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - }else{ - self$level_sets <- structure(as.list(x), names=self$index_set) - } + # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements + if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + } else { self$level_sets <- structure(as.list(x), names=self$index_set) } } } - if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self From 6f4f3dd80035a98fc4293ee48b8f74ba5e5a8036 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:05:48 -0400 Subject: [PATCH 17/25] refactor(NeighborhoodCover): cleaned up code in construct_cover --- R/NeighborhoodCover.R | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index d622f81..9263b73 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -67,24 +67,18 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ fv <- filter() f_size <- nrow(fv) - ## Set the seed index if necessary if(is.null(index)){ - if(all(self$seed_method == "RAND")) { - self$seed_index = sample(1:f_size, 1) - } - if(all(self$seed_method == "ECC")) { - ecc = eccentricity(from=fv, x=fv) - self$seed_index = which.max(ecc) - } - } - - ## Compute the landmark set - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Set the seed index if necessary + if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } + if(all(self$seed_method == "ECC")) { self$seed_index = 
which.max(eccentricity(from=fv, x=fv)) } - ## Construct the index set - self$index_set <- as.character(attr(eps_lm,"names")) - self$level_sets <- eps_lm + ## Compute the landmark set + eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Construct the index set and level sets + self$index_set <- as.character(attr(eps_lm,"names")) + self$level_sets <- eps_lm + } if (!missing(index)){ return(self$level_sets[[index]]) } ## Always return self From f71639e05fc068d417c9478b989d06fbaee4aa12 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 21:17:58 -0400 Subject: [PATCH 18/25] refactor(landmarks): remove comments Remove commented out code that computed landmarks in only one-dimensional data. --- R/landmarks.R | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index b8236d2..87a62fc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -10,6 +10,7 @@ #' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. +#' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web.
#' @export landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) @@ -44,33 +45,29 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks C = list(seed_index) - f_C = list(x[seed_index]) + f_C = matrix(x[seed_index,], nrow=1, byrow=TRUE) # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) + dists = proxy::dist(f_C, x, method = dist_method) max = which.max(dists) d = dists[max] # Continue if distance is greater than epsilon if(d >= eps){ C = append(C, max) - f_C = append(f_C, x[max]) + f_C = rbind(f_C, x[max,]) # STEP 2: Compute distance between landmark set and each point in the space while(TRUE){ - dists = sapply(f_C, function(c) { - proxy::dist(c, x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) + dists = proxy::dist(f_C, x, method = dist_method) + orderedIndices = t(apply(dists,2, sort)) max = which.max(orderedIndices[,1]) d = orderedIndices[max,1] # Continue until distance is less than epsilon (i.e. 
stop when all points are contained within an epsilon-ball) if(d >= eps){ C = append(C,max) - f_C = append(f_C,x[max]) + f_C = rbind(f_C, x[max,]) } else{ break } } } @@ -119,6 +116,5 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se idx_list = as.character(unlist(C)) landmark_idx = structure(k_nhds, names=idx_list) } - if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From a1fd4673fb9483063c3b0605478fed2db51d497b Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 23:05:37 -0400 Subject: [PATCH 19/25] fix(LandmarkBallCover): fix bug where num_sets > unique filtered points In this case, just take the unique balls as the cover sets. Duplicate lensed values should not form distinct centers. --- R/LandmarkBallCover.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index b0664ff..f09a1d5 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -86,7 +86,7 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } ## Compute the landmark set - if (!is.null(self$num_sets)) { eps_lm <- landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index) + if (!is.null(self$num_sets)) { eps_lm <- unique(landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index)) } else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) } ## Construct the index set From 45e4bb945a067dbd9fceca2e10236b6542099e46 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Wed, 15 Apr 2020 23:17:23 -0400 Subject: [PATCH 20/25] refactor(landmarks): removed unnecessary loop in eps-landmark calculation --- R/landmarks.R | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git 
a/R/landmarks.R b/R/landmarks.R index b8236d2..87a62fc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -52,24 +52,16 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se max = which.max(dists) d = dists[max] - # Continue if distance is greater than epsilon - if(d >= eps){ + # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) + while(d >= eps){ C = append(C, max) f_C = rbind(f_C, x[max,]) # STEP 2: Compute distance between landmark set and each point in the space - while(TRUE){ - dists = proxy::dist(f_C, x, method = dist_method) - orderedIndices = t(apply(dists,2, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] - - # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) - if(d >= eps){ - C = append(C,max) - f_C = rbind(f_C, x[max,]) - } else{ break } - } + dists = proxy::dist(f_C, x, method = dist_method) + orderedIndices = t(apply(dists,2, sort)) + max = which.max(orderedIndices[,1]) + d = orderedIndices[max,1] } landmark_idx = unlist(C) } else if (!is.null(k)){ From dffbbb8017cbf6300ae506eba1c16d5b03a7d9c2 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 17:35:56 -0400 Subject: [PATCH 21/25] refactor(NeighborhoodBallCover): cleaned up code to construct cover sets Moved calculation of k-neighborhoods to NeighborhoodCover.R, eliminated unnecessary code, removed k-nhd functionality from landmarks.R --- R/NeighborhoodCover.R | 38 +++++++++++++++++++++++++++++----- R/landmarks.R | 47 ++----------------------------------------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 9263b73..12a0d4e 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -57,14 +57,16 @@ NeighborhoodCover$set("public", "format", function(...){ }) ## construct_cover ------ 
-NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ +NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL, dist_method = "euclidean"){ if (!requireNamespace("RANN", quietly = TRUE)){ stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE) } self$validate(filter) + stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) ## Get filter values fv <- filter() + f_dim <- ncol(fv) f_size <- nrow(fv) if(is.null(index)){ @@ -72,12 +74,38 @@ NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL){ if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) } if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) } - ## Compute the landmark set - eps_lm <- landmarks(x=fv, k=self$k, seed_index=self$seed_index) + ## Compute the set of k-nhds + C = c() # create an empty list to store indices of centers + k_nhds = list() # create empty list of lists to store points in each neighborhood + ptsLeft = c(1:f_size) # keep track of indices that are still available to be chosen as a center + + nextC = self$seed_index # use the seed as the first center + while(TRUE){ + # add the new center to the list and compute its k-neighborhood + C = append(C, nextC) + neighbors = proxy::dist(matrix(fv[nextC,], ncol=f_dim), fv, method = dist_method) + sortedNeighbors = sort(neighbors) + + # include all points that are equidistant from the center, even if those points create a nhd size > k + i = 1 + while( ((self$k+i) <= f_size) && (sortedNeighbors[self$k] == sortedNeighbors[self$k + i]) ){i = i + 1} + nhd = order(neighbors)[1:(self$k + i - 1)] + k_nhds = append(k_nhds, list(nhd)) + + # points that are included in a nhd are no longer eligible to be chosen as centers + ptsLeft = setdiff(ptsLeft, nhd) + if(length(ptsLeft) <= 0){break} + + # select the next center + dists = proxy::dist(matrix(fv[C,], ncol=f_dim), matrix(fv[ptsLeft,], 
ncol=f_dim), method = dist_method) + sortedDists = matrix(apply(dists,2,sort),ncol=length(ptsLeft)) + max = which.max(sortedDists[1,]) + nextC = ptsLeft[max] + } ## Construct the index set and level sets - self$index_set <- as.character(attr(eps_lm,"names")) - self$level_sets <- eps_lm + self$index_set = as.character(C) + self$level_sets = structure(k_nhds, names=self$index_set) } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/landmarks.R b/R/landmarks.R index 87a62fc..ad0906c 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -12,10 +12,10 @@ #' @references De Silva, Vin, and Gunnar E. Carlsson. "Topological estimation using witness complexes." SPBG 4 (2004): 157-166. #' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web. #' @export -landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ +landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index = 1, shuffle_data=FALSE){ stopifnot(is.matrix(x)) stopifnot(seed_index >= 1L && seed_index <= nrow(x)) - stopifnot(!is.null(n) || !is.null(eps) || !is.null(k)) # must specify a number of balls, a radius, or k-neighborhood + stopifnot(!is.null(n) || !is.null(eps)) # must specify a number of balls or a radius shuffle_idx <- NA if (shuffle_data){ @@ -64,49 +64,6 @@ landmarks <- function(x, n=NULL, eps=NULL, k=NULL, dist_method = "euclidean", se d = orderedIndices[max,1] } landmark_idx = unlist(C) - } else if (!is.null(k)){ - stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) - - # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks - C = list(seed_index) - - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(x[seed_index], x, method = dist_method) - equivalent_pts = which(dists == 0) - - if(length(equivalent_pts) > k){ - k_nhds = list(equivalent_pts) - } else{ - k_nhds 
= list(order(dists)[1:k]) - } - pt_list = k_nhds[[1]] - - while(TRUE){ - # STEP 2: Compute distance between landmark set and each point in the space - dists = sapply(pt_list, function(pt) { - proxy::dist(x[pt], x, method = dist_method) - }) - orderedIndices = t(apply(dists,1, sort)) - - max_vector = which(orderedIndices[,1] == max(orderedIndices[,1])) - max = max_vector[1] - - if(length(max_vector) > k){ - new_pts = max_vector - } else{ - new_pts = order(proxy::dist(x[max], x, method = dist_method))[1:k] - } - - # Continue until all points are within a k-neighborhood - if(!(max %in% pt_list)){ - C = append(C,max) - k_nhds = append(k_nhds, list(new_pts)) - pt_list = append(pt_list, new_pts) - } else{ break } - } - - idx_list = as.character(unlist(C)) - landmark_idx = structure(k_nhds, names=idx_list) } if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From eb977e6dba843bec9293ce67f480f4b88b8f7b3a Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 18:11:50 -0400 Subject: [PATCH 22/25] refactor(landmarks): eliminate unnecessary code in epsilon-net computation Refactor code for epsilon-landmarks in landmarks.R, update documentation --- R/landmarks.R | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/R/landmarks.R b/R/landmarks.R index ad0906c..14045cc 100644 --- a/R/landmarks.R +++ b/R/landmarks.R @@ -4,8 +4,11 @@ #' \code{x}. Maxmin is a simple greedy algorithm that is relatively efficient, but it has a tendency to pick out extremal points. #' If the distance metric is euclidean, an efficient Rcpp implementation is used. If another metric is requested, #' the algorithm is performed in R. +#' Alternatively, a radius \deqn{\epsilon} can be specified so that the landmark set is computed to cover the space with +#' \deqn{\epsilon}-balls centered at the landmarks. #' @param x a data matrix. #' @param n the number of landmarks requested. 
+#' @param eps the desired radius of balls in the cover set. #' @param dist_method the distance metric to use. Any distance measure in the \code{proxy} package is supported. #' @param seed_index the first landmark to seed the algorithm. #' @param shuffle_data whether to first randomly shuffle the data. @@ -42,28 +45,25 @@ landmarks <- function(x, n=NULL, eps=NULL, dist_method = "euclidean", seed_index } } else if(!is.null(eps)){ stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names())) + f_dim <- ncol(x) + f_size <- nrow(x) - # STEP 1: Pick point in the space (seed) and add it to the list of centers/landmarks - C = list(seed_index) - f_C = matrix(x[seed_index,], nrow=1, byrow=TRUE) + # algorithm and variable names as specified in Dlotko paper + C = c() # create a list to store indices of centers/landmarks + next_pt = seed_index # first landmark should be the seed point + while(TRUE){ + C = append(C, next_pt) # add new point to list of landmarks - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(f_C, x, method = dist_method) - max = which.max(dists) - d = dists[max] + # compute distance between landmark set and each point in the space + dists = proxy::dist(matrix(x[C,], ncol=f_dim), x, method = dist_method) + sortedDists = matrix(apply(dists,2,sort),ncol=f_size) - # Continue until distance is less than epsilon (i.e. stop when all points are contained within an epsilon-ball) - while(d >= eps){ - C = append(C, max) - f_C = rbind(f_C, x[max,]) - - # STEP 2: Compute distance between landmark set and each point in the space - dists = proxy::dist(f_C, x, method = dist_method) - orderedIndices = t(apply(dists,2, sort)) - max = which.max(orderedIndices[,1]) - d = orderedIndices[max,1] + # the next landmark is the point with greatest distance from the current landmark set + next_pt = which.max(sortedDists[1,]) + d = sortedDists[1,next_pt] # done when this max distance is < eps, i.e. 
when all pts are contained in an eps-ball + if(d < eps){break} } - landmark_idx = unlist(C) + landmark_idx = C } if (is.na(shuffle_idx)){ landmark_idx } else { shuffle_idx[landmark_idx] } } From c5f9c3fa678233eaec520a35f71f52330482127b Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Thu, 16 Apr 2020 20:26:42 -0400 Subject: [PATCH 23/25] refactor(LandmarkBallCover): remove unnecessary code from construct_cover --- R/LandmarkBallCover.R | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index f09a1d5..88982d0 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -98,25 +98,18 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ ## Calculate an epsilon if one was not given if (!is.null(self$num_sets)) { - if(length(eps_lm) == 1){ - orderedIndices = apply(dist_to_lm,1, sort) - max = which.max(orderedIndices) - } else { - orderedIndices = t(apply(dist_to_lm,1, sort)) - max = which.max(orderedIndices[,1]) - } - self$epsilon = dist_to_lm[max] # ball radius should be distance of the farthest point from the landmark set so that all points are in at least one ball + sortedDists = matrix(apply(dist_to_lm,1,sort),nrow=f_size,byrow=TRUE) + max = which.max(sortedDists[,1]) + self$epsilon = sortedDists[max,1] # radius should be distance of the farthest pt from the landmark set so that all pts are in at least one ball } - ## Construct the preimages - if(length(eps_lm) == 1) { - self$level_sets <- structure(as.list(list(t(apply(dist_to_lm, 2, pts_within_eps))[1,])), names=self$index_set) - } else { - x = apply(dist_to_lm, 2, pts_within_eps) - # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements - if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) - } else { self$level_sets <- 
structure(as.list(x), names=self$index_set) } - } + x = apply(dist_to_lm, 2, pts_within_eps) + + # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements + if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) + } else { self$level_sets <- structure(as.list(x), names=self$index_set) } + + print(self$level_sets) } if (!missing(index)){ return(self$level_sets[[index]]) } From 7ef71eccb7b407bcb0d01d690f98d005244f665d Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Mon, 27 Apr 2020 22:51:43 -0400 Subject: [PATCH 24/25] fix(LandmarkBallCover, NeighborhoodCover): Change list() to c() to avoid bug with pasting lists of parameters. --- R/LandmarkBallCover.R | 4 +--- R/NeighborhoodCover.R | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 88982d0..251f5ed 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -34,7 +34,7 @@ LandmarkBallCover <- R6::R6Class( #' @export LandmarkBallCover$set("public", "initialize", function(...){ super$initialize(typename="landmark_ball") - params <- list(...) + params <- c(...) 
if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] } if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] } if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } @@ -108,8 +108,6 @@ LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){ # if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set) } else { self$level_sets <- structure(as.list(x), names=self$index_set) } - - print(self$level_sets) } if (!missing(index)){ return(self$level_sets[[index]]) } diff --git a/R/NeighborhoodCover.R b/R/NeighborhoodCover.R index 12a0d4e..133cb89 100644 --- a/R/NeighborhoodCover.R +++ b/R/NeighborhoodCover.R @@ -28,7 +28,7 @@ NeighborhoodCover <- R6::R6Class( #' @export NeighborhoodCover$set("public", "initialize", function(...){ super$initialize(typename="neighborhood") - params <- list(...) + params <- c(...) 
if ("k" %in% names(params)){ self$k <- params[["k"]] } if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] } if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] } From c91b39b284e229af6b36d151a5a0d1b832b826a4 Mon Sep 17 00:00:00 2001 From: yaraskaf <56359026+yaraskaf@users.noreply.github.com> Date: Tue, 5 May 2020 13:28:56 -0400 Subject: [PATCH 25/25] fix(LandmarkBallCover): fix printing of epsilon format method --- R/LandmarkBallCover.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/LandmarkBallCover.R b/R/LandmarkBallCover.R index 251f5ed..c93ba5e 100644 --- a/R/LandmarkBallCover.R +++ b/R/LandmarkBallCover.R @@ -64,7 +64,7 @@ LandmarkBallCover$set("public", "format", function(...){ if (!is.null(self$num_sets)) { sprintf("%s Cover: (number of sets = %s, seed index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index) }else if (!is.null(self$epsilon)){ - sprintf("%s Cover: (epsilon = %.2f, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) + sprintf("%s Cover: (epsilon = %s, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index) } })