Skip to content
This repository was archived by the owner on Jun 9, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d83d236
feat(LandmarkBallCover): initial commit
yaraskaf Feb 24, 2020
4332a69
chore: add Mapper.dll to .gitignore
yaraskaf Feb 24, 2020
05b143f
refactor(LandmarkBallCover): rename YS_BallCover to LandmarkBallCover
yaraskaf Feb 24, 2020
0839786
refactor(LandmarkBallCover): rename typename ys_ball to landmark_ball
yaraskaf Feb 24, 2020
5568886
refactor(LandmarkBallCover): remove print statements
yaraskaf Feb 24, 2020
5275dfa
feat(LandmarkBallCover): add fixed epsilon width balls
yaraskaf Mar 5, 2020
848787f
feat(LandmarkBallCover): add option to use filtered value of maximum …
yaraskaf Mar 5, 2020
c02ad62
fix(LandmarkBallCover): Fix bug with apply when all balls have same n…
yaraskaf Mar 5, 2020
4863777
feat(Landmarks): add ability to use any distance metric in pr_DB to c…
yaraskaf Mar 5, 2020
984f1fd
feat(LandmarkBallCover): add appropriate validation for cover parameters
yaraskaf Mar 5, 2020
84dd145
feat(NeighborhoodCover): initial commit
yaraskaf Mar 9, 2020
187899c
fix(NeighborhoodCover): fix bug where not all points are included in …
yaraskaf Mar 9, 2020
3e666ec
fix(NeighborhoodCover): fix bug with >k repeated lensed values
yaraskaf Mar 11, 2020
d371c48
docs: Update documentation for NeighborhoodCover.R and LandmarkBallCo…
yaraskaf Apr 15, 2020
8f41462
docs: Update documentation for NeighborhoodCover.R and LandmarkBallCo…
yaraskaf Apr 15, 2020
f1e7983
refactor(LandmarkBallCover): cleaned up code in construct_cover
yaraskaf Apr 16, 2020
6f4f3dd
refactor(NeighborhoodCover): cleaned up code in construct_cover
yaraskaf Apr 16, 2020
f71639e
refactor(landmarks): remove comments
yaraskaf Apr 16, 2020
a1fd467
fix(LandmarkBallCover): fix bug where num_sets > unique filtered points
yaraskaf Apr 16, 2020
45e4bb9
refactor(landmarks): removed unnecessary loop in eps-landmark calcula…
yaraskaf Apr 16, 2020
dffbbb8
refactor(NeighborhoodBallCover): cleaned up code to construct cover sets
yaraskaf Apr 16, 2020
eb977e6
refactor(landmarks): eliminate unnecessary code in epsilon-net comput…
yaraskaf Apr 16, 2020
c5f9c3f
refactor(LandmarkBallCover): remove unnecessary code from construct_c…
yaraskaf Apr 17, 2020
7ef71ec
fix(LandmarkBallCover, NeighborhoodCover):
yaraskaf Apr 28, 2020
c91b39b
fix(LandmarkBallCover): fix printing of epsilon format method
yaraskaf May 5, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,4 @@ ignore/
doc
Meta
*cache
src/Mapper.dll
67 changes: 34 additions & 33 deletions R/CoverRef.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#' Cover abstract class
#' @aliases cover
#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other
#' @description Reference Class (R6) implementation of a Cover. This class is meant to act as an abstract class to derive other
#' types of covering generators with. Minimally, a derived covering class must implement the
#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters
#' that the derived class requires.
#'
#' Additional methods may also be added to improve the efficiency of the cover.
#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom
#' cover for details.
#'
#' 'construct_cover' method to populate the 'level_sets' list with point indices, and any parameters
#' that the derived class requires.
#'
#' Additional methods may also be added to improve the efficiency of the cover.
#' See the \href{https://peekxc.github.io/Mapper/articles/UsingCustomCover.html}{vignette} on creating a custom
#' cover for details.
#'
#' @section Fields:
#' The following is a list of the fields available for derived classes. Each may be accessed
#' by the \code{self} environment. See \code{?R6} for more details.
#' The following is a list of the fields available for derived classes. Each may be accessed
#' by the \code{self} environment. See \code{?R6} for more details.
#' \itemize{
#' \item{\emph{level_sets}:}{ named list, indexed by \code{index_set}, whose values represent indices in the original data set to cluster over.}
#' \item{\emph{index_set}:}{ character vector of keys that uniquely index the open sets of the cover.}
Expand All @@ -20,14 +20,14 @@
#' @format An \code{\link{R6Class}} generator object
#' @author Matt Piekenbrock
#' @export CoverRef
CoverRef <- R6::R6Class("CoverRef",
CoverRef <- R6::R6Class("CoverRef",
private = list(
.level_sets = NULL,
.index_set = NULL,
.index_set = NULL,
.typename = character(0)
),
),
lock_class = FALSE, ## Feel free to add your own members
lock_objects = FALSE ## Or change existing ones
lock_objects = FALSE ## Or change existing ones
)

## Cover initialization
Expand All @@ -43,7 +43,7 @@ CoverRef$set("public", "format", function(...){

## Typename field
## typename ----
CoverRef$set("active", "typename",
CoverRef$set("active", "typename",
function(value){
if (missing(value)){ private$.typename } else {
stop("Cover 'typename' member is read-only.")
Expand All @@ -53,7 +53,7 @@ CoverRef$set("active", "typename",
## The index set may be composed of any data type, but the collection of indices must uniquely
## index the level sets list via the `[[` operator.
## index_set ----
CoverRef$set("active", "index_set",
CoverRef$set("active", "index_set",
function(value){
if (missing(value)){
private$.index_set
Expand All @@ -65,10 +65,10 @@ CoverRef$set("active", "index_set",
}
})

## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the
## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct.
## The level sets must be a list indexed by the index set. If the list is named, a check is performed to make sure the
## names match the values of the index set, and in the proper order. Otherwise, the order is assumed to be correct.
## level_sets ----
CoverRef$set("active", "level_sets",
CoverRef$set("active", "level_sets",
function(value){
if (missing(value)){
private$.level_sets
Expand All @@ -80,31 +80,31 @@ CoverRef$set("active", "level_sets",
}
)

## Default cover
## Default cover
## construct_cover ----
CoverRef$set("public", "construct_cover", function(index=NULL){
stop("Base class cover construction called. This method must be overridden to be used.")
})

## Given an index in the index set, returns the indices of point intersect the image
## of f in the cover. The default method relies on the construct_cover method.
## Given an index in the index set, returns the indices of point intersect the image
## of f in the cover. The default method relies on the construct_cover method.
# CoverRef$set("public", "construct_pullback", function(index){
#
#
# })


## Which indices of the index set should be compared in constructing the k-simplices?
## This can be customized based on the cover to (dramatically) reduce
## the number of intersection checks needed to generate the k-skeletons, where k >= 1.
## Defaults to every pairwise combination of level sets.
## Which indices of the index set should be compared in constructing the k-simplices?
## This can be customized based on the cover to (dramatically) reduce
## the number of intersection checks needed to generate the k-skeletons, where k >= 1.
## Defaults to every pairwise combination of level sets.
## neighborhood ----
CoverRef$set("public", "neighborhood", function(filter, k=1){
if (length(private$.index_set) <= 1L){ return(NULL) }
k_combs <- t(combn(length(private$.index_set), k+1))
relist(private$.index_set[k_combs], k_combs)
})

## Validates that the constructed cover is indeed a valid cover.
## Validates that the constructed cover is indeed a valid cover.
## validate ----
CoverRef$set("public", "validate", function(){
if ( length(private$.index_set) != length(private$.level_sets) ){
Expand All @@ -129,13 +129,14 @@ covers_available <- function(){
line_format <- " %-28s %-34s %-15s"
writeLines(c(
sprintf("Typename:%-20sGenerator:%-25sParameters:%-26s", "", "", ""),
sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")),
sprintf(line_format, "fixed interval", "FixedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")),
sprintf(line_format, "restrained interval", "RestrainedIntervalCover", paste0(c("number_intervals", "percent_overlap"), collapse = ", ")),
# sprintf(line_format, "adaptive", "AdaptiveCover", paste0(c("number_intervals", "percent_overlap", "quantile_method"), collapse = ", ")),
sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", "))
sprintf(line_format, "ball", "BallCover", paste0("epsilon", collapse = ", ")),
sprintf(line_format, "landmark_ball", "LandmarkBallCover", paste0(c("epsilon", "num_sets", "seed_index", "seed_method"), collapse = ", ")),
sprintf(line_format, "neighborhood", "NeighborhoodCover", paste0(c("k", "seed_index", "seed_method"), collapse = ", "))
))
}

# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or
# move the filter points to the MapperRef object

# TODO: add ability to convert cover to new type to preserve copying the filter points multiple times, or
# move the filter points to the MapperRef object
116 changes: 116 additions & 0 deletions R/LandmarkBallCover.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#' Landmark Ball Cover
#'
#' @docType class
#' @description This class provides a cover whose open sets are formed by balls centered about each
#' point in a landmark set. Given a radius \deqn{\epsilon}, choose a set of landmark points via the
#' algorithm presented in Dłotko to produce a cover by balls of radius \deqn{\epsilon}. Alternatively,
#' given a number of cover sets \code{n}, choose \code{n} landmarks via maxmin algorithm. If no seed
#' or seed_method is specified, default behavior uses the first data point as the seed.
#'
#' This differs from BallCover.R in that it does NOT union intersecting cover sets.
#'
#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes
#' the filter space endowed with the euclidean metric.
#'
#' @field epsilon := radius of the ball to form around each landmark point
#' @field num_sets := desired number of balls/cover sets
#' @field seed_index := index of data point to use as the seed for landmark set calculation
#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index
#' | "ECC" : point with highest eccentricity in the filter space)
#' @author Yara Skaf, Cory Brunsion
#' @family cover
#' @references Dłotko, Paweł. "Ball Mapper: A Shape Summary for Topological Data Analysis." (2019). Web.

library(proxy)

#' @export
LandmarkBallCover <- R6::R6Class(
classname = "LandmarkBallCover",
inherit = CoverRef,
public = list(epsilon=NULL, num_sets=NULL, seed_index=1, seed_method="SPEC")
)

## initialize ------
#' @export
LandmarkBallCover$set("public", "initialize", function(...){
super$initialize(typename="landmark_ball")
params <- c(...)
if ("epsilon" %in% names(params)){ self$epsilon <- params[["epsilon"]] }
if ("num_sets" %in% names(params)){ self$num_sets <- params[["num_sets"]] }
if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] }
if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] }
})

## validate ------
LandmarkBallCover$set("public", "validate", function(filter){
## Get filter values
fv <- filter()
f_size <- nrow(fv)

## validate parameters
stopifnot(!is.null(self$epsilon) || !is.null(self$num_sets)) # require either radius or # balls
stopifnot(is.null(self$num_sets) || (self$num_sets <= f_size && self$num_sets > 0)) # cannot have more cover sets than data points
stopifnot(is.null(self$epsilon) || self$epsilon >= 0) # radius must be positive
stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices
stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods
})

## format ----
LandmarkBallCover$set("public", "format", function(...){
titlecase <- function(x){
s <- strsplit(x, " ")[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ")
}
if (!is.null(self$num_sets)) {
sprintf("%s Cover: (number of sets = %s, seed index = %s)", titlecase(private$.typename), self$num_sets, self$seed_index)
}else if (!is.null(self$epsilon)){
sprintf("%s Cover: (epsilon = %s, seed index = %s)", titlecase(private$.typename), self$epsilon, self$seed_index)
}
})

## construct_cover ------
LandmarkBallCover$set("public", "construct_cover", function(filter, index=NULL){
if (!requireNamespace("RANN", quietly = TRUE)){
stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE)
}
self$validate(filter)

## Get filter values
fv <- filter()
f_size <- nrow(fv)

## Construct the balls
if(is.null(index)){
## Set the seed index if necessary
if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) }
if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) }

## Compute the landmark set
if (!is.null(self$num_sets)) { eps_lm <- unique(landmarks(x=fv, n=self$num_sets, seed_index=self$seed_index))
} else if (!is.null(self$epsilon)) { eps_lm <- landmarks(x=fv, eps=self$epsilon, seed_index=self$seed_index) }

## Construct the index set
self$index_set <- as.character(eps_lm)

## Get distance from each point to landmarks
dist_to_lm <- proxy::dist(fv, fv[eps_lm,,drop=FALSE])
pts_within_eps <- function(lm_dist){ which(lm_dist <= self$epsilon) }

## Calculate an epsilon if one was not given
if (!is.null(self$num_sets)) {
sortedDists = matrix(apply(dist_to_lm,1,sort),nrow=f_size,byrow=TRUE)
max = which.max(sortedDists[,1])
self$epsilon = sortedDists[max,1] # radius should be distance of the farthest pt from the landmark set so that all pts are in at least one ball
}

x = apply(dist_to_lm, 2, pts_within_eps)

# if all level sets contain the same number of points, apply returns a matrix -> need to split columns into list elements
if(is.matrix(x)){ self$level_sets <- structure(split(x, rep(1:ncol(x), each = nrow(x))), names=self$index_set)
} else { self$level_sets <- structure(as.list(x), names=self$index_set) }
}
if (!missing(index)){ return(self$level_sets[[index]]) }

## Always return self
invisible(self)
})
114 changes: 114 additions & 0 deletions R/NeighborhoodCover.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#' Neighborhood Cover
#'
#' @docType class
#' @description This class provides a cover whose open sets are formed by \code{k}-neighborhoods about a landmark
#' set. If no seed or seed_method is specified, default behavior uses the first data point as the seed. Cover sets
#' may contain more than \code{k} points if there are more than \code{k} points equidistant from the central point.
#' Using this class requires the \code{RANN} package to be installed, and thus explicitly assumes the filter space
#' endowed with the euclidean metric.
#'
#' @field k := desired number of neighbord to include in a cover set
#' @field seed_index := index of data point to use as the seed for landmark set calculation
#' @field seed_method := method to select a seed ("SPEC" : user specified index | "RAND" : random index
#' | "ECC" : point with highest eccentricity in the filter space)
#' @author Yara Skaf, Cory Brunsion
#' @family cover
#' @export

library(proxy)

#' @export
NeighborhoodCover <- R6::R6Class(
classname = "NeighborhoodCover",
inherit = CoverRef,
public = list(k=NULL, seed_index=1, seed_method="SPEC")
)

## initialize ------
#' @export
NeighborhoodCover$set("public", "initialize", function(...){
super$initialize(typename="neighborhood")
params <- c(...)
if ("k" %in% names(params)){ self$k <- params[["k"]] }
if ("seed_index" %in% names(params)){ self$seed_index <- params[["seed_index"]] }
if ("seed_method" %in% names(params)){ self$seed_method <- params[["seed_method"]] }
})

## validate ------
NeighborhoodCover$set("public", "validate", function(filter){
## Get filter values
fv <- filter()
f_size <- nrow(fv)

## validate parameters
stopifnot(!is.null(self$k)) # require nieghborhood size
stopifnot(self$k >= 2 && self$k <= f_size) # size must be at least 2 points
stopifnot(self$seed_index <= f_size && self$seed_index > 0) # seed index must be within the range of the data indices
stopifnot(all(self$seed_method == "RAND") || all(self$seed_method == "SPEC") || all(self$seed_method == "ECC")) # must use one of available seed methods
})

## format ----
NeighborhoodCover$set("public", "format", function(...){
titlecase <- function(x){
s <- strsplit(x, " ")[[1]]
paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "", collapse = " ")
}
sprintf("%s Cover: (k = %s, seed index = %s)", titlecase(private$.typename), self$k, self$seed_index)
})

## construct_cover ------
NeighborhoodCover$set("public", "construct_cover", function(filter, index=NULL, dist_method = "euclidean"){
if (!requireNamespace("RANN", quietly = TRUE)){
stop("Package \"RANN\" is needed for to use this cover.", call. = FALSE)
}
self$validate(filter)
stopifnot(toupper(dist_method) %in% toupper(proxy::pr_DB$get_entry_names()))

## Get filter values
fv <- filter()
f_dim <- ncol(fv)
f_size <- nrow(fv)

if(is.null(index)){
## Set the seed index if necessary
if(all(self$seed_method == "RAND")) { self$seed_index = sample(1:f_size, 1) }
if(all(self$seed_method == "ECC")) { self$seed_index = which.max(eccentricity(from=fv, x=fv)) }

## Compute the set of k-nhds
C = c() # create an empty list to store indices of centers
k_nhds = list() # create empty list of lists to store points in each neighborhood
ptsLeft = c(1:f_size) # keep track of indices that are still available to be chosen as a center

nextC = self$seed_index # use the seed as the first center
while(TRUE){
# add the new center to the list and compute its k-neighborhood
C = append(C, nextC)
neighbors = proxy::dist(matrix(fv[nextC,], ncol=f_dim), fv, method = dist_method)
sortedNeighbors = sort(neighbors)

# include all points that are equidistant from the center, even if those points create a nhd size > k
i = 1
while( ((self$k+i) <= f_size) && (sortedNeighbors[self$k] == sortedNeighbors[self$k + i]) ){i = i + 1}
nhd = order(neighbors)[1:(self$k + i - 1)]
k_nhds = append(k_nhds, list(nhd))

# points that are included in a nhd are no longer eligible to be chosen as centers
ptsLeft = setdiff(ptsLeft, nhd)
if(length(ptsLeft) <= 0){break}

# select the next center
dists = proxy::dist(matrix(fv[C,], ncol=f_dim), matrix(fv[ptsLeft,], ncol=f_dim), method = dist_method)
sortedDists = matrix(apply(dists,2,sort),ncol=length(ptsLeft))
max = which.max(sortedDists[1,])
nextC = ptsLeft[max]
}

## Construct the index set and level sets
self$index_set = as.character(C)
self$level_sets = structure(k_nhds, names=self$index_set)
}
if (!missing(index)){ return(self$level_sets[[index]]) }

## Always return self
invisible(self)
})
Loading