Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stream
Version: 1.5-0.1
Date: 2021-xx-xx
Version: 1.5-1.0
Date: 2021-10-11
Encoding: UTF-8
Title: Infrastructure for Data Stream Mining
Authors@R: c(person("Michael", "Hahsler", role = c("aut", "cre", "cph"),
Expand All @@ -14,8 +14,9 @@ Authors@R: c(person("Michael", "Hahsler", role = c("aut", "cre", "cph"),
Description: A framework for data stream modeling and associated data mining tasks such as clustering and classification. The development of this package was supported in part by NSF IIS-0948893 and NIH R21HG005912. Hahsler et al (2017) <doi:10.18637/jss.v076.i14>.
Depends: R (>= 3.5.0), methods, registry, proxy (>= 0.4-7)
Imports: clue, cluster, clusterGeneration, dbscan (>= 1.0-0), fpc, graphics, grDevices, MASS, mlbench, Rcpp (>= 0.11.4), stats, utils
Suggests: animation, DBI, rJava, RSQLite, testthat
Suggests: animation, DBI, rJava, RSQLite, testthat, ggplot2
URL: https://github.com/mhahsler/stream
BugReports: https://github.com/mhahsler/stream/issues
LinkingTo: Rcpp, BH
LinkingTo: Rcpp, BH, RcppEigen
RcppModules: SHCModule, SigmaIndexModule
License: GPL-3
26 changes: 25 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,16 @@ export(

description,

DSC_registry
DSC_registry,

SHCAgglomerationType,
SHCDriftType,
DSC_SHC.behavioral,
DSC_SHC.man,
SHCEvalCallback,
stream.SHC,
stream.SHC.clone,
stream.SHC.man
)

### update
Expand Down Expand Up @@ -247,3 +256,18 @@ S3method(add_keyframe, MGC_Linear)
S3method(get_keyframes, MGC_Linear)
S3method(remove_keyframe, MGC_Linear)

S3method(get_stats, DSC_SHC)
S3method(get_microclusters, DSC_SHC)
S3method(get_microweights, DSC_SHC)
S3method(get_macroclusters, DSC_SHC)
S3method(get_macroweights, DSC_SHC)
S3method(microToMacro, DSC_SHC)
S3method(get_assignment, DSC_SHC)
S3method(get_outlier_positions, DSC_SHC)
S3method(recheck_outlier, DSC_SHC)
S3method(clean_outliers, DSC_SHC)
S3method(plot, DSC_SHC)
S3method(getHistogram, DSC_SHC)
S3method(clearEigenMPSupport, DSC_SHC)

S3method(evaluate_callback, SHCEvalCallback)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# stream 1.5-1.0 (10/11/21)

## New Features
* Added DSC_SHC. Code and Interface by Dalibor Krleža.

# stream 1.5-0.1 (xx/xx/21)

## Bug Fixes
Expand Down
4 changes: 4 additions & 0 deletions R/AAAregistry.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ DSC_registry$set_field("DSC_Micro", type = "logical",
is_key = TRUE)
DSC_registry$set_field("DSC_Macro", type = "logical",
is_key = TRUE)
DSC_registry$set_field("DSC_Outlier", type = "logical",
is_key = TRUE)
DSC_registry$set_field("DSC_SinglePass", type = "logical",
is_key = TRUE)
2 changes: 1 addition & 1 deletion R/DSC_BICO.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,6 @@ BICO_R$methods(
)

DSC_registry$set_entry(name = "DSC_BICO",
DSC_Micro = TRUE, DSC_Macro = FALSE,
DSC_Micro = TRUE, DSC_Macro = FALSE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "BICO - Fast computation of k-means coresets")

2 changes: 1 addition & 1 deletion R/DSC_BIRCH.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,6 @@ birch$methods(


DSC_registry$set_entry(name = "DSC_BIRCH",
DSC_Micro = TRUE, DSC_Macro = FALSE,
DSC_Micro = TRUE, DSC_Macro = FALSE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "BIRCH - Balanced Iterative Reducing Clustering using Hierarchies")

2 changes: 1 addition & 1 deletion R/DSC_DBSCAN.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,6 @@ DBSCAN$methods(
)

DSC_registry$set_entry(name = "DSC_DBSCAN",
DSC_Micro = FALSE, DSC_Macro = TRUE,
DSC_Micro = FALSE, DSC_Macro = TRUE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "DBSCAN Reclustering")

2 changes: 1 addition & 1 deletion R/DSC_DBSTREAM.R
Original file line number Diff line number Diff line change
Expand Up @@ -445,5 +445,5 @@ get_cluster_assignments <- function(x) {


DSC_registry$set_entry(name = "DSC_DBSTREAM",
DSC_Micro = TRUE, DSC_Macro = FALSE,
DSC_Micro = TRUE, DSC_Macro = FALSE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "DBSTREAM - density-based stream clustering with shared-density-based reclustering.")
2 changes: 1 addition & 1 deletion R/DSC_DStream.R
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,6 @@ get_assignment.DSC_DStream <- function(dsc, points, type=c("auto", "micro", "mac
}

DSC_registry$set_entry(name = "DSC_DStream",
DSC_Micro = TRUE, DSC_Macro = FALSE,
DSC_Micro = TRUE, DSC_Macro = FALSE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "DStream")

2 changes: 1 addition & 1 deletion R/DSC_EA.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,6 @@ EA_R$methods(


DSC_registry$set_entry(name = "DSC_EA",
DSC_Micro = FALSE, DSC_Macro = TRUE,
DSC_Micro = FALSE, DSC_Macro = TRUE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "EA - Reclustering using an evolutionary algorithm")

2 changes: 1 addition & 1 deletion R/DSC_Hierarchical.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,6 @@ hierarchical$methods(
)

DSC_registry$set_entry(name = "DSC_Hierarchical",
DSC_Micro = FALSE, DSC_Macro = TRUE,
DSC_Micro = FALSE, DSC_Macro = TRUE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "Hierarchical reclustering")

70 changes: 35 additions & 35 deletions R/DSC_Kmeans.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#######################################################################
# stream - Infrastructure for Data Stream Mining
# Copyright (C) 2013 Michael Hahsler, Matthew Bolanos, John Forrest
# Copyright (C) 2013 Michael Hahsler, Matthew Bolanos, John Forrest
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -16,17 +16,17 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

### creator
### creator
DSC_Kmeans <- function(k, weighted = TRUE, iter.max = 10, nstart = 1,
algorithm = c("Hartigan-Wong", "Lloyd", "Forgy",
"MacQueen"),
"MacQueen"),
min_weight = NULL, description=NULL) {

algorithm <- match.arg(algorithm)
if(!is.null(description)) desc <- description
else if(weighted) desc <- "k-Means (weighted)"
else desc <-"k-Means"

structure(list(description = desc,
RObj = kmeans_refClass$new(
k=k, weighted=weighted, iter.max = iter.max, nstart = nstart,
Expand All @@ -35,7 +35,7 @@ DSC_Kmeans <- function(k, weighted = TRUE, iter.max = 10, nstart = 1,
}


kmeans_refClass <- setRefClass("kmeans",
kmeans_refClass <- setRefClass("kmeans",
fields = list(
k = "numeric",
weighted = "logical",
Expand All @@ -49,62 +49,62 @@ kmeans_refClass <- setRefClass("kmeans",
clusterWeights = "numeric",
details = "ANY",
min_weight = "numeric"
),
),

methods = list(
initialize = function(
k = 3,
weighted = TRUE,
iter.max = 10,
nstart = 1,
algorithm = c("Hartigan-Wong", "Lloyd",
algorithm = c("Hartigan-Wong", "Lloyd",
"Forgy","MacQueen"),
min_weight = NULL
) {
k <<- k

k <<- k
weighted <<- weighted
iter.max <<- iter.max
iter.max <<- iter.max
nstart <<- nstart
algorithm <<- match.arg(algorithm)
assignment <<- numeric()
weights <<- numeric()
clusterWeights <<- numeric()
assignment <<- numeric()
weights <<- numeric()
clusterWeights <<- numeric()
clusterCenters <<- data.frame()
data <<- data.frame()

if(is.null(min_weight)) min_weight <<- 0
else min_weight <<- min_weight

.self
}

),
)

kmeans_refClass$methods(
cluster = function(x, weight = rep(1,nrow(x)), ...) {
# if(nrow(x)==1)

# if(nrow(x)==1)
# warning("DSC_Kmeans does not support iterative updating! Old data is overwritten.")

### filter weak clusters
if(min_weight>0) {
x <- x[weight>min_weight,]
weight <- weight[weight>min_weight]
}


weights <<- weight
data <<- x

if(nrow(data)>k) {
if(weighted) km <- kmeansW(x=data, weight=weights, centers=k,
if(weighted) km <- kmeansW(x=data, weight=weights, centers=k,
iter.max = iter.max, nstart = nstart)
else km <- kmeans(x=data, centers=k,
else km <- kmeans(x=data, centers=k,
iter.max = iter.max, nstart = nstart,
algorithm = algorithm)

assignment <<- km$cluster
clusterCenters <<- data.frame(km$centers)
details <<- km
Expand All @@ -113,26 +113,26 @@ kmeans_refClass$methods(
clusterCenters <<- x
details <<- NULL
}

clusterWeights <<- sapply(1:k, FUN =
function(i) sum(weights[assignment==i]))

},

get_macroclusters = function(...) { clusterCenters },
get_macroweights = function(...) { clusterWeights },

get_microclusters = function(...) { data },
get_microweights = function(x) { weights },
microToMacro = function(micro=NULL, ...){

microToMacro = function(micro=NULL, ...){
if(is.null(micro)) micro <- 1:nrow(data)
structure(assignment[micro], names=micro)
}
}
)


DSC_registry$set_entry(name = "DSC_Kmeans",
DSC_Micro = FALSE, DSC_Macro = TRUE,
DSC_Micro = FALSE, DSC_Macro = TRUE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "K-means reclustering")

14 changes: 7 additions & 7 deletions R/DSC_Reachability.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#######################################################################
# stream - Infrastructure for Data Stream Mining
# Copyright (C) 2013 Michael Hahsler, Matthew Bolanos, John Forrest
# Copyright (C) 2013 Michael Hahsler, Matthew Bolanos, John Forrest
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -18,20 +18,20 @@

# Reachability and single-link hierarchical clustering are equivalent

### creator
### creator
DSC_Reachability <- function(epsilon, min_weight=NULL, description=NULL) {
hierarchical <- hierarchical$new(

hierarchical <- hierarchical$new(
h=epsilon, method="single", min_weight=min_weight)

if(is.null(description)) description <- "Reachability"

l <- list(description = description, RObj = hierarchical)
class(l) <- c("DSC_Reachability", "DSC_Hierarchical", "DSC_Macro", "DSC_R", "DSC")
l
}

DSC_registry$set_entry(name = "DSC_Reachability",
DSC_Micro = FALSE, DSC_Macro = TRUE,
DSC_Micro = FALSE, DSC_Macro = TRUE, DSC_Outlier = FALSE, DSC_SinglePass = FALSE,
description = "Reachability reclustering")

Loading