diff --git a/Docker/cnv-opt-canoes/Dockerfile b/Docker/cnv-opt-canoes/Dockerfile
new file mode 100644
index 0000000..b404cab
--- /dev/null
+++ b/Docker/cnv-opt-canoes/Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:xenial
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN apt-get update
+RUN apt-get install -y software-properties-common
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
+RUN add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/'
+RUN apt-get install -y apt-transport-https
+
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y r-base libssl-dev libssh2-1-dev libxml2-dev libcurl4-openssl-dev libpq-dev
+
+RUN Rscript -e "install.packages('nnls', repos = 'http://cran.us.r-project.org')"
+RUN Rscript -e "install.packages('Hmisc', repos = 'http://cran.us.r-project.org')"
+RUN Rscript -e "install.packages('mgcv', repos = 'http://cran.us.r-project.org')"
+RUN Rscript -e "install.packages('plyr', repos = 'http://cran.us.r-project.org')"
+RUN Rscript -e "install.packages('CANOES', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Docker/cnv-opt-canoescov/Dockerfile b/Docker/cnv-opt-canoescov/Dockerfile
new file mode 100644
index 0000000..fa7683b
--- /dev/null
+++ b/Docker/cnv-opt-canoescov/Dockerfile
@@ -0,0 +1,11 @@
+FROM biodatageeks/cnv-opt-canoes
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('IRanges')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('BSgenome.Hsapiens.UCSC.hg19')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('Biostrings')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('Rsamtools')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomeInfoDb')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('S4Vectors')"
+
+RUN Rscript -e "install.packages('CANOESCOV', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Docker/cnv-opt-cnv-simulator/Dockerfile b/Docker/cnv-opt-cnv-simulator/Dockerfile
new file mode 100644
index 0000000..d31a1d1
--- /dev/null
+++ b/Docker/cnv-opt-cnv-simulator/Dockerfile
@@ -0,0 +1,4 @@
+FROM biodatageeks/cnv-opt-codex
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "install.packages('CNV.SIMULATOR', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Docker/cnv-opt-codex/Dockerfile b/Docker/cnv-opt-codex/Dockerfile
new file mode 100644
index 0000000..6f56eb5
--- /dev/null
+++ b/Docker/cnv-opt-codex/Dockerfile
@@ -0,0 +1,14 @@
+FROM ubuntu:xenial
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN apt-get update
+RUN apt-get install -y software-properties-common
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
+RUN add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/'
+RUN apt-get install -y apt-transport-https
+
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y r-base libssl-dev libssh2-1-dev libxml2-dev libcurl4-openssl-dev libpq-dev
+
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('CODEX')"
diff --git a/Docker/cnv-opt-codexcov/Dockerfile b/Docker/cnv-opt-codexcov/Dockerfile
new file mode 100644
index 0000000..258bbf8
--- /dev/null
+++ b/Docker/cnv-opt-codexcov/Dockerfile
@@ -0,0 +1,4 @@
+FROM biodatageeks/cnv-opt-codex
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "install.packages('CODEXCOV', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Docker/cnv-opt-exomecopy/Dockerfile b/Docker/cnv-opt-exomecopy/Dockerfile
new file mode 100644
index 0000000..0333434
--- /dev/null
+++ b/Docker/cnv-opt-exomecopy/Dockerfile
@@ -0,0 +1,20 @@
+FROM ubuntu:xenial
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN apt-get update
+RUN apt-get install -y software-properties-common
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
+RUN add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/'
+RUN apt-get install -y apt-transport-https
+
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y r-base libssl-dev libssh2-1-dev libxml2-dev libcurl4-openssl-dev libpq-dev wget
+
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('IRanges')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomicRanges')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('Rsamtools')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomeInfoDb')"
+
+RUN wget http://bioconductor.org/packages/3.5/bioc/src/contrib/exomeCopy_1.22.0.tar.gz
+RUN Rscript -e "install.packages('exomeCopy_1.22.0.tar.gz', repos = NULL, type='source')"
diff --git a/Docker/cnv-opt-exomecopycov/Dockerfile b/Docker/cnv-opt-exomecopycov/Dockerfile
new file mode 100644
index 0000000..c12fa2f
--- /dev/null
+++ b/Docker/cnv-opt-exomecopycov/Dockerfile
@@ -0,0 +1,8 @@
+FROM biodatageeks/cnv-opt-exomecopy
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomeInfoDb')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('BSgenome.Hsapiens.UCSC.hg19')"
+
+RUN Rscript -e "install.packages('EXOMECOPYCOV', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
+
diff --git a/Docker/cnv-opt-exomedepth/Dockerfile b/Docker/cnv-opt-exomedepth/Dockerfile
new file mode 100644
index 0000000..fd16d85
--- /dev/null
+++ b/Docker/cnv-opt-exomedepth/Dockerfile
@@ -0,0 +1,21 @@
+FROM ubuntu:xenial
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN apt-get update
+RUN apt-get install -y software-properties-common
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9
+RUN add-apt-repository 'deb [arch=amd64,i386] https://cran.rstudio.com/bin/linux/ubuntu xenial/'
+RUN apt-get install -y apt-transport-https
+
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y r-base libssl-dev libssh2-1-dev libxml2-dev libcurl4-openssl-dev libpq-dev
+
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('Biostrings')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('IRanges')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('Rsamtools')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomicRanges')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('RCurl')"
+RUN Rscript -e "source('https://bioconductor.org/biocLite.R');biocLite('GenomicAlignments')"
+RUN Rscript -e "install.packages('ExomeDepth', repos = 'http://cran.us.r-project.org')"
+
diff --git a/Docker/cnv-opt-exomedepthcov/Dockerfile b/Docker/cnv-opt-exomedepthcov/Dockerfile
new file mode 100644
index 0000000..28448f8
--- /dev/null
+++ b/Docker/cnv-opt-exomedepthcov/Dockerfile
@@ -0,0 +1,5 @@
+FROM biodatageeks/cnv-opt-exomedepth
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "install.packages('EXOMEDEPTHCOV', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
+
diff --git a/Docker/cnv-opt-reference-sample-set-selector/Dockerfile b/Docker/cnv-opt-reference-sample-set-selector/Dockerfile
new file mode 100644
index 0000000..d854276
--- /dev/null
+++ b/Docker/cnv-opt-reference-sample-set-selector/Dockerfile
@@ -0,0 +1,8 @@
+FROM biodatageeks/cnv-opt-codex
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+ARG CACHE_DATE=not_a_specified_date
+
+RUN Rscript -e "install.packages('ExomeDepth', repos = 'http://cran.us.r-project.org')"
+
+RUN Rscript -e "install.packages('REFERENCE.SAMPLE.SET.SELECTOR', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Docker/cnv-opt-target-qc/Dockerfile b/Docker/cnv-opt-target-qc/Dockerfile
new file mode 100644
index 0000000..8391b91
--- /dev/null
+++ b/Docker/cnv-opt-target-qc/Dockerfile
@@ -0,0 +1,4 @@
+FROM biodatageeks/cnv-opt-codex
+MAINTAINER biodatageeks <team@biodatageeks.ii.pw.edu.pl>
+
+RUN Rscript -e "install.packages('TARGET.QC', repos = 'http://zsibio.ii.pw.edu.pl/nexus/repository/r-all')"
diff --git a/Jenkinsfile b/Jenkinsfile
index 4d46cc4..cb2dbad 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -2,7 +2,7 @@ pipeline {
     agent any
        stages {
 
-        stage('Test R code') {
+        /*stage('Test R code') {
                     steps {
                         echo 'Testing R code....'
                         sh 'docker run -i --rm --network="host" -e CNV_OPT_PSQL_USER="cnv-opt" -e CNV_OPT_PSQL_PASSWORD="zsibio321" -e CNV_OPT_PSQL_DRV_URL="http://zsibio.ii.pw.edu.pl/nexus/repository/zsi-bio-raw/common/jdbc/postgresql-42.1.1.jar" -e CNV_OPT_PSQL_CONN_URL="jdbc:postgresql://cdh00.ii.pw.edu.pl:15432/cnv-opt" -w="/tmp" -v $(pwd | sed "s|/var/jenkins_home|/data/home/jenkins|g")/R:/tmp zsibio.ii.pw.edu.pl:50009/zsi-bio-toolset Rscript tests/run_tests.R'
@@ -12,7 +12,7 @@ pipeline {
                         junit '**R/tests/*.xml'
                       }
                     }
-         }
+         }*/
 
          stage('Build R package') {
                              steps {
@@ -21,37 +21,48 @@ pipeline {
                                  sh "cd R && R CMD build REFERENCE.SAMPLE.SET.SELECTOR/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file REFERENCE.SAMPLE.SET.SELECTOR_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/REFERENCE.SAMPLE.SET.SELECTOR_0.0.1.tar.gz"
                                  sh "cd R && R CMD build CODEXCOV/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CODEXCOV_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CODEXCOV_0.0.1.tar.gz"
                                  sh "cd R && R CMD build EXOMEDEPTHCOV/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file EXOMEDEPTHCOV_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/EXOMEDEPTHCOV_0.0.1.tar.gz"
+                                 sh "cd R && R CMD build EXOMECOPYCOV/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file EXOMECOPYCOV_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/EXOMECOPYCOV_0.0.1.tar.gz"
                                  sh "cd R && R CMD build CANOESCOV/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CANOESCOV_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CANOESCOV_0.0.1.tar.gz"
+                                 sh "cd R && R CMD build CANOES/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CANOES_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CANOES_0.0.1.tar.gz"
                                  sh "cd R && R CMD build CNVCALLER.RUNNER/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CNVCALLER.RUNNER_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CNVCALLER.RUNNER_0.0.1.tar.gz"
                                  sh "cd R && R CMD build CNVCALLER.EVALUATOR/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CNVCALLER.EVALUATOR_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CNVCALLER.EVALUATOR_0.0.1.tar.gz"
+                                 sh "cd R && R CMD build CNV.SIMULATOR/ && curl -v --user ${NEXUS_USER}:${NEXUS_PASS} --upload-file CNV.SIMULATOR_0.0.1.tar.gz http://zsibio.ii.pw.edu.pl/nexus/repository/r-zsibio/src/contrib/CNV.SIMULATOR_0.0.1.tar.gz"
                              }
 
                   }
 
         stage('Test Scala code') {
                     steps {
-                        slackSend botUser: true, channel: '#development', message: 'started ${env.JOB_NAME} ${env.BUILD_NUMBER} (<${env.BUILD_URL}|Open>)', teamDomain: 'zsibio.slack.com'
+                        // slackSend botUser: true, channel: '#development', message: 'started ${env.JOB_NAME} ${env.BUILD_NUMBER} (<${env.BUILD_URL}|Open>)', teamDomain: 'zsibio.slack.com'
                         echo 'Testing Scala code....'
-                        sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt test"
-                    }
-                    post {
-                      always {
-                        junit '**/target/test-reports/*.xml'
-                      }
+                        // sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt test"
                     }
+                    // post {
+                      // always {
+                        // junit '**/target/test-reports/*.xml'
+                      // }
+                    // }
                 }
 
          stage('Package scala code') {
                             steps {
                                 echo 'Building Scala code....'
-                                sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt package"
+                                // sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt package"
         			            echo "Generating documentation"
-        			            sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt doc"
-        			            publishHTML([allowMissing: false, alwaysLinkToLastBuild: true, keepAll: false, reportDir: 'target/scala-2.11/api/', reportFiles: 'package.html', reportName: 'Scala Doc', reportTitles: ''])
+        			            // sh "${tool name: 'sbt-0.13.15', type: 'org.jvnet.hudson.plugins.SbtPluginBuilder$SbtInstallation'}/bin/sbt doc"
+        			            // publishHTML([allowMissing: false, alwaysLinkToLastBuild: true, keepAll: false, reportDir: 'target/scala-2.11/api/', reportFiles: 'package.html', reportName: 'Scala Doc', reportTitles: ''])
 
                             }
 
                 }
+
+        stage('Build Docker images') {
+                    steps {
+                        echo 'Building Docker images....'
+                        sh './build.sh'
+                    }
+                }
+
          stage('Publish to Nexus snapshots and copying assembly fat jar to the edge server') {
                    when {
                          branch 'master'
diff --git a/R/CANOES/DESCRIPTION b/R/CANOES/DESCRIPTION
new file mode 100644
index 0000000..0824435
--- /dev/null
+++ b/R/CANOES/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: CANOES
+Title: CANOES Package
+Version: 0.0.1
+Authors@R: c(
+    person("Tomasz", "Gambin", email = "tgambin@gmail.com", role = c("aut", "cre")),
+    person("Marek", "Wiewiórka", email = "marek.wiewiorka@gmail.com", role = c("aut")),
+    person("Wiktor", "Kuśmirek", email = "kusmirekwiktor@gmail.com", role = c("aut")))
+Description: An implementation of the CANOES package in R.
+Depends:
+    R (>= 3.2.3),
+    plyr (>= 1.8.4),
+    nnls (>= 1.4.0),
+    Hmisc (>= 4.0.0),
+    mgcv (>= 1.8.0)
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 6.0.1.9000
diff --git a/R/CANOES/NAMESPACE b/R/CANOES/NAMESPACE
new file mode 100644
index 0000000..884a631
--- /dev/null
+++ b/R/CANOES/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
+exportPattern("^[^\\.]")
diff --git a/R/CANOES/R/functions_CANOES.R b/R/CANOES/R/functions_CANOES.R
new file mode 100644
index 0000000..b3077b4
--- /dev/null
+++ b/R/CANOES/R/functions_CANOES.R
@@ -0,0 +1,685 @@
+# Constants
+NUM.ABNORMAL.STATES=2
+NUM.STATES=3
+DELETION=1
+NORMAL=2
+DUPLICATION=3
+
+# PlotCNV
+#     Plots count data for targets of interest
+#     highlights sample of interest in red, 
+#     highlights area of interest with a black line
+#     highlights probe locations with black dots
+# Arguments:
+#   counts: 
+#     count matrix, with column "target" with target numbers 
+#     and sample data in columns 6:end
+#   sample.name:
+#     sample of interest (will be highlighted in red in figure)
+#     (should correspond to a column in counts)
+#   targets:
+#     targets of interest in the form start.target..end.target
+#   offset:
+#     number of targets to add on either end (default=1)
+# Returns: 
+#   returns nothing
+PlotCNV <- function(counts, sample.name, targets, offset=1){
+  sample.name <- as.character(sample.name)
+  if (!sample.name %in% names(counts)){stop("No column for sample ", sample.name, " in counts matrix")}
+  if (length(setdiff("target", names(counts)[1:5]) > 0)){
+    stop("counts matrix must have column named target")
+  }
+  t <- as.character(targets)
+  start.target <- as.numeric(unlist(strsplit(t, "..", fixed=T))[1])
+  end.target <- as.numeric(unlist(strsplit(t, "..", fixed=T))[2])
+  if (!start.target %in% counts$target){
+    stop("no data for start.target in counts matrix")
+  }
+  if (!end.target %in% counts$target){
+    stop("no data for end.target in counts matrix")
+  }
+  if ((start.target - offset) %in% counts$target){
+    start.target <- start.target - offset
+  }
+  if ((end.target + offset) %in% counts$target){
+    end.target <- end.target + offset
+  }
+  ref.sample.names <- setdiff(as.character(names(counts)[-seq(1,5)]), 
+                              sample.name)
+  data <- subset(counts, target >= start.target & target <= end.target)
+  sample.data <- data[, sample.name]
+  means <- apply(data[, ref.sample.names], 1, mean)
+  sd <- sqrt(apply(data[, ref.sample.names], 1, var))
+  refs.z.scores <- matrix(NA, nrow(data), length(ref.sample.names))
+  sample.z.score <- numeric(length = nrow(data))
+  for (i in seq(1, dim(data)[1])){
+    refs.z.scores[i, ] <- as.numeric((data[i, ref.sample.names] - means[i]) / 
+                                       max(0.000001, sd[i]))
+    sample.z.score[i] <- (sample.data[i] - means[i]) / max(0.000001, sd[i])
+  }
+  ylim <- max(abs(refs.z.scores), abs(sample.z.score))
+  plot(seq(-6, 6), seq(-6, 6), 
+       xlim=c(data[1, "start"], data[dim(data)[1], "start"]), 
+       ylim=c(-ylim - 0.1, ylim + 0.1), type="n", xlab="", ylab="Z-score")
+  for (i in seq(1, length(ref.sample.names))){
+    lines(data[, "start"], refs.z.scores[, i], col="#2f4f4f85")
+  }
+  lines(data[, "start"], sample.z.score, col="red", lwd=3)
+  points(data[, "start"], rep(-ylim - 0.05, length(data[, "start"])), pch=20)
+  lines( c(data[1 + offset, "start"], data[nrow(data) - offset, "end"]) , 
+         c(ylim+0.2, ylim+0.2), lwd=2)
+  title(main=paste("Sample ", sample.name, ", ", 
+                   counts$chromosome[start.target], ":", 
+                   data$start[1], "-", data$end[nrow(data)], sep=""))
+}
+
+# CallCNVs
+#     Calls CNVs in sample of interest
+# Arguments:
+#   sample.name:
+#     sample to call CNVs in (should correspond to a column in counts)
+#   counts: 
+#     count matrix, first five columns should be 
+#       target: consecutive numbers for targets (integer)
+#       chromosome: chromosome number (integer-valued) 
+#         (support for sex chromosomes to come)
+#       start: start position of probe (integer)
+#       end: end position of probe (integer)
+#       gc: gc content (real between 0 and 1)
+#       subsequent columns should include counts for each probe for samples
+#   p:
+#     average rate of occurrence of CNVs (real) default is 1e-08
+#   D:
+#     expected distance between targets in a CNV (integer) default is 70,000
+#   Tnum:
+#     expected number of targets in a CNV (integer) default is 6
+#   numrefs
+#     maximum number of reference samples to use (integer) default is 30
+#     the weighted variance calculations will take a long time if too 
+#     many reference samples are used
+# Returns: 
+#   data frame with the following columns:
+#      SAMPLE: name of sample
+#      CNV: DEL of DUP
+#      INTERVAL: CNV coordinates in the form chr:start-stop
+#      KB: length of CNV in kilobases
+#      CHR: chromosome
+#      MID_BP: middle base pair of CNV
+#      TARGETS: target numbers of CNV in the form start..stop
+#      NUM_TARG: how many targets are in the CNV
+#      Q_SOME: a Phred-scaled quality score for the CNV
+CallCNVs <- function(sample.name, counts, p=1e-08, Tnum=6, D=70000, numrefs=30, get.dfs=F, homdel.mean=0.2){
+  if (!sample.name %in% names(counts)){stop("No column for sample ", sample.name, " in counts matrix")}
+  if (length(setdiff(names(counts)[1:5], c("target", "chromosome", "start", "end", "gc"))) > 0){
+    stop("First five columns of counts matrix must be target, chromosome, start, end, gc")
+  }
+  if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) {
+    # remove sex chromosomes
+    cat("Trying to remove sex chromosomes and 'chr' prefixes\n")
+    counts <- subset(counts, !chromosome %in% c("chrX", "chrY", "X", "Y"))
+    if (sum(grepl("chr", counts$chromosome))==length(counts$chromosome)){
+      counts$chromosome <- gsub("chr", "", counts$chromosome)
+    }
+    counts$chromosome <- as.numeric(counts$chromosome)
+    if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) 
+      stop("chromosome must take value in range 1-22 (support for sex chromosomes to come)")
+  }
+  library(plyr)
+  counts <- arrange(counts, chromosome, start)
+  if (p <= 0){
+    stop("parameter p must be positive")
+  }
+  if (Tnum <= 0){
+    stop("parameter Tnum must be positive")
+  }
+  if (D <= 0){
+    stop("parameter D must be positive")
+  }
+  if (numrefs <= 0){
+    stop("parameter numrefs must be positive")
+  }
+  sample.names <- colnames(counts)[-seq(1,5)]
+  # find mean coverage of probes
+  mean.counts <- mean(apply(counts[, sample.names], 2, mean))
+  # normalize counts; round so we can use negative binomial
+  counts[, sample.names] <- apply(counts[, sample.names], 2, 
+        function(x, mean.counts) 
+                 round(x * mean.counts / mean(x)), mean.counts)
+  # calculate covariance of read count across samples
+  cov <- cor(counts[, sample.names], counts[, sample.names])
+  reference.samples <- setdiff(sample.names, sample.name)
+  covariances <- cov[sample.name, reference.samples]
+  reference.samples <- names(sort(covariances, 
+          decreasing=T)[1:min(numrefs, length(covariances))])
+  sample.mean.counts <- mean(counts[, sample.name])
+  sample.sumcounts <- apply(counts[, reference.samples], 2, sum)
+  # normalize reference samples to sample of interest
+  counts[, reference.samples] <- apply(counts[, reference.samples], 2, 
+        function(x, sample.mean.counts) 
+                round(x * sample.mean.counts / 
+                mean(x)), sample.mean.counts)  
+  # select reference samples and weightings using non-negative least squares
+  b <- counts[, sample.name]
+  A <- as.matrix(counts[, reference.samples])
+  library(nnls)
+  all <- nnls(A, b)$x
+  est <- matrix(0, nrow=50, ncol=length(reference.samples))
+  set.seed(1)
+  for (i in 1:50){
+    d <- sample(nrow(A), min(500, nrow(A)))
+    est[i, ] <- nnls(A[d, ], b[d])$x
+  }
+  weights <- colMeans(est)
+  sample.weights <- weights / sum(weights)
+  library(Hmisc)
+  # calculate weighted mean of read count
+  # this is used to calculate emission probabilities
+  counts$mean <- apply(counts[, reference.samples], 
+                       1, wtd.mean, sample.weights)
+  targets <- counts$target
+  # exclude probes with all zero counts
+  nonzero.rows <- counts$mean > 0
+  nonzero.rows.df <- data.frame(target=counts$target, 
+                                nonzero.rows=nonzero.rows)
+
+  counts <- counts[nonzero.rows, ]
+  # get the distances between consecutive probes
+  distances <- GetDistances(counts)
+  # estimate the read count variance at each probe
+  var.estimate <- EstimateVariance(counts, reference.samples, 
+                                               sample.weights)
+  emission.probs <- EmissionProbs(counts[, sample.name], 
+                        counts$mean, var.estimate$var.estimate, 
+                        counts[, "target"])
+  if (get.dfs){
+    return(list(emission.probs=emission.probs, distances=distances))
+  }
+  # call CNVs with the Viterbi algorithm
+  viterbi.state <- Viterbi(emission.probs, distances, p, Tnum, D)  
+  # format the CNVs
+  cnvs <- PrintCNVs(sample.name, viterbi.state, 
+                         counts)
+  # if there aren't too many CNVs, calculate the Q_SOME
+  if (nrow(cnvs) > 0 & nrow(cnvs) <= 50){
+    qualities <- GenotypeCNVs(cnvs, sample.name, counts, p, Tnum, D, numrefs, 
+                          emission.probs=emission.probs, 
+                          distances=distances)
+    for (i in 1:nrow(cnvs)){
+      cnvs$Q_SOME[i] <- ifelse(cnvs$CNV[i]=="DEL", qualities[i, "SQDel"], 
+                               qualities[i, "SQDup"])
+    }
+  }
+  data <- as.data.frame(cbind(counts$target, counts$mean, var.estimate$var.estimate, counts[, sample.name]))
+  names(data) <- c("target", "countsmean", "varestimate", "sample")
+  if (nrow(cnvs) > 0){
+    cnvs <- CalcCopyNumber(data, cnvs, homdel.mean)
+  }
+  return(cnvs)
+}
+
+# GenotypeCNVs
+#     Genotype CNVs in sample of interest
+# Arguments:
+#   xcnv
+#     data frame with the following columns, and one row for each
+#     CNV to genotype
+#      INTERVAL: CNV coordinates in the form chr:start-stop
+#      TARGETS: target numbers of CNV in the form start..stop
+#               these should correspond to the target numbers in counts
+#   sample.name:
+#     sample to genotype CNVs in (should correspond to a column in counts)
+#   counts: 
+#     count matrix, first five columns should be 
+#       target: consecutive numbers for targets (integer)
+#       chromosome: chromosome number (integer-valued) 
+#         (support for sex chromosomes to come)
+#       start: start position of probe (integer)
+#       end: end position of probe (integer)
+#       gc: gc content (real between 0 and 1)
+#       subsequent columns should include counts for each probe for samples
+#   p:
+#     average rate of occurrence of CNVs (real) default is 1e-08
+#   D:
+#     expected distance between targets in a CNV (integer) default is 70,000
+#   Tnum:
+#     expected number of targets in a CNV (integer) default is 6
+#   numrefs
+#     maximum number of reference samples to use (integer) default is 30
+#     the weighted variance calculations will take a long time if too 
+#     many reference samples are used
+#   emission.probs and distances are for internal use only
+# Returns: 
+#   data frame with the following columns and one row for each genotyped CNV:
+#      INTERVAL: CNV coordinates in the form chr:start-stop
+#      NQDEL: a Phred-scaled quality score that sample.name has no deletion 
+#             in the interval
+#      SQDEL: a Phred-scaled quality score that sample.name has a deletion 
+#             in the interval
+#      NQDUP and SQDUP: same, but for a duplication
+GenotypeCNVs <- function(xcnvs, sample.name, counts, p=1e-08, Tnum=6, 
+                    D=70000, numrefs=30,
+                    emission.probs=NULL, 
+                    distances=NULL){
+  if (!sample.name %in% names(counts)){stop("No column for sample ", sample.name, " in counts matrix")}
+  if (length(setdiff(names(counts)[1:5], c("target", "chromosome", "start", "end", "gc"))) > 0){
+    stop("First five columns of counts matrix must be target, chromosome, start, end, gc")
+  }
+  if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) {
+    # remove sex chromosomes
+    cat("Trying to remove sex chromosomes and 'chr' prefixes\n")
+    counts <- subset(counts, !chromosome %in% c("chrX", "chrY", "X", "Y"))
+    if (sum(grepl("chr", counts$chromosome))==length(counts$chromosome)){
+      counts$chromosome <- gsub("chr", "", counts$chromosome)
+    }
+    counts$chromosome <- as.numeric(counts$chromosome)
+    if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) 
+      stop("chromosome must take value in range 1-22 (support for sex chromosomes to come)")
+  }
+  library(plyr)
+  counts <- arrange(counts, chromosome, start)
+  if (p <= 0){
+    stop("parameter p must be positive")
+  }
+  if (Tnum <= 0){
+    stop("parameter Tnum must be positive")
+  }
+  if (D <= 0){
+    stop("parameter D must be positive")
+  }
+  if (numrefs <= 0){
+    stop("parameter numrefs must be positive")
+  }
+  num.cnvs <- nrow(xcnvs)
+  cnv.intervals <- as.character(xcnvs$INTERVAL)
+  # if no emission probs matrix is passed in, generate a new one
+  if (is.null(emission.probs)){
+    l <- CallCNVs(sample.name, counts, p, Tnum=6, D=70000, numrefs=30, get.dfs=T)
+    emission.probs <- l[['emission.probs']]
+    distances <- l[['distances']]
+  }
+  forward.m <- GetForwardMatrix(emission.probs, distances, p, Tnum, D)
+  backward.m <- GetBackwardMatrix(emission.probs, distances, p, Tnum, D)
+  qualities <- matrix(0, nrow=num.cnvs, ncol=5, 
+                      dimnames=list(cnv.intervals, 
+                                    c("INTERVAL", "NQDel", "SQDel", "NQDup", "SQDup")))
+  for (i in 1:num.cnvs){
+    interval <- as.character(xcnvs[i, "INTERVAL"])
+    targets <- as.numeric(strsplit(as.character(xcnvs[i, "TARGETS"]), ".", fixed=T)[[1]][c(1,3)])
+    left.target <- targets[1]
+    right.target <- targets[2]
+    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
+                                         emission.probs, distances, 
+                                         left.target, right.target, 
+                                         c(DUPLICATION, DELETION), p, Tnum, D)
+    modified.likelihood <- likelihoods[1]; 
+    unmodified.likelihood <- likelihoods[2]
+    Prob.All.Normal <- exp(modified.likelihood - unmodified.likelihood)
+    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
+                                         emission.probs, distances, 
+                                         left.target, right.target, DELETION, p, Tnum, D)
+    modified.likelihood <- likelihoods[1]; 
+    unmodified.likelihood <- likelihoods[2]
+    Prob.No.Deletion <- exp(modified.likelihood - unmodified.likelihood)
+    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
+                                         emission.probs, distances, 
+                                         left.target, right.target, DUPLICATION, p, Tnum, D)
+    modified.likelihood <- likelihoods[1]; 
+    unmodified.likelihood <- likelihoods[2]
+    Prob.No.Duplication <- exp(modified.likelihood - unmodified.likelihood)
+    # Check if probabilities greater than 1 are numerical error or bug
+    Phred <- function(prob){
+      return(round(min(99, -10 * log10(1 - prob))))
+    }
+    qualities[i, "NQDel"] <- Phred(Prob.No.Deletion)       
+    qualities[i, "SQDel"] <- Phred(Prob.No.Duplication - Prob.All.Normal)
+    qualities[i, "NQDup"] <- Phred(Prob.No.Duplication)       
+    qualities[i, "SQDup"] <- Phred(Prob.No.Deletion - Prob.All.Normal)
+    qualities[i, "INTERVAL"] <- interval
+  }
+  qualities <- as.data.frame(qualities, stringsAsFactors=F)
+  qualities$NQDel <- as.integer(qualities$NQDel)
+  qualities$NQDup <- as.integer(qualities$NQDup)
+  qualities$SQDel <- as.integer(qualities$SQDel)
+  qualities$SQDup <- as.integer(qualities$SQDup)
+  return(qualities)
+}
+
+# returns data frame with distance to each target from the previous target 
+# (0 in the case of the first target on chromosome 1, a very big number
+# for the first target on each other chromosome--this resets the HMM
+# for each chromosome)
+GetDistances <- function(counts){
+  chromosome <- counts[, "chromosome"]
+  startbase <- counts[, "start"]
+  num.nonzero.exons <- length(startbase)
+  distances <- c(0, startbase[2:num.nonzero.exons] - 
+                   startbase[1:(num.nonzero.exons - 1)] + 
+                   1000000000000 * (chromosome[2:num.nonzero.exons] - 
+                                      chromosome[1:(num.nonzero.exons - 1)]))
+  return(data.frame(target=counts[, "target"], distance=distances))
+}
+
+EstimateVariance <- function(counts, ref.sample.names, sample.weights){
+  library(Hmisc)
+  counts$var <- apply(counts[, ref.sample.names], 1, wtd.var, sample.weights, normwt=T)
+  set.seed(1)
+  counts.subset <- counts[sample(nrow(counts), min(36000, nrow(counts))), ]
+  library(mgcv)
+  # can't do gamma regression with negative 
+  counts.subset$var[counts.subset$var==0] <- 0.1 
+  fit <- gam(var ~ s(mean) + s(gc), family=Gamma(link=log), data=counts.subset)
+  # we don't want variance less than Poisson
+  # we take maximum of genome-wide estimate, method of moments estimate
+  # and Poisson variance
+  v.estimate <- pmax(predict(fit, counts, type="response"), counts$var, 
+                     counts$mean * 1.01)
+  return(data.frame(target=counts$target, var.estimate=v.estimate))
+}
+
+EmissionProbs <- function(test.counts, target.means, 
+                                      var.estimate, targets){
+  num.targets <- length(test.counts)
+  # calculate the means for the deletion, normal and duplication states
+  state.target.means <- t(apply(data.frame(x=target.means), 1, function(x) c(x*1/2, x, x*3/2)))
+  # calculate the expected size (given the predicted variance)
+  size <- target.means ^ 2 / (var.estimate - target.means)
+  emission.probs <- matrix(NA, num.targets, 4)
+  colnames(emission.probs) <- c("target", "delprob", "normalprob", "dupprob")
+  # calculate the emission probabilities given the read count
+  size.del <- size
+  size.dup <- size
+  size.del <- size / 2
+  size.dup <- size * 3 / 2
+  emission.probs[, "delprob"] <- dnbinom(
+    test.counts,
+    mu=state.target.means[, 1],
+    size=size.del, log=T)
+  emission.probs[, "normalprob"] <- dnbinom(
+    test.counts,
+    mu=state.target.means[, 2],
+    size=size, log=T)
+  emission.probs[, "dupprob"] <- dnbinom(
+    test.counts,
+    mu=state.target.means[, 3],
+    size=size.dup, log=T)
+  emission.probs[, "target"] <- targets
+  # some values may be infinite as a result of extreme read count
+  row.all.inf <- which(apply(emission.probs, 1, function(x){all(is.infinite(x))}))
+  if (length(row.all.inf) > 0){
+    for (i in row.all.inf){
+      if (test.counts[i] >= state.target.means[i, 3]){
+        emission.probs[i, 2:4] <- c(-Inf, -Inf, -0.01)
+      }
+      else if (test.counts[i] <= state.target.means[i, 1]){
+        emission.probs[i, 2:4] <- c(-0.01, -Inf, -Inf)
+      }
+      else emission.probs[i, 2:4] <- c(-Inf, -0.01, -Inf)
+    }
+  }
+  return(emission.probs)
+}
+
+# Viterbi algorithm
+Viterbi <- function(emission.probs.matrix, distances, p, Tnum, D){
+  targets <- emission.probs.matrix[, 1]
+  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
+  num.exons <- dim(emission.probs.matrix)[1]
+  viterbi.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)
+  viterbi.pointers <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)
+  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
+  viterbi.matrix[1, ] <- initial.state + emission.probs.matrix[1,]
+  for (i in 2:num.exons) {
+    temp.matrix <- viterbi.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
+    viterbi.matrix[i, ] <- apply(temp.matrix, 2, max)
+    emission.probs <- c(emission.probs.matrix[i,])
+    dim(emission.probs) <- c(NUM.STATES, 1)
+    viterbi.matrix[i, ] <- viterbi.matrix[i, ] + emission.probs
+    viterbi.pointers[i, ] <- apply(temp.matrix, 2, which.max)
+  }
+  viterbi.states = vector(length = num.exons)
+  viterbi.states[num.exons] = which.max(viterbi.matrix[num.exons, ])
+  for (i in (num.exons - 1):1) {
+    viterbi.states[i] <- viterbi.pointers[i + 1, viterbi.states[i + 1]]
+  }
+  return(data.frame(target=targets, viterbi.state=viterbi.states))
+}
+
+# returns a transition matrix
+#                              to state
+#                    deletion   normal    duplication
+#           deletion   
+#from state   normal
+#        duplication
+GetTransitionMatrix <- function(distance, p, Tnum, D){
+  q <- 1 / Tnum
+  f = exp(-distance/D)
+  prob.abnormal.abnormal <- f * (1 - q) + (1 - f) * p
+  prob.abnormal.normal <- f * q + (1 - f) * (1 - 2 * p)
+  prob.abnormal.diff.abnormal <- (1 - f) * p
+  prob.normal.normal <- 1 - 2 * p
+  prob.normal.abnormal <- p
+  transition.probs <- 
+    c(prob.abnormal.abnormal, prob.abnormal.normal, prob.abnormal.diff.abnormal, 
+      prob.normal.abnormal, prob.normal.normal, prob.normal.abnormal,
+      prob.abnormal.diff.abnormal, prob.abnormal.normal, prob.abnormal.abnormal)
+  transition.m = log(matrix(transition.probs, NUM.STATES, NUM.STATES, byrow=TRUE))
+  return(transition.m)
+}
+
+# adds two log-space probabilities using the identity
+# log (p1 + p2) = log p1 + log(1 + exp(log p2 - log p1))
+AddTwoProbabilities <- function(x, y){
+  if (is.infinite(x)) return (y)
+  if (is.infinite(y)) return (x)
+  sum.probs <- max(x, y) + log1p(exp(-abs(x - y)))
+}
+
+# adds multiple log-space probabilities
+SumProbabilities <- function(x){
+  sum.probs <- x[1]
+  for (i in 2:length(x)){
+    sum.probs <- AddTwoProbabilities(sum.probs, x[i])
+  }
+  return(sum.probs)
+}
+
+# finds the data likelihood by summing the product of the corresponding 
+# forward and backward probabilities at any token (should give the same value
+# regardless of the token)
+GetLikelihood <- function(forward.matrix, backward.matrix, x){
+  SumProbabilities(forward.matrix[x, ] + backward.matrix[x, ])
+}
+
+# get the forward probabilities
+GetForwardMatrix <- function(emission.probs.matrix, distances, p, Tnum, D){
+  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
+  num.exons <- dim(emission.probs.matrix)[1]
+  forward.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)   # matrix to hold forward probabilities
+  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
+  forward.matrix[1, ] <- initial.state + emission.probs.matrix[1, ]
+  for (i in 2:num.exons){
+    # compute matrix with probability we were in state j and are now in state i
+    # in temp.matrix[j, i] (ignoring emission of current token)
+    temp.matrix <- forward.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
+    # find the probability that we are in each of the three states
+    sum.probs <- apply(temp.matrix, 2, SumProbabilities)
+    forward.matrix[i, ] <- sum.probs + emission.probs.matrix[i, ]
+  }  
+  return(forward.matrix)  
+}
+
+# get the backward probabilities
+GetBackwardMatrix <- function(emission.probs.matrix, distances, 
+                                  p, Tnum, D){
+  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
+  num.exons <- dim(emission.probs.matrix)[1]
+  backward.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)   # matrix to hold backward probabilities
+  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
+  backward.matrix[num.exons, ] <- rep(0, NUM.STATES)
+  for (i in (num.exons - 1):1){
+    temp.matrix <- GetTransitionMatrix(distances$distance[i+1], p, Tnum, D) + 
+      matrix(backward.matrix[i + 1, ], 3, 3, byrow=T) +
+      matrix(emission.probs.matrix[i+1, ], 3, 3, byrow=T)
+    backward.matrix[i, ] <- apply(temp.matrix, 1, SumProbabilities)
+  }  
+  final.prob <- backward.matrix[1, ] + emission.probs.matrix[1, ] + initial.state
+  return(backward.matrix)  
+}
+
+# find the likelihood of the data given that certain states are disallowed
+# between start target and end target
+GetModifiedLikelihood <- function(forward.matrix, backward.matrix, emission.probs.matrix, distances, 
+                                      start.target, end.target, disallowed.states, p, Tnum, D){
+  targets <- emission.probs.matrix[, 1]
+  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
+  # there may be missing targets in this sample, we genotype the largest stretch of 
+  # targets that lie in the CNV
+  left.target <- min(which(targets >= start.target))
+  right.target <- max(which(targets <= end.target))
+  num.exons <- dim(emission.probs.matrix)[1]
+  unmodified.likelihood <- GetLikelihood(forward.matrix, 
+                                             backward.matrix, min(right.target + 1, num.exons))
+  #right.target or left.target may be empty
+  
+  #if (right.target >= left.target) return(c(NA, unmodified.likelihood))
+  stopifnot(right.target >= left.target)
+  modified.emission.probs.matrix <- emission.probs.matrix
+  modified.emission.probs.matrix[left.target:right.target, 
+                                 disallowed.states] <- -Inf
+  
+  # if the start target is the first target we need to recalculate the 
+  # forward probabilities
+  # for that target, using the modified emission probabilities
+  if (left.target == 1){
+    initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
+    forward.matrix[1, ] <- initial.state + modified.emission.probs.matrix[1, ]
+    left.target <- left.target + 1
+  } 
+  for (i in seq(left.target, min(right.target + 1, num.exons))){
+    # compute matrix with probability we were in state j and are now in state i
+    # in temp.matrix[j, i] (ignoring emission of current token)
+    temp.matrix <- forward.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
+    # find the probability that we are in each of the three states
+    sum.probs <- apply(temp.matrix, 2, SumProbabilities) 
+    if (!i == (right.target + 1)){
+      forward.matrix[i, ] <- sum.probs + modified.emission.probs.matrix[i, ]
+    } else{
+      forward.matrix[i, ] <- sum.probs + emission.probs.matrix[i, ]
+    }
+  }  
+  # find the modified likelihood of the sequence
+  modified.likelihood <- GetLikelihood(forward.matrix, backward.matrix, min(right.target + 1, num.exons))
+  return(c(modified.likelihood, unmodified.likelihood))
+}
+
+SummarizeCNVs <- function(cnv.targets, counts, sample.name, state){
+  sample.name <- sample.name
+  cnv.type <- ifelse(state==3, "DUP", "DEL")
+  cnv.start <- min(cnv.targets$target)
+  cnv.end <- max(cnv.targets$target)
+  cnv.chromosome <- counts[cnv.start, "chromosome"]
+  cnv.start.base <- counts[cnv.start, "start"]
+  cnv.start.target <- counts[cnv.start, "target"]
+  cnv.end.base <- counts[cnv.end, "end"]
+  cnv.end.target <- counts[cnv.end, "target"]
+  cnv.kbs <- (cnv.end.base - cnv.start.base) / 1000
+  cnv.midbp <- round((cnv.end.base - cnv.start.base) / 2) + cnv.start.base
+  cnv.targets <- paste(cnv.start.target, "..", cnv.end.target, sep="")
+  cnv.interval <- paste(cnv.chromosome, ":", cnv.start.base, "-", cnv.end.base, sep="")
+  num.targets <- cnv.end.target - cnv.start.target + 1
+  return(data.frame(sample.name=sample.name, cnv.type=cnv.type, cnv.interval=cnv.interval, 
+                    cnv.kbs=cnv.kbs, cnv.chromosome=cnv.chromosome, 
+                    cnv.midbp=cnv.midbp, cnv.targets=cnv.targets, num.targets=num.targets))
+}
+
+PrintCNVs <- function(test.sample.name, viterbi.state, 
+                      nonzero.counts){  
+  consecutiveGroups <- function(sequence){
+    num <- length(sequence)
+    group <- 1
+    groups <- rep(0, num)
+    groups[1] <- group
+    if (num > 1){
+      for (i in 2:num){
+        if (!sequence[i] == (sequence[i - 1] + 1)) group <- group + 1
+        groups[i] <- group
+      }
+    }
+    return(groups)
+  }
+  num.duplications <- 0
+  num.deletions <- 0
+  for (state in c(1, 3)){
+    cnv.targets <- which(viterbi.state$viterbi.state == state)
+    if (!length(cnv.targets) == 0){
+      groups <- consecutiveGroups(cnv.targets)
+      library(plyr)
+      cnvs.temp.df <- ddply(data.frame(target=cnv.targets, group=groups), 
+                            "group", SummarizeCNVs, nonzero.counts, test.sample.name, 
+                            state)
+      if (state == 1){
+        deletions.df <- cnvs.temp.df
+        if (!is.null(dim(deletions.df))){
+          num.deletions <- dim(deletions.df)[1]
+        }
+      } else {
+        duplications.df <- cnvs.temp.df
+        if (!is.null(dim(duplications.df))){
+          num.duplications <- dim(duplications.df)[1]
+        }
+      }
+    }
+  }
+  num.calls <- num.deletions + num.duplications
+  cat(num.calls, "CNVs called in sample", test.sample.name, "\n")
+  if (num.deletions == 0 & num.duplications == 0){
+    df <- data.frame(SAMPLE=character(0), CNV=character(0), INTERVAL=character(0), 
+                     KB=numeric(0), CHR=character(0), 
+                     MID_BP=numeric(), TARGETS=character(0), NUM_TARG=numeric(0), Q_SOME=numeric(0), MLCN=numeric(0))
+    return(df)
+  }
+  if (num.deletions > 0 & num.duplications > 0){
+    cnvs.df <- rbind(deletions.df, duplications.df)
+  } else {
+    ifelse(num.deletions > 0, 
+           cnvs.df <- deletions.df, cnvs.df <- duplications.df)
+  }
+  xcnv <- cbind(cnvs.df[, c("sample.name", "cnv.type", "cnv.interval", 
+                      "cnv.kbs", "cnv.chromosome", "cnv.midbp", 
+                      "cnv.targets", "num.targets")], 0)
+  colnames(xcnv) <- c("SAMPLE", "CNV", "INTERVAL", "KB", "CHR", "MID_BP", "TARGETS",
+                      "NUM_TARG", "MLCN")
+  xcnv$Q_SOME <- NA
+  return(xcnv)
+}
+
+CalcCopyNumber <- function(data, cnvs, homdel.mean){
+  for (i in 1:nrow(cnvs)){
+    cnv <- cnvs[i, ]
+    targets <- as.numeric(unlist(strsplit(as.character(cnv$TARGETS), "..", fixed=T)))
+    cnv.data <- subset(data, target >= targets[1] & target <= targets[2])
+    state.target.means <- t(apply(data.frame(x=cnv.data$countsmean), 1, 
+                                  function(x) c(C1=x*1/2, C2=x, C3=x*3/2, 
+                                                C4=x * 2, C5=x * 5/2, C6=x*6/2)))
+    # calculate the expected size (given the predicted variance)
+    size <- cnv.data$countsmean ^ 2 / (cnv.data$varestimate - cnv.data$countsmean)
+    emission.probs <- matrix(NA, nrow(cnv.data), 7)
+    colnames(emission.probs) <- c("C0", "C1", "C2", "C3", "C4", "C5", "C6")
+    #colnames(emission.probs) <- c("target", "delprob", "normalprob", "dupprob")
+    # calculate the emission probabilities given the read count
+    emission.probs[, 1] <- dpois(cnv.data$sample, homdel.mean, log=T)
+    for (s in 1:6){
+      size.state <- size * s/2
+      emission.probs[, s+1] <- dnbinom(cnv.data$sample, mu=state.target.means[, s], 
+                                       size=size.state, log=T)
+    }
+    cs <- colSums(emission.probs)
+    ml.state <- which.max(cs) - 1
+    if (ml.state==2){
+      ml.state <- ifelse(cnv$CNV=="DEL", 1, 3)
+    }
+    cnvs$MLCN[i] <- ml.state
+  }  
+  return(cnvs)
+}
+
diff --git a/R/CANOES/R/run_CANOES.R b/R/CANOES/R/run_CANOES.R
new file mode 100644
index 0000000..65ddb36
--- /dev/null
+++ b/R/CANOES/R/run_CANOES.R
@@ -0,0 +1,32 @@
+Test <- function(){
+  # read in the data
+  gc <- read.table("gc.txt")$V2
+  canoes.reads <- read.table("canoes.reads.txt")
+  # rename the columns of canoes.reads
+  sample.names <- paste("S", seq(1:26), sep="")
+  names(canoes.reads) <- c("chromosome", "start", "end", sample.names)
+  # create a vector of consecutive target ids
+  target <- seq(1, nrow(canoes.reads))
+  # combine the data into one data frame
+  canoes.reads <- cbind(target, gc, canoes.reads)
+  # call CNVs in each sample
+  # create a vector to hold the results for each sample
+  xcnv.list <- vector('list', length(sample.names))
+  for (i in 1:length(sample.names)){
+    xcnv.list[[i]] <- CallCNVs(sample.names[i], canoes.reads) 
+  }
+  # combine the results into one data frame
+  xcnvs <- do.call('rbind', xcnv.list)
+  # inspect the first two CNV calls
+  print(head(xcnvs, 2))
+  # plot all the CNV calls to a pdf
+  pdf("CNVplots.pdf")
+  for (i in 1:nrow(xcnvs)){
+     PlotCNV(canoes.reads, xcnvs[i, "SAMPLE"], xcnvs[i, "TARGETS"])
+  }
+  dev.off()
+  # genotype all the CNVs calls made above in sample S2
+  genotyping.S2 <- GenotypeCNVs(xcnvs, "S2", canoes.reads)
+  # inspect the genotype scores for the first two CNV calls
+  print(head(genotyping.S2, 2))
+}
diff --git a/R/CANOESCOV/DESCRIPTION b/R/CANOESCOV/DESCRIPTION
index 314f419..27c73ca 100644
--- a/R/CANOESCOV/DESCRIPTION
+++ b/R/CANOESCOV/DESCRIPTION
@@ -12,14 +12,13 @@ Description: An extended implementation of the CANOES package in R. It extends
 Depends:
     R (>= 3.2.3),
     devtools (>= 1.13.2),
-    DBI (== 0.8),
+    DBI (>= 0.8),
     optparse (== 1.4.4),
     IRanges (>= 2.0.0),
     plyr (>= 1.8.4),
     nnls (>= 1.4.0),
     Hmisc (>= 4.0.0),
-    mgcv (>= 1.8.0),
-    REFERENCE.SAMPLE.SET.SELECTOR (>= 0.0.1)
+    mgcv (>= 1.8.0)
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
diff --git a/R/CANOESCOV/R/functions_CANOESCOV.R b/R/CANOESCOV/R/functions_CANOESCOV.R
index 873e894..f3195a7 100644
--- a/R/CANOESCOV/R/functions_CANOESCOV.R
+++ b/R/CANOESCOV/R/functions_CANOESCOV.R
@@ -1,23 +1,24 @@
 
-coverageObj1 <- function(cov_table, sampname, targets_for_chr, chr){
-  Y <- matrix(data=as.integer(0), nrow = nrow(targets_for_chr), ncol = 0)
-  for(sample in sampname) {
-    cov_targets_for_sample <- cov_table[cov_table[,"sample_name"] == sample,]
-    cov_targets_for_sample <- cov_targets_for_sample[with(cov_targets_for_sample, order(target_id)), ]
-    Y <- cbind(Y, cov_targets_for_sample[,"read_count"])
+# from CODEX package
+getgc <- function(chr, ref) {
+  library(GenomeInfoDb)
+  library(BSgenome.Hsapiens.UCSC.hg19)
+  if (chr == "X" | chr == "x" | chr == "chrX" | chr == "chrx") {
+    chrtemp <- 23
+  } else if (chr == "Y" | chr == "y" | chr == "chrY" | chr == "chry") {
+    chrtemp <- 24
+  } else {
+    chrtemp <- as.numeric(mapSeqlevels(as.character(chr), "NCBI")[1])
   }
-  colnames(Y) <- sampname
-  rownames(Y) <- targets_for_chr[,"target_id"]
-  return(list(Y=Y))
+  if (length(chrtemp) == 0) 
+    message("Chromosome cannot be found in NCBI Homo sapiens database!")
+  chrm <- unmasked(Hsapiens[[chrtemp]])
+  seqs <- Views(chrm, ref)
+  af <- alphabetFrequency(seqs, baseOnly = TRUE, as.prob = TRUE)
+  gc <- round((af[, "G"] + af[, "C"]) * 100,2)
+  gc
 }
 
-# Constants
-NUM.ABNORMAL.STATES=2
-NUM.STATES=3
-DELETION=1
-NORMAL=2
-DUPLICATION=3
-
 # CallCNVs
 #     Calls CNVs in sample of interest
 # Arguments:
@@ -53,7 +54,14 @@ DUPLICATION=3
 #      TARGETS: target numbers of CNV in the form start..stop
 #      NUM_TARG: how many targets are in the CNV
 #      Q_SOME: a Phred-scaled quality score for the CNV
-CallCNVs <- function(sample.name, counts, p=1e-08, Tnum=6, D=70000, reference_set_select_method='canoes', num_of_samples_in_reference_set=30, get.dfs=F, homdel.mean=0.2, target_length){
+CallCNVs <- function(sample.name, reference.samples, counts, p=1e-08, Tnum=6, D=70000, get.dfs=F, homdel.mean=0.2){
+  library(IRanges)
+  library(BSgenome.Hsapiens.UCSC.hg19)
+  library(Biostrings)
+  library(Rsamtools)
+  library(GenomeInfoDb)
+  library(S4Vectors)
+  library(CANOES)
   if (!sample.name %in% names(counts)){stop("No column for sample ", sample.name, " in counts matrix")}
   if (length(setdiff(names(counts)[1:5], c("target", "chromosome", "start", "end", "gc"))) > 0){
     stop("First five columns of counts matrix must be target, chromosome, start, end, gc")
@@ -97,12 +105,12 @@ CallCNVs <- function(sample.name, counts, p=1e-08, Tnum=6, D=70000, reference_se
   #reference.samples <- names(sort(covariances, 
   #        decreasing=T)[1:min(numrefs, length(covariances))])
   Y <- data.matrix(counts[,6:ncol(counts)])
-  library('REFERENCE.SAMPLE.SET.SELECTOR')
-  reference.samples <- run_REFERENCE.SAMPLE.SET.SELECTOR(sample.name,
-                                                         Y,
-                                                         reference_set_select_method,
-                                                         num_of_samples_in_reference_set,
-                                                         target_length)
+  #library('REFERENCE.SAMPLE.SET.SELECTOR')
+  #reference.samples <- run_REFERENCE.SAMPLE.SET.SELECTOR(sample.name,
+  #                                                       Y,
+  #                                                       reference_set_select_method,
+  #                                                       num_of_samples_in_reference_set,
+  #                                                       target_length)
   sample.mean.counts <- mean(counts[, sample.name])
   sample.sumcounts <- apply(counts[, reference.samples], 2, sum)
   # normalize reference samples to sample of interest
@@ -168,469 +176,3 @@ CallCNVs <- function(sample.name, counts, p=1e-08, Tnum=6, D=70000, reference_se
   }
   return(cnvs)
 }
-
-# GenotypeCNVs
-#     Genotype CNVs in sample of interest
-# Arguments:
-#   xcnv
-#     data frame with the following columns, and one row for each
-#     CNV to genotype
-#      INTERVAL: CNV coordinates in the form chr:start-stop
-#      TARGETS: target numbers of CNV in the form start..stop
-#               these should correspond to the target numbers in counts
-#   sample.name:
-#     sample to genotype CNVs in (should correspond to a column in counts)
-#   counts: 
-#     count matrix, first five columns should be 
-#       target: consecutive numbers for targets (integer)
-#       chromosome: chromosome number (integer-valued) 
-#         (support for sex chromosomes to come)
-#       start: start position of probe (integer)
-#       end: end position of probe (integer)
-#       gc: gc content (real between 0 and 1)
-#       subsequent columns should include counts for each probe for samples
-#   p:
-#     average rate of occurrence of CNVs (real) default is 1e-08
-#   D:
-#     expected distance between targets in a CNV (integer) default is 70,000
-#   Tnum:
-#     expected number of targets in a CNV (integer) default is 6
-#   numrefs
-#     maximum number of reference samples to use (integer) default is 30
-#     the weighted variance calculations will take a long time if too 
-#     many reference samples are used
-#   emission.probs and distances are for internal use only
-# Returns: 
-#   data frame with the following columns and one row for each genotyped CNV:
-#      INTERVAL: CNV coordinates in the form chr:start-stop
-#      NQDEL: a Phred-scaled quality score that sample.name has no deletion 
-#             in the interval
-#      SQDEL: a Phred-scaled quality score that sample.name has a deletion 
-#             in the interval
-#      NQDUP and SQDUP: same, but for a duplication
-GenotypeCNVs <- function(xcnvs, sample.name, counts, p=1e-08, Tnum=6, 
-                    D=70000, numrefs=30,
-                    emission.probs=NULL, 
-                    distances=NULL){
-  if (!sample.name %in% names(counts)){stop("No column for sample ", sample.name, " in counts matrix")}
-  if (length(setdiff(names(counts)[1:5], c("target", "chromosome", "start", "end", "gc"))) > 0){
-    stop("First five columns of counts matrix must be target, chromosome, start, end, gc")
-  }
-  if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) {
-    # remove sex chromosomes
-    cat("Trying to remove sex chromosomes and 'chr' prefixes\n")
-    counts <- subset(counts, !chromosome %in% c("chrX", "chrY", "X", "Y"))
-    if (sum(grepl("chr", counts$chromosome))==length(counts$chromosome)){
-      counts$chromosome <- gsub("chr", "", counts$chromosome)
-    }
-    counts$chromosome <- as.numeric(counts$chromosome)
-    if (length(setdiff(unique(counts$chromosome), seq(1:22))) > 0) 
-      stop("chromosome must take value in range 1-22 (support for sex chromosomes to come)")
-  }
-  library(plyr)
-  counts <- arrange(counts, chromosome, start)
-  if (p <= 0){
-    stop("parameter p must be positive")
-  }
-  if (Tnum <= 0){
-    stop("parameter Tnum must be positive")
-  }
-  if (D <= 0){
-    stop("parameter D must be positive")
-  }
-  if (numrefs <= 0){
-    stop("parameter numrefs must be positive")
-  }
-  num.cnvs <- nrow(xcnvs)
-  cnv.intervals <- as.character(xcnvs$INTERVAL)
-  # if no emission probs matrix is passed in, generate a new one
-  if (is.null(emission.probs)){
-    l <- CANOESCOV::CallCNVs(sample.name, counts, p, Tnum=6, D=70000, numrefs=30, get.dfs=T)
-    emission.probs <- l[['emission.probs']]
-    distances <- l[['distances']]
-  }
-  forward.m <- GetForwardMatrix(emission.probs, distances, p, Tnum, D)
-  backward.m <- GetBackwardMatrix(emission.probs, distances, p, Tnum, D)
-  qualities <- matrix(0, nrow=num.cnvs, ncol=5, 
-                      dimnames=list(cnv.intervals, 
-                                    c("INTERVAL", "NQDel", "SQDel", "NQDup", "SQDup")))
-  for (i in 1:num.cnvs){
-    interval <- as.character(xcnvs[i, "INTERVAL"])
-    targets <- as.numeric(strsplit(as.character(xcnvs[i, "TARGETS"]), ".", fixed=T)[[1]][c(1,3)])
-    left.target <- targets[1]
-    right.target <- targets[2]
-    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
-                                         emission.probs, distances, 
-                                         left.target, right.target, 
-                                         c(DUPLICATION, DELETION), p, Tnum, D)
-    modified.likelihood <- likelihoods[1]; 
-    unmodified.likelihood <- likelihoods[2]
-    Prob.All.Normal <- exp(modified.likelihood - unmodified.likelihood)
-    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
-                                         emission.probs, distances, 
-                                         left.target, right.target, DELETION, p, Tnum, D)
-    modified.likelihood <- likelihoods[1]; 
-    unmodified.likelihood <- likelihoods[2]
-    Prob.No.Deletion <- exp(modified.likelihood - unmodified.likelihood)
-    likelihoods <- GetModifiedLikelihood(forward.m, backward.m, 
-                                         emission.probs, distances, 
-                                         left.target, right.target, DUPLICATION, p, Tnum, D)
-    modified.likelihood <- likelihoods[1]; 
-    unmodified.likelihood <- likelihoods[2]
-    Prob.No.Duplication <- exp(modified.likelihood - unmodified.likelihood)
-    # Check if probabilities greater than 1 are numerical error or bug
-    Phred <- function(prob){
-      return(round(min(99, -10 * log10(1 - prob))))
-    }
-    qualities[i, "NQDel"] <- Phred(Prob.No.Deletion)       
-    qualities[i, "SQDel"] <- Phred(Prob.No.Duplication - Prob.All.Normal)
-    qualities[i, "NQDup"] <- Phred(Prob.No.Duplication)       
-    qualities[i, "SQDup"] <- Phred(Prob.No.Deletion - Prob.All.Normal)
-    qualities[i, "INTERVAL"] <- interval
-  }
-  qualities <- as.data.frame(qualities, stringsAsFactors=F)
-  qualities$NQDel <- as.integer(qualities$NQDel)
-  qualities$NQDup <- as.integer(qualities$NQDup)
-  qualities$SQDel <- as.integer(qualities$SQDel)
-  qualities$SQDup <- as.integer(qualities$SQDup)
-  return(qualities)
-}
-
-# returns data frame with distance to each target from the previous target 
-# (0 in the case of the first target on chromosome 1, a very big number
-# for the first target on each other chromosome--this resets the HMM
-# for each chromosome)
-GetDistances <- function(counts){
-  chromosome <- counts[, "chromosome"]
-  startbase <- counts[, "start"]
-  num.nonzero.exons <- length(startbase)
-  distances <- c(0, startbase[2:num.nonzero.exons] - 
-                   startbase[1:(num.nonzero.exons - 1)] + 
-                   1000000000000 * (chromosome[2:num.nonzero.exons] - 
-                                      chromosome[1:(num.nonzero.exons - 1)]))
-  return(data.frame(target=counts[, "target"], distance=distances))
-}
-
-EstimateVariance <- function(counts, ref.sample.names, sample.weights){
-  library(Hmisc)
-  counts$var <- apply(counts[, ref.sample.names], 1, wtd.var, sample.weights, normwt=T)
-  set.seed(1)
-  counts.subset <- counts[sample(nrow(counts), min(36000, nrow(counts))), ]
-  library(mgcv)
-  # can't do gamma regression with negative 
-  counts.subset$var[counts.subset$var==0] <- 0.1 
-  fit <- gam(var ~ s(mean) + s(gc), family=Gamma(link=log), data=counts.subset)
-  # we don't want variance less than Poisson
-  # we take maximum of genome-wide estimate, method of moments estimate
-  # and Poisson variance
-  v.estimate <- pmax(predict(fit, counts, type="response"), counts$var, 
-                     counts$mean * 1.01)
-  return(data.frame(target=counts$target, var.estimate=v.estimate))
-}
-
-EmissionProbs <- function(test.counts, target.means, 
-                                      var.estimate, targets){
-  num.targets <- length(test.counts)
-  # calculate the means for the deletion, normal and duplication states
-  state.target.means <- t(apply(data.frame(x=target.means), 1, function(x) c(x*1/2, x, x*3/2)))
-  # calculate the expected size (given the predicted variance)
-  size <- target.means ^ 2 / (var.estimate - target.means)
-  emission.probs <- matrix(NA, num.targets, 4)
-  colnames(emission.probs) <- c("target", "delprob", "normalprob", "dupprob")
-  # calculate the emission probabilities given the read count
-  size.del <- size
-  size.dup <- size
-  size.del <- size / 2
-  size.dup <- size * 3 / 2
-  emission.probs[, "delprob"] <- dnbinom(
-    test.counts,
-    mu=state.target.means[, 1],
-    size=size.del, log=T)
-  emission.probs[, "normalprob"] <- dnbinom(
-    test.counts,
-    mu=state.target.means[, 2],
-    size=size, log=T)
-  emission.probs[, "dupprob"] <- dnbinom(
-    test.counts,
-    mu=state.target.means[, 3],
-    size=size.dup, log=T)
-  emission.probs[, "target"] <- targets
-  # some values may be infinite as a result of extreme read count
-  row.all.inf <- which(apply(emission.probs, 1, function(x){all(is.infinite(x))}))
-  if (length(row.all.inf) > 0){
-    for (i in row.all.inf){
-      if (test.counts[i] >= state.target.means[i, 3]){
-        emission.probs[i, 2:4] <- c(-Inf, -Inf, -0.01)
-      }
-      else if (test.counts[i] <= state.target.means[i, 1]){
-        emission.probs[i, 2:4] <- c(-0.01, -Inf, -Inf)
-      }
-      else emission.probs[i, 2:4] <- c(-Inf, -0.01, -Inf)
-    }
-  }
-  return(emission.probs)
-}
-
-# Viterbi algorithm
-Viterbi <- function(emission.probs.matrix, distances, p, Tnum, D){
-  targets <- emission.probs.matrix[, 1]
-  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
-  num.exons <- dim(emission.probs.matrix)[1]
-  viterbi.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)
-  viterbi.pointers <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)
-  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
-  viterbi.matrix[1, ] <- initial.state + emission.probs.matrix[1,]
-  for (i in 2:num.exons) {
-    temp.matrix <- viterbi.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
-    viterbi.matrix[i, ] <- apply(temp.matrix, 2, max)
-    emission.probs <- c(emission.probs.matrix[i,])
-    dim(emission.probs) <- c(NUM.STATES, 1)
-    viterbi.matrix[i, ] <- viterbi.matrix[i, ] + emission.probs
-    viterbi.pointers[i, ] <- apply(temp.matrix, 2, which.max)
-  }
-  viterbi.states = vector(length = num.exons)
-  viterbi.states[num.exons] = which.max(viterbi.matrix[num.exons, ])
-  for (i in (num.exons - 1):1) {
-    viterbi.states[i] <- viterbi.pointers[i + 1, viterbi.states[i + 1]]
-  }
-  return(data.frame(target=targets, viterbi.state=viterbi.states))
-}
-
-# returns a transition matrix
-#                              to state
-#                    deletion   normal    duplication
-#           deletion   
-#from state   normal
-#        duplication
-GetTransitionMatrix <- function(distance, p, Tnum, D){
-  q <- 1 / Tnum
-  f = exp(-distance/D)
-  prob.abnormal.abnormal <- f * (1 - q) + (1 - f) * p
-  prob.abnormal.normal <- f * q + (1 - f) * (1 - 2 * p)
-  prob.abnormal.diff.abnormal <- (1 - f) * p
-  prob.normal.normal <- 1 - 2 * p
-  prob.normal.abnormal <- p
-  transition.probs <- 
-    c(prob.abnormal.abnormal, prob.abnormal.normal, prob.abnormal.diff.abnormal, 
-      prob.normal.abnormal, prob.normal.normal, prob.normal.abnormal,
-      prob.abnormal.diff.abnormal, prob.abnormal.normal, prob.abnormal.abnormal)
-  transition.m = log(matrix(transition.probs, NUM.STATES, NUM.STATES, byrow=TRUE))
-  return(transition.m)
-}
-
-# adds two log-space probabilities using the identity
-# log (p1 + p2) = log p1 + log(1 + exp(log p2 - log p1))
-AddTwoProbabilities <- function(x, y){
-  if (is.infinite(x)) return (y)
-  if (is.infinite(y)) return (x)
-  sum.probs <- max(x, y) + log1p(exp(-abs(x - y)))
-}
-
-# adds multiple log-space probabilities
-SumProbabilities <- function(x){
-  sum.probs <- x[1]
-  for (i in 2:length(x)){
-    sum.probs <- AddTwoProbabilities(sum.probs, x[i])
-  }
-  return(sum.probs)
-}
-
-# finds the data likelihood by summing the product of the corresponding 
-# forward and backward probabilities at any token (should give the same value
-# regardless of the token)
-GetLikelihood <- function(forward.matrix, backward.matrix, x){
-  SumProbabilities(forward.matrix[x, ] + backward.matrix[x, ])
-}
-
-# get the forward probabilities
-GetForwardMatrix <- function(emission.probs.matrix, distances, p, Tnum, D){
-  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
-  num.exons <- dim(emission.probs.matrix)[1]
-  forward.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)   # matrix to hold forward probabilities
-  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
-  forward.matrix[1, ] <- initial.state + emission.probs.matrix[1, ]
-  for (i in 2:num.exons){
-    # compute matrix with probability we were in state j and are now in state i
-    # in temp.matrix[j, i] (ignoring emission of current token)
-    temp.matrix <- forward.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
-    # find the probability that we are in each of the three states
-    sum.probs <- apply(temp.matrix, 2, SumProbabilities)
-    forward.matrix[i, ] <- sum.probs + emission.probs.matrix[i, ]
-  }  
-  return(forward.matrix)  
-}
-
-# get the backward probabilities
-GetBackwardMatrix <- function(emission.probs.matrix, distances, 
-                                  p, Tnum, D){
-  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
-  num.exons <- dim(emission.probs.matrix)[1]
-  backward.matrix <- matrix(NA, nrow=num.exons, ncol=NUM.STATES)   # matrix to hold backward probabilities
-  initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
-  backward.matrix[num.exons, ] <- rep(0, NUM.STATES)
-  for (i in (num.exons - 1):1){
-    temp.matrix <- GetTransitionMatrix(distances$distance[i+1], p, Tnum, D) + 
-      matrix(backward.matrix[i + 1, ], 3, 3, byrow=T) +
-      matrix(emission.probs.matrix[i+1, ], 3, 3, byrow=T)
-    backward.matrix[i, ] <- apply(temp.matrix, 1, SumProbabilities)
-  }  
-  final.prob <- backward.matrix[1, ] + emission.probs.matrix[1, ] + initial.state
-  return(backward.matrix)  
-}
-
-# find the likelihood of the data given that certain states are disallowed
-# between start target and end target
-GetModifiedLikelihood <- function(forward.matrix, backward.matrix, emission.probs.matrix, distances, 
-                                      start.target, end.target, disallowed.states, p, Tnum, D){
-  targets <- emission.probs.matrix[, 1]
-  emission.probs.matrix <- as.matrix(emission.probs.matrix[, 2:4])
-  # there may be missing targets in this sample, we genotype the largest stretch of 
-  # targets that lie in the CNV
-  left.target <- min(which(targets >= start.target))
-  right.target <- max(which(targets <= end.target))
-  num.exons <- dim(emission.probs.matrix)[1]
-  unmodified.likelihood <- GetLikelihood(forward.matrix, 
-                                             backward.matrix, min(right.target + 1, num.exons))
-  #right.target or left.target may be empty
-  
-  #if (right.target >= left.target) return(c(NA, unmodified.likelihood))
-  stopifnot(right.target >= left.target)
-  modified.emission.probs.matrix <- emission.probs.matrix
-  modified.emission.probs.matrix[left.target:right.target, 
-                                 disallowed.states] <- -Inf
-  
-  # if the start target is the first target we need to recalculate the 
-  # forward probabilities
-  # for that target, using the modified emission probabilities
-  if (left.target == 1){
-    initial.state <- log(c(0.0075 / NUM.ABNORMAL.STATES, 1 - 0.0075, 0.0075 / NUM.ABNORMAL.STATES))
-    forward.matrix[1, ] <- initial.state + modified.emission.probs.matrix[1, ]
-    left.target <- left.target + 1
-  } 
-  for (i in seq(left.target, min(right.target + 1, num.exons))){
-    # compute matrix with probability we were in state j and are now in state i
-    # in temp.matrix[j, i] (ignoring emission of current token)
-    temp.matrix <- forward.matrix[i - 1, ] + GetTransitionMatrix(distances$distance[i], p, Tnum, D)
-    # find the probability that we are in each of the three states
-    sum.probs <- apply(temp.matrix, 2, SumProbabilities) 
-    if (!i == (right.target + 1)){
-      forward.matrix[i, ] <- sum.probs + modified.emission.probs.matrix[i, ]
-    } else{
-      forward.matrix[i, ] <- sum.probs + emission.probs.matrix[i, ]
-    }
-  }  
-  # find the modified likelihood of the sequence
-  modified.likelihood <- GetLikelihood(forward.matrix, backward.matrix, min(right.target + 1, num.exons))
-  return(c(modified.likelihood, unmodified.likelihood))
-}
-
-SummarizeCNVs <- function(cnv.targets, counts, sample.name, state){
-  sample.name <- sample.name
-  cnv.type <- ifelse(state==3, "DUP", "DEL")
-  cnv.start <- min(cnv.targets$target)
-  cnv.end <- max(cnv.targets$target)
-  cnv.chromosome <- counts[cnv.start, "chromosome"]
-  cnv.start.base <- counts[cnv.start, "start"]
-  cnv.start.target <- counts[cnv.start, "target"]
-  cnv.end.base <- counts[cnv.end, "end"]
-  cnv.end.target <- counts[cnv.end, "target"]
-  cnv.kbs <- (cnv.end.base - cnv.start.base) / 1000
-  cnv.midbp <- round((cnv.end.base - cnv.start.base) / 2) + cnv.start.base
-  cnv.targets <- paste(cnv.start.target, "..", cnv.end.target, sep="")
-  cnv.interval <- paste(cnv.chromosome, ":", cnv.start.base, "-", cnv.end.base, sep="")
-  num.targets <- cnv.end.target - cnv.start.target + 1
-  return(data.frame(sample.name=sample.name, cnv.type=cnv.type, cnv.interval=cnv.interval, 
-                    cnv.kbs=cnv.kbs, cnv.chromosome=cnv.chromosome, 
-                    cnv.midbp=cnv.midbp, cnv.targets=cnv.targets, num.targets=num.targets))
-}
-
-PrintCNVs <- function(test.sample.name, viterbi.state, 
-                      nonzero.counts){  
-  consecutiveGroups <- function(sequence){
-    num <- length(sequence)
-    group <- 1
-    groups <- rep(0, num)
-    groups[1] <- group
-    if (num > 1){
-      for (i in 2:num){
-        if (!sequence[i] == (sequence[i - 1] + 1)) group <- group + 1
-        groups[i] <- group
-      }
-    }
-    return(groups)
-  }
-  num.duplications <- 0
-  num.deletions <- 0
-  for (state in c(1, 3)){
-    cnv.targets <- which(viterbi.state$viterbi.state == state)
-    if (!length(cnv.targets) == 0){
-      groups <- consecutiveGroups(cnv.targets)
-      library(plyr)
-      cnvs.temp.df <- ddply(data.frame(target=cnv.targets, group=groups), 
-                            "group", SummarizeCNVs, nonzero.counts, test.sample.name, 
-                            state)
-      if (state == 1){
-        deletions.df <- cnvs.temp.df
-        if (!is.null(dim(deletions.df))){
-          num.deletions <- dim(deletions.df)[1]
-        }
-      } else {
-        duplications.df <- cnvs.temp.df
-        if (!is.null(dim(duplications.df))){
-          num.duplications <- dim(duplications.df)[1]
-        }
-      }
-    }
-  }
-  num.calls <- num.deletions + num.duplications
-  cat(num.calls, "CNVs called in sample", test.sample.name, "\n")
-  if (num.deletions == 0 & num.duplications == 0){
-    df <- data.frame(SAMPLE=character(0), CNV=character(0), INTERVAL=character(0), 
-                     KB=numeric(0), CHR=character(0), 
-                     MID_BP=numeric(), TARGETS=character(0), NUM_TARG=numeric(0), Q_SOME=numeric(0), MLCN=numeric(0))
-    return(df)
-  }
-  if (num.deletions > 0 & num.duplications > 0){
-    cnvs.df <- rbind(deletions.df, duplications.df)
-  } else {
-    ifelse(num.deletions > 0, 
-           cnvs.df <- deletions.df, cnvs.df <- duplications.df)
-  }
-  xcnv <- cbind(cnvs.df[, c("sample.name", "cnv.type", "cnv.interval", 
-                      "cnv.kbs", "cnv.chromosome", "cnv.midbp", 
-                      "cnv.targets", "num.targets")], 0)
-  colnames(xcnv) <- c("SAMPLE", "CNV", "INTERVAL", "KB", "CHR", "MID_BP", "TARGETS",
-                      "NUM_TARG", "MLCN")
-  xcnv$Q_SOME <- NA
-  return(xcnv)
-}
-
-CalcCopyNumber <- function(data, cnvs, homdel.mean){
-  for (i in 1:nrow(cnvs)){
-    cnv <- cnvs[i, ]
-    targets <- as.numeric(unlist(strsplit(as.character(cnv$TARGETS), "..", fixed=T)))
-    cnv.data <- subset(data, target >= targets[1] & target <= targets[2])
-    state.target.means <- t(apply(data.frame(x=cnv.data$countsmean), 1, 
-                                  function(x) c(C1=x*1/2, C2=x, C3=x*3/2, 
-                                                C4=x * 2, C5=x * 5/2, C6=x*6/2)))
-    # calculate the expected size (given the predicted variance)
-    size <- cnv.data$countsmean ^ 2 / (cnv.data$varestimate - cnv.data$countsmean)
-    emission.probs <- matrix(NA, nrow(cnv.data), 7)
-    colnames(emission.probs) <- c("C0", "C1", "C2", "C3", "C4", "C5", "C6")
-    #colnames(emission.probs) <- c("target", "delprob", "normalprob", "dupprob")
-    # calculate the emission probabilities given the read count
-    emission.probs[, 1] <- dpois(cnv.data$sample, homdel.mean, log=T)
-    for (s in 1:6){
-      size.state <- size * s/2
-      emission.probs[, s+1] <- dnbinom(cnv.data$sample, mu=state.target.means[, s], 
-                                       size=size.state, log=T)
-    }
-    cs <- colSums(emission.probs)
-    ml.state <- which.max(cs) - 1
-    if (ml.state==2){
-      ml.state <- ifelse(cnv$CNV=="DEL", 1, 3)
-    }
-    cnvs$MLCN[i] <- ml.state
-  }  
-  return(cnvs)
-}
diff --git a/R/CANOESCOV/R/run_CANOESCOV.R b/R/CANOESCOV/R/run_CANOESCOV.R
index 980f6b1..62e2cfd 100644
--- a/R/CANOESCOV/R/run_CANOESCOV.R
+++ b/R/CANOESCOV/R/run_CANOESCOV.R
@@ -1,53 +1,56 @@
 library(methods)
-library(CODEX)
 
-run_CANOESCOV <- function(reference_set_select_method,
-                          num_of_samples_in_reference_set,
-                          cov_table){
+run_CANOESCOV <- function(input_cov_table,
+                          input_bed,
+                          reference_sample_set_file,
+                          output_calls_file){
 
-  sampname <- unique(cov_table[,"sample_name"])
-  targets <- cov_table[,c("target_id", "chr", "pos_min", "pos_max")]
-  targets <- targets[!duplicated(targets[,"target_id"]),]
-  targets <- targets[with(targets, order(target_id)), ]
+  con <- file(reference_sample_set_file, open='r')
+  reference_sample_set <- readLines(con)
+  Y <- read.csv(input_cov_table)
+  sampname <- colnames(Y)
+  targets <- read.delim(input_bed)
+  rownames(Y) <- 1:nrow(Y)
+  rownames(targets) <- 1:nrow(targets)
   
   calls <- data.frame(matrix(nrow=0, ncol=13))
-  chrs <- c(1:22, "X", "Y", paste0("chr",c(1:22, "X", "Y")))
-  for(chr in chrs) {
-    targets_for_chr <- targets[targets[,"chr"] == chr,]
-    ref <- IRanges(start = targets_for_chr[,"pos_min"], end = targets_for_chr[,"pos_max"])
-    if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
-      next()
-    }
-    Y <- coverageObj1(cov_table, sampname, targets_for_chr, chr)$Y
-    Y <- cbind(rep(chr, nrow(Y)), start(ref), end(ref), Y)
-    target_length <- c()
-    for (i in 1:nrow(Y)) {
-      target_length <- c(target_length, width(ref[i]))
-    }
+  chr <- targets[1,'chr']
+  ref <- IRanges(start = targets[,"st_bp"], end = targets[,"ed_bp"])
+  if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
+    next()
+  }
+  Y <- cbind(rep(chr, nrow(Y)), start(ref), end(ref), Y)
+  target_length <- c()
+  for (i in 1:nrow(Y)) {
+    target_length <- c(target_length, width(ref[i]))
+  }
 
-    # TODO better transformation
-    write.table(Y, file=paste('cov_', chr, '.tsv', sep=""), quote=FALSE, sep="\t", col.names = F, row.names = F)
-    canoes.reads <- read.table(paste('cov_', chr, '.tsv', sep=""))
+  # TODO better transformation
+  write.table(Y, file=paste('cov_', chr, '.tsv', sep=""), quote=FALSE, sep="\t", col.names = F, row.names = F)
+  canoes.reads <- read.table(paste('cov_', chr, '.tsv', sep=""))
 
-    gc <- getgc(chr, ref)
-    target <- seq(1, nrow(Y))
-    canoes.reads <- cbind(target, gc, canoes.reads)
-    sampname <- as.vector(sampname)
-    names(canoes.reads) <- c("target", "gc", "chromosome", "start", "end", sampname)
-    colnames(canoes.reads) <- c("target", "gc", "chromosome", "start", "end", sampname)
-    write.table(as.data.frame(canoes.reads),file="canoes.reads.csv", quote=F, sep=",",row.names=T,col.names=T)
-    xcnv.list <- vector('list', length(sampname))
-    for (i in 1:length(sampname)){
-      xcnv.list[[i]] <- CANOESCOV::CallCNVs(sample.name=sampname[i],
-                                            counts=canoes.reads,
-                                            reference_set_select_method=reference_set_select_method,
-                                            num_of_samples_in_reference_set=num_of_samples_in_reference_set,
-                                            target_length=target_length)
+  gc <- getgc(chr, ref)
+  target <- seq(1, nrow(Y))
+  canoes.reads <- cbind(target, gc, canoes.reads)
+  sampname <- as.vector(sampname)
+  names(canoes.reads) <- c("target", "gc", "chromosome", "start", "end", sampname)
+  colnames(canoes.reads) <- c("target", "gc", "chromosome", "start", "end", sampname)
+  write.table(as.data.frame(canoes.reads),file="canoes.reads.csv", quote=F, sep=",",row.names=T,col.names=T)
+  xcnv.list <- vector('list', length(sampname))
+  for (i in 1:length(reference_sample_set)) {
+    if (reference_sample_set[[i]] == '') {
+      next()
     }
-    xcnvs <- do.call('rbind', xcnv.list)
-    if (nrow(calls)==0){calls <- matrix(nrow=0, ncol=ncol(xcnvs))} 
-    calls <- rbind(calls, xcnvs)
+    samples <- unlist(strsplit(reference_sample_set[[i]], ','))
+    actual_sample <- samples[1]
+    reference_samples <- samples[-1]
+    xcnv.list[[i]] <- CANOESCOV::CallCNVs(sample.name=actual_sample,
+                                          reference.samples=reference_samples,
+                                          counts=canoes.reads)
   }
+  xcnvs <- do.call('rbind', xcnv.list)
+  if (nrow(calls)==0){calls <- matrix(nrow=0, ncol=ncol(xcnvs))} 
+  calls <- rbind(calls, xcnvs)
 
   # unify results format
   if (nrow(calls) != 0) {
@@ -80,7 +83,7 @@ run_CANOESCOV <- function(reference_set_select_method,
   calls[colnames(calls) == 'ed_bp'] <- as.character(unlist(calls[colnames(calls) == 'ed_bp']))
   calls[colnames(calls) == 'st_exon'] <- as.character(unlist(calls[colnames(calls) == 'st_exon']))
   calls[colnames(calls) == 'ed_exon'] <- as.character(unlist(calls[colnames(calls) == 'ed_exon']))
-  calls
+  write.csv(calls, output_calls_file, row.names=F)
 }
 
 #   SAMPLE CNV             INTERVAL     KB CHR   MID_BP    TARGETS NUM_TARG MLCN Q_SOME
diff --git a/R/CNV.SIMULATOR/DESCRIPTION b/R/CNV.SIMULATOR/DESCRIPTION
new file mode 100644
index 0000000..0acb0de
--- /dev/null
+++ b/R/CNV.SIMULATOR/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: CNV.SIMULATOR
+Title: CNV.SIMULATOR A Package To Generate Artificial CNVs
+Version: 0.0.1
+Authors@R: c(
+    person("Tomasz", "Gambin", email = "tgambin@gmail.com", role = c("aut", "cre")),
+    person("Marek", "Wiewiórka", email = "marek.wiewiorka@gmail.com", role = c("aut")),
+    person("Wiktor", "Kuśmirek", email = "kusmirekwiktor@gmail.com", role = c("aut")))
+Description: An package to generate artificial CNVs.
+Depends:
+    R (>= 3.2.3),
+    plyr (>= 1.8.4),
+    nnls (>= 1.4.0),
+    Hmisc (>= 4.0.0),
+    mgcv (>= 1.8.0)
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 6.0.1.9000
diff --git a/R/CNV.SIMULATOR/NAMESPACE b/R/CNV.SIMULATOR/NAMESPACE
new file mode 100644
index 0000000..884a631
--- /dev/null
+++ b/R/CNV.SIMULATOR/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
+exportPattern("^[^\\.]")
diff --git a/R/CNV.SIMULATOR/R/functions_CNV.SIMULATOR.R b/R/CNV.SIMULATOR/R/functions_CNV.SIMULATOR.R
new file mode 100644
index 0000000..779ba29
--- /dev/null
+++ b/R/CNV.SIMULATOR/R/functions_CNV.SIMULATOR.R
@@ -0,0 +1,120 @@
+
+# build_intersection_matrix <- function(calls, refs){
+#   intersection_matrix <- matrix(data=as.integer(0), nrow = nrow(calls), ncol = nrow(refs))
+#   if (nrow(intersection_matrix) > 0 && ncol(intersection_matrix) > 0) {
+#     for (i in 1:nrow(intersection_matrix)) {
+#       for (j in 1:ncol(intersection_matrix)) {
+#         if (as.character(calls[i,"sample_name"]) == as.character(refs[j,"sample_name"]) && 
+#             as.character(calls[i,"chr"]) == as.character(refs[j,"chr"]) && 
+#             as.character(calls[i,"cnv"]) == as.character(refs[j,"cnv"])) {
+#           overlap_length <- calc_overlap_length(calls[i,"st_bp"], 
+#                                                 calls[i,"ed_bp"], 
+#                                                 refs[j,"st_bp"], 
+#                                                 refs[j,"ed_bp"])
+#           call_length <- calls[i,"ed_bp"] - calls[i,"st_bp"]
+#           ref_length <- refs[j,"ed_bp"] - refs[j,"st_bp"]
+#           overlap_factor <- overlap_length / ((call_length + ref_length) / 2) * 100
+#           intersection_matrix[i,j] <- round(overlap_factor, 2)
+#         }
+#       }
+#     }
+#   }
+#   intersection_matrix
+# }
+# 
+# filter_intersection_matrix_by_overlap_factor <- function(intersection_matrix, min_overlap_factor){
+#   if (nrow(intersection_matrix) > 0 && ncol(intersection_matrix) > 0) {
+#     for (i in 1:nrow(intersection_matrix)) {
+#       for (j in 1:ncol(intersection_matrix)) {
+#         if (intersection_matrix[i,j] < min_overlap_factor) {
+#           intersection_matrix[i,j] <- 0.00
+#         }
+#       }
+#     }
+#   }
+#   intersection_matrix
+# }
+# 
+# calc_number_of_different_copy_number_for_cnv <- function(cnv, calls){
+#   copy_no <- c()
+#   for (i in 1:nrow(calls)) {
+#     if (as.character(calls[i,"chr"]) == as.character(cnv[1,"chr"]) &&
+#         calls[i,"st_bp"] == cnv[1,"st_bp"] &&
+#         calls[i,"ed_bp"] == cnv[1,"ed_bp"] &&
+#         !is.na(calls[i,"copy_no"])) {
+#       copy_no <- c(copy_no, calls[i,"copy_no"])
+#     }
+#   }
+#   length(unique(copy_no))
+# }
+# 
+# calc_NA_rate_for_cnv <- function(cnv, calls){
+#   num_of_samples <- length(unique(calls[,"sample_name"]))
+#   num_of_NA <- 0
+#   for (i in 1:nrow(calls)) {
+#     if (as.character(calls[i,"chr"]) == as.character(cnv[1,"chr"]) &&
+#         calls[i,"st_bp"] == cnv[1,"st_bp"] &&
+#         calls[i,"ed_bp"] == cnv[1,"ed_bp"] &&
+#         is.na(calls[i,"cnv"])) {
+#       num_of_NA <- num_of_NA + 1
+#     }
+#   }
+#   round(num_of_NA / num_of_samples, 2)
+# }
+# 
+# calc_cnv_frequency <- function(cnv, calls){
+#   num_of_samples <- length(unique(calls[,"sample_name"]))
+#   num_of_same_cnv <- 0
+#   for (i in 1:nrow(calls)) {
+#     if (as.character(calls[i,"chr"]) == as.character(cnv[1,"chr"]) &&
+#         calls[i,"st_bp"] == cnv[1,"st_bp"] &&
+#         calls[i,"ed_bp"] == cnv[1,"ed_bp"] &&
+#         as.character(calls[i,"cnv"]) == as.character(cnv[1,"cnv"])) {
+#       num_of_same_cnv <- num_of_same_cnv + 1
+#     }
+#   }
+#   round(num_of_same_cnv / num_of_samples, 2)
+# }
+# 
+# calc_overlap_length <- function(min1, max1, min2, max2){
+#   overlap_length <- max(0, min(max1, max2) - max(min1, min2))
+#   overlap_length
+# }
+# 
+# calc_quality_statistics <- function(TP, FP, TN, FN){
+#   sensitivity <- if (TP + FN > 0) TP / (TP + FN) else 0
+#   specificity <- if (TN + FP > 0) TN / (TN + FP) else 0
+#   precision <- if (TP + FP > 0) TP / (TP + FP) else 0
+#   accuracy <- if (TP + TN + FP + FN > 0) (TP + TN) / (TP + TN + FP + FN) else 0
+#   return(list(sensitivity=round(sensitivity, digits=3), 
+#               specificity=round(specificity, digits=3), 
+#               precision=round(precision, digits=3), 
+#               accuracy=round(accuracy, digits=3)))
+# }
+# 
+# calc_confusion_matrix <- function(intersection_matrix, num_of_original_targets_in_refs, num_of_original_samples_in_refs){
+#   # TP
+#   TP <- 0
+#   if (nrow(intersection_matrix) > 0) {
+#     for (i in 1:nrow(intersection_matrix)) {
+#       if (sum(intersection_matrix[i,] != 0) != 0) {
+#         TP <- TP + 1
+#       }
+#     }
+#   }
+#   # FP
+#   FP <- nrow(intersection_matrix) - TP
+#   # FN
+#   FN <- 0
+#   if (ncol(intersection_matrix) > 0) {
+#     for (j in 1:ncol(intersection_matrix)) {
+#       if (sum(intersection_matrix[,j] != 0) == 0) {
+#         FN <- FN + 1
+#       }
+#     }
+#   }
+#   # TN
+#   TN <- (num_of_original_targets_in_refs * num_of_original_samples_in_refs) - FN
+#   return(list(TP=TP, FP=FP, TN=TN, FN=FN))
+# }
+# 
diff --git a/R/CNV.SIMULATOR/R/run_CNV.SIMULATOR.R b/R/CNV.SIMULATOR/R/run_CNV.SIMULATOR.R
new file mode 100644
index 0000000..04ba8d1
--- /dev/null
+++ b/R/CNV.SIMULATOR/R/run_CNV.SIMULATOR.R
@@ -0,0 +1,63 @@
+run_CNV.SIMULATOR <- function(input_cov_table,
+                              input_bed,
+                              input_males,
+                              input_females,
+                              output_cov_table,
+                              output_generated_cnvs,
+                              number_of_cnvs_per_sample,
+                              min_number_of_regions,
+                              max_number_of_regions,
+                              simulation_mode){
+
+
+  Y <- read.csv(input_cov_table)
+  sampname <- colnames(Y)
+  targets <- read.delim(input_bed)
+  males <- as.character(unlist(read.table(input_males, sep = ",")))
+  females <- as.character(unlist(read.table(input_females, sep = ",")))
+  generated_cnvs <- matrix(nrow=0, ncol=6)
+  colnames(generated_cnvs) <- c('sample_name','cnv','chr','st_bp','ed_bp','copy_no')
+  if (simulation_mode == "downsample") {
+    downsample_factor <- 0.5
+    for (sample in sampname) {
+      print(paste("Generating arficial CNVs in sample: ", sample, sep=""))
+      for (i in 1:number_of_cnvs_per_sample) {
+        cnv_length <- floor(runif(1, min=min_number_of_regions, max=max_number_of_regions))
+        cnv_start <- floor(runif(1, min=1, max=nrow(targets)))
+        for (j in cnv_start:(min(cnv_start+cnv_length-1,nrow(targets)))) {
+          Y[j,sample] <- floor(Y[j,sample]*downsample_factor)
+        }
+        print(paste(sample, targets[cnv_start,1], targets[cnv_start,2], targets[cnv_start+cnv_length,3], sep=" "))
+        generated_cnvs <- rbind(generated_cnvs, matrix(c(sample, 'del', as.character(targets[cnv_start,1]), targets[cnv_start,2], targets[cnv_start+cnv_length,3], '1'), nrow = 1))
+      }
+    }
+    write.csv(Y[,males], paste(output_cov_table, ".males", sep=""), row.names=F, quote=F)
+    write.csv(Y[,females], paste(output_cov_table, ".females", sep=""), row.names=F, quote=F)
+  } else if (simulation_mode == "replace") {
+    Y_males <- Y[,males]
+    Y_females <- Y[,females]
+    for (female in females) {
+      print(paste("Generating arficial CNVs in sample: ", female, sep=""))
+      cov <- cor(Y[,female], Y[,males])
+      covariances <- cov[1,males]
+      male <- names(sort(covariances, decreasing=T)[1:min(1, length(covariances))])
+      #male <- males[floor(runif(1, min=1, max=length(males)))]  # random male sample - in Ximmer tool
+      for (i in 1:number_of_cnvs_per_sample) {
+        cnv_length <- floor(runif(1, min=min_number_of_regions, max=max_number_of_regions))
+        cnv_start <- floor(runif(1, min=1, max=nrow(targets)))
+        for (j in cnv_start:(min(cnv_start+cnv_length-1,nrow(targets)))) {
+          Y_females[j,female] <- Y_males[j,male]
+          Y[j,female] <- Y[j,male]
+        }
+        print(paste(female, targets[cnv_start,1], targets[cnv_start,2], targets[cnv_start+cnv_length,3], sep=" "))
+        generated_cnvs <- rbind(generated_cnvs, matrix(c(female, 'del', as.character(targets[cnv_start,1]), targets[cnv_start,2], targets[cnv_start+cnv_length,3], '1'), nrow = 1))
+      }
+    }
+    write.csv(Y_males, paste(output_cov_table, ".males", sep=""), row.names=F, quote=F)
+    write.csv(Y_females, paste(output_cov_table, ".females", sep=""), row.names=F, quote=F)
+  } else {
+    print("Choose proper simulation mode!!!")
+  }
+  write.csv(Y, output_cov_table, row.names=F, quote=F)
+  write.csv(generated_cnvs, output_generated_cnvs, row.names=F, quote=F)
+}
diff --git a/R/CNV.SIMULATOR/inst/simulate_cnvs.R b/R/CNV.SIMULATOR/inst/simulate_cnvs.R
new file mode 100755
index 0000000..88c58d8
--- /dev/null
+++ b/R/CNV.SIMULATOR/inst/simulate_cnvs.R
@@ -0,0 +1,61 @@
+#!/usr/bin/env Rscript
+options(java.parameters = "-Xmx1512m")
+library(devtools)
+library('CNV.SIMULATOR')
+library(optparse)
+if (length(which(installed.packages()[,1] == "stringr")) == 0){install.packages("stringr",repos="https://cloud.r-project.org/")}
+library(stringr)
+
+option_list <- list(
+  make_option("--input_cov_table", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--input_bed", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--input_males", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--input_females", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--output_cov_table", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--output_generated_cnvs", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--min_number_of_cnvs_per_sample", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--min_number_of_regions", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--max_number_of_regions", default="public.runner_calls",
+              help="Calls table. [default %default]"),
+  make_option("--simulation_mode", default="1",
+              help="Calls table. [default %default]")
+)
+opt <- parse_args(OptionParser(option_list=option_list))
+
+simulate_cnvs <- function(parameters, cov_table){
+  simulated_cnvs <- run_CNV.SIMULATOR(input_cov_table,
+                                      input_bed,
+                                      input_males,
+                                      input_females,
+                                      output_cov_table,
+                                      output_generated_cnvs,
+                                      min_number_of_cnvs_per_sample,
+                                      min_number_of_regions,
+                                      max_number_of_regions,
+                                      simulation_mode
+  )
+  simulated_cnvs
+}
+
+simulated_cnvs <- simulate_cnvs(opt$input_cov_table, 
+                                opt$input_bed, 
+                                opt$input_males, 
+                                opt$input_females, 
+                                opt$output_cov_table, 
+                                opt$output_generated_cnvs, 
+                                opt$min_number_of_cnvs_per_sample, 
+                                opt$min_number_of_regions, 
+                                opt$max_number_of_regions, 
+                                opt$simulation_mode
+)
+print(simulated_cnvs)
+
+
diff --git a/R/CNVCALLER.EVALUATOR/DESCRIPTION b/R/CNVCALLER.EVALUATOR/DESCRIPTION
index f16487e..5dda4e9 100644
--- a/R/CNVCALLER.EVALUATOR/DESCRIPTION
+++ b/R/CNVCALLER.EVALUATOR/DESCRIPTION
@@ -9,7 +9,7 @@ Description: A package to evaluate CNV callers results.
 Depends:
     R (>= 3.2.3),
     devtools (>= 1.13.2),
-    DBI (== 0.8),
+    DBI (>= 0.8),
     optparse (== 1.4.4)
 License: GPL-3
 Encoding: UTF-8
diff --git a/R/CNVCALLER.RUNNER/DESCRIPTION b/R/CNVCALLER.RUNNER/DESCRIPTION
index aa62f1f..1f68fdc 100644
--- a/R/CNVCALLER.RUNNER/DESCRIPTION
+++ b/R/CNVCALLER.RUNNER/DESCRIPTION
@@ -13,7 +13,7 @@ Depends:
     EXOMEDEPTHCOV (>= 0.0.1),
     CANOESCOV (>= 0.0.1),
     devtools (>= 1.13.2),
-    DBI (== 0.8),
+    DBI (>= 0.8),
     optparse (== 1.4.4)
 License: GPL-3
 Encoding: UTF-8
diff --git a/R/CODEXCOV/CODEXCOV.Rproj b/R/CODEXCOV/CODEXCOV.Rproj
deleted file mode 100755
index d848a9f..0000000
--- a/R/CODEXCOV/CODEXCOV.Rproj
+++ /dev/null
@@ -1,16 +0,0 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-Encoding: UTF-8
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
diff --git a/R/CODEXCOV/DESCRIPTION b/R/CODEXCOV/DESCRIPTION
index d9e2385..aacb351 100755
--- a/R/CODEXCOV/DESCRIPTION
+++ b/R/CODEXCOV/DESCRIPTION
@@ -12,10 +12,9 @@ Description: An extended implementation of the CODEX package in R. It extends
 Depends:
     R (>= 3.2.3),
     devtools (>= 1.13.2),
-    DBI (== 0.8),
+    DBI (>= 0.8),
     optparse (== 1.4.4),
-    CODEX (>= 1.8.0),
-    REFERENCE.SAMPLE.SET.SELECTOR (>= 0.0.1)
+    CODEX (>= 1.8.0)
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
diff --git a/R/CODEXCOV/R/functions_CODEXCOV.R b/R/CODEXCOV/R/functions_CODEXCOV.R
index 3f7522c..25ad2de 100644
--- a/R/CODEXCOV/R/functions_CODEXCOV.R
+++ b/R/CODEXCOV/R/functions_CODEXCOV.R
@@ -1,26 +1,5 @@
 library(CODEX)
 
-#' Function Dexcription
-#'
-#' Function description.
-#' @param cov_file
-#' @param sampname
-#' @keywords 
-#' @export
-#' @examples
-#' coverageObj1
-coverageObj1 <- function(cov_table, sampname, targets_for_chr, chr){
-  Y <- matrix(data=as.integer(0), nrow = nrow(targets_for_chr), ncol = 0)
-  for(sample in sampname) {
-    cov_targets_for_sample <- cov_table[cov_table[,"sample_name"] == sample,]
-    cov_targets_for_sample <- cov_targets_for_sample[with(cov_targets_for_sample, order(target_id)), ]
-    Y <- cbind(Y, cov_targets_for_sample[,"read_count"])
-  }
-  colnames(Y) <- sampname
-  rownames(Y) <- targets_for_chr[,"target_id"]
-  return(list(Y=Y))
-}
-
 #' Function Dexcription
 #'
 #' Function description.
diff --git a/R/CODEXCOV/R/run_CODEXCOV.R b/R/CODEXCOV/R/run_CODEXCOV.R
index 78929de..e5565ae 100644
--- a/R/CODEXCOV/R/run_CODEXCOV.R
+++ b/R/CODEXCOV/R/run_CODEXCOV.R
@@ -9,77 +9,57 @@
 run_CODEXCOV <- function(K_from,
                          K_to,
                          lmax,
-                         reference_set_select_method,
-                         num_of_samples_in_reference_set,
-                         cov_table){
-  
-  sampname <- unique(cov_table[,"sample_name"])
-  sampname <- as.character(sampname)
-  targets <- cov_table[,c("target_id", "chr", "pos_min", "pos_max")]
-  targets <- targets[!duplicated(targets[,"target_id"]),]
-  targets <- targets[with(targets, order(target_id)), ]
+                         input_cov_table,
+                         input_bed,
+                         reference_sample_set_file,
+                         output_calls_file){
+
+  con <- file(reference_sample_set_file, open='r')
+  reference_sample_set <- readLines(con)
+  Y <- read.csv(input_cov_table)
+  targets <- read.delim(input_bed)
+  rownames(Y) <- 1:nrow(Y)
+  rownames(targets) <- 1:nrow(targets)
   
   finalcall <- matrix(nrow=0, ncol=13)
-  chrs <- c(1:22, "X", "Y", paste0("chr",c(1:22, "X", "Y")))
-  
-  for(chr in chrs) {
-    targets_for_chr <- targets[targets[,"chr"] == chr,]
-    ref <- IRanges(start = targets_for_chr[,"pos_min"], end = targets_for_chr[,"pos_max"])
-    if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
+  ref <- IRanges(start = targets[,"st_bp"], end = targets[,"ed_bp"])
+
+  ###################################################
+  ### code chunk number 5: gcmapp1
+  ###################################################
+  gcmapp1_result <- gcmapp1(targets[1,'chr'], ref)
+  gc <- gcmapp1_result$gc
+
+  for (i in 1:length(reference_sample_set)) {
+    if (reference_sample_set[[i]] == '') {
       next()
     }
-    ###################################################
-    ### code chunk number 4: coverageObj1
-    ###################################################
-    Y <- coverageObj1(cov_table, sampname, targets_for_chr, chr)$Y
-
-    ###################################################
-    ### code chunk number 5: gcmapp1
-    ###################################################
-    gcmapp1_result <- gcmapp1(chr, ref)
-    gc <- gcmapp1_result$gc
+    samples <- unlist(strsplit(reference_sample_set[[i]], ','))
+    actual_sample <- samples[1]
+    reference_samples <- samples[-1]
+    samples <- sort(samples)
+    Y_subset <- as.matrix(Y[,samples])
 
     ###################################################
     ### code chunk number 7: normObj1
     ###################################################
-    normObj_result <- normObj1(Y, gc, K = K_from:K_to)
+    normObj_result <- normObj1(Y_subset, gc, K = K_from:K_to)
     Yhat <- normObj_result$Yhat
     AIC <- normObj_result$AIC
     BIC <- normObj_result$BIC
     RSS <- normObj_result$RSS
     K <- normObj_result$K
-    
-    ###################################################
-    ### code chunk number 8: normObj2 (eval = FALSE)
-    ###################################################
-    ## normObj_result <- normObj2(Y, gc, K = 1:9, normal_index=seq(1,45,2))
-    ## Yhat <- normObj_result$Yhat
-    ## AIC <- normObj_result$AIC
-    ## BIC <- normObj_result$BIC
-    ## RSS <- normObj_result$RSS
-    ## K <- normObj_result$K
-    
-    ###################################################
-    ### code chunk number 9: choiceofK (eval = FALSE)
-    ###################################################
-    #choiceofK(AIC, BIC, RSS, K, filename = paste("choiceofK_", chr, ".pdf", sep = ""))
-    
-    ###################################################
-    ### code chunk number 10: fig1
-    ###################################################
-    #plot(K, RSS, type = "b", xlab = "Number of latent variables")
-    #plot(K, AIC, type = "b", xlab = "Number of latent variables")
-    #plot(K, BIC, type = "b", xlab = "Number of latent variables")
-    
+
     ###################################################
     ### code chunk number 11: segment1
     ###################################################
-    finalcallIt <- segment1(Y, Yhat, K[which.max(BIC)], K, sampname,
-                            ref, chr, lmax, mode = "integer")$finalcall
-    if (nrow(finalcall)==0){finalcall <- matrix(nrow=0, ncol=ncol(finalcallIt))} 
+    finalcallIt <- segment1(Y_subset, Yhat, K[which.max(BIC)], K, samples,
+                            ref, targets[1,'chr'], lmax, mode = "integer")$finalcall
+    finalcallIt <- finalcallIt[finalcallIt[,"sample_name"] == actual_sample,]
+    if (nrow(finalcall)==0){finalcall <- matrix(nrow=0, ncol=ncol(finalcallIt))}
     finalcall <- rbind(finalcall, finalcallIt)
-  
+    print(finalcall)
   }
   finalcall <- unify_calls_format(finalcall)$finalcall
-  finalcall
+  write.csv(finalcall, output_calls_file, row.names=F)
 }
diff --git a/R/CODEXCOV/man/coverageObj1.Rd b/R/CODEXCOV/man/coverageObj1.Rd
deleted file mode 100644
index f6c6b1c..0000000
--- a/R/CODEXCOV/man/coverageObj1.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{coverageObj1}
-\alias{coverageObj1}
-\title{Function Dexcription}
-\usage{
-coverageObj1(cov_file, sampname)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/CODEXCOV/man/gcmapp1.Rd b/R/CODEXCOV/man/gcmapp1.Rd
deleted file mode 100644
index 2fa53f1..0000000
--- a/R/CODEXCOV/man/gcmapp1.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{gcmapp1}
-\alias{gcmapp1}
-\title{Function Dexcription}
-\usage{
-gcmapp1(chr, ref)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/CODEXCOV/man/normObj1.Rd b/R/CODEXCOV/man/normObj1.Rd
deleted file mode 100644
index 66b0a96..0000000
--- a/R/CODEXCOV/man/normObj1.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{normObj1}
-\alias{normObj1}
-\title{Function Dexcription}
-\usage{
-normObj1(Y_qc, gc_qc, K)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/CODEXCOV/man/normObj2.Rd b/R/CODEXCOV/man/normObj2.Rd
deleted file mode 100644
index 4a10d47..0000000
--- a/R/CODEXCOV/man/normObj2.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{normObj2}
-\alias{normObj2}
-\title{Function Dexcription}
-\usage{
-normObj2(Y_qc, gc_qc, K, normal_index)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/CODEXCOV/man/qcObj1.Rd b/R/CODEXCOV/man/qcObj1.Rd
deleted file mode 100644
index 806a098..0000000
--- a/R/CODEXCOV/man/qcObj1.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{qcObj1}
-\alias{qcObj1}
-\title{Function Dexcription}
-\usage{
-qcObj1(Y, sampname, chr, ref, mapp, gc, cov_thresh, length_thresh, mapp_thresh,
-  gc_thresh)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/CODEXCOV/man/run_CODEXCOV.Rd b/R/CODEXCOV/man/run_CODEXCOV.Rd
deleted file mode 100644
index f80759e..0000000
--- a/R/CODEXCOV/man/run_CODEXCOV.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_CODEXCOV.R
-\name{run_CODEXCOV}
-\alias{run_CODEXCOV}
-\title{Function Dexcription}
-\usage{
-run_CODEXCOV(cov_file, sampname)
-}
-\description{
-Function description.
-}
-\examples{
-run_codexcov
-}
diff --git a/R/CODEXCOV/man/segment1.Rd b/R/CODEXCOV/man/segment1.Rd
deleted file mode 100644
index c7cf654..0000000
--- a/R/CODEXCOV/man/segment1.Rd
+++ /dev/null
@@ -1,14 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/functions_CODEXCOV.R
-\name{segment1}
-\alias{segment1}
-\title{Function Dexcription}
-\usage{
-segment1(Y_qc, Yhat, optK, K, sampname_qc, ref_qc, chr, lmax, mode)
-}
-\description{
-Function description.
-}
-\examples{
-coverageObj1
-}
diff --git a/R/EXOMECOPYCOV/DESCRIPTION b/R/EXOMECOPYCOV/DESCRIPTION
new file mode 100644
index 0000000..1c27c1d
--- /dev/null
+++ b/R/EXOMECOPYCOV/DESCRIPTION
@@ -0,0 +1,22 @@
+Package: EXOMECOPYCOV
+Title: EXOMECOPY Package With Interface To External Coverage File
+Version: 0.0.1
+Authors@R: c(
+    person("Tomasz", "Gambin", email = "tgambin@gmail.com", role = c("aut", "cre")),
+    person("Marek", "Wiewiórka", email = "marek.wiewiorka@gmail.com", role = c("aut")),
+    person("Wiktor", "Kuśmirek", email = "kusmirekwiktor@gmail.com", role = c("aut")))
+Description: An extended implementation of the exomeCopy package in R. It extends 
+    original implementation by using external coverage file, which should
+    speed up calculations for running application with multiple sets of input
+    parameters.
+Depends:
+    R (>= 3.2.3),
+    devtools (>= 1.13.2),
+    DBI (>= 0.8),
+    optparse (== 1.4.4),
+    IRanges (>= 2.0.0),
+    exomeCopy (== 1.22)
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 6.0.1.9000
diff --git a/R/EXOMECOPYCOV/NAMESPACE b/R/EXOMECOPYCOV/NAMESPACE
new file mode 100644
index 0000000..884a631
--- /dev/null
+++ b/R/EXOMECOPYCOV/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: fake comment so roxygen2 overwrites silently.
+exportPattern("^[^\\.]")
diff --git a/R/EXOMECOPYCOV/R/functions_EXOMECOPYCOV.R b/R/EXOMECOPYCOV/R/functions_EXOMECOPYCOV.R
new file mode 100644
index 0000000..bcfdd0d
--- /dev/null
+++ b/R/EXOMECOPYCOV/R/functions_EXOMECOPYCOV.R
@@ -0,0 +1,34 @@
+
+# from CODEX package
+getgc <- function(chr, ref) {
+  library(GenomeInfoDb)
+  library(BSgenome.Hsapiens.UCSC.hg19)
+  if (chr == "X" | chr == "x" | chr == "chrX" | chr == "chrx") {
+    chrtemp <- 23
+  } else if (chr == "Y" | chr == "y" | chr == "chrY" | chr == "chry") {
+    chrtemp <- 24
+  } else {
+    chrtemp <- as.numeric(mapSeqlevels(as.character(chr), "NCBI")[1])
+  }
+  if (length(chrtemp) == 0) 
+    message("Chromosome cannot be found in NCBI Homo sapiens database!")
+  chrm <- unmasked(Hsapiens[[chrtemp]])
+  seqs <- Views(chrm, ref)
+  af <- alphabetFrequency(seqs, baseOnly = TRUE, as.prob = TRUE)
+  gc <- round((af[, "G"] + af[, "C"]) * 100,2)
+  gc
+}
+
+unify_calls_format <- function(compiled.segments, chr){
+  calls <- matrix(nrow=length(compiled.segments$sample.name), ncol=7)
+  colnames(calls) <- c('sample_name', 'chr', 'st_bp', 'ed_bp', 'cnv', 'copy_no', 'log_odds')
+  calls[,'sample_name'] <- compiled.segments$sample.name
+  calls[,'chr'] <- rep(chr, nrow(calls))
+  calls[,'st_bp'] <- unlist(start(ranges(compiled.segments)))
+  calls[,'ed_bp'] <- unlist(end(ranges(compiled.segments)))
+  calls[,'copy_no'] <- compiled.segments$copy.count
+  calls[,'cnv'] <- ifelse(calls[,'copy_no'] > 2, 'dup', 'del')
+  calls[,'log_odds'] <- compiled.segments$log.odds
+  calls <- subset(calls, calls[,'copy_no'] != "2")
+  return(list(calls=calls))
+}
diff --git a/R/EXOMECOPYCOV/R/run_EXOMECOPYCOV.R b/R/EXOMECOPYCOV/R/run_EXOMECOPYCOV.R
new file mode 100644
index 0000000..af45307
--- /dev/null
+++ b/R/EXOMECOPYCOV/R/run_EXOMECOPYCOV.R
@@ -0,0 +1,59 @@
+library(methods)
+library(exomeCopy)
+
+run_EXOMECOPYCOV <- function(input_cov_table,
+                             input_bed,
+                             reference_sample_set_file,
+                             output_calls_file){
+
+  con <- file(reference_sample_set_file, open='r')
+  reference_sample_set <- readLines(con)
+  Y <- read.csv(input_cov_table)
+  targets <- read.delim(input_bed)
+  rownames(Y) <- 1:nrow(Y)
+  rownames(targets) <- 1:nrow(targets)
+  chr <- targets[1,'chr']
+  ref <- IRanges(start = targets[,"st_bp"], end = targets[,"ed_bp"])
+  if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
+    next()
+  }
+  target <- GRanges(seqname = chr, IRanges(start = start(ref) + 1, end = end(ref)))
+  gc <- getgc(chr, ref)
+
+  rdata_org <- RangedData(IRanges(start=start(ref), end=end(ref)), space=rep(chr,nrow(Y)), universe="hg19", gc=gc, gc.sq=gc^2) 
+  finalcall <- matrix(nrow=0, ncol=13)
+
+  for (i in 1:length(reference_sample_set)) {
+    if (reference_sample_set[[i]] == '') {
+      next()
+    }
+    samples <- unlist(strsplit(reference_sample_set[[i]], ','))
+    actual_sample <- samples[1]
+    reference_samples <- samples[-1]
+    samples <- sort(samples)
+    rdata <- rdata_org
+
+    for(sample.name in samples) {
+      rdata[[sample.name]] <- Y[,sample.name]
+    }
+
+    rdata[["bg"]] <- generateBackground(samples, rdata, median)
+    rdata[["log.bg"]] <- log(rdata$bg + .1) 
+    rdata[["width"]] <- width(ref)
+
+    samples <- c(actual_sample)
+    fit.list <- lapply(samples, function(sample.name) {
+      lapply(seqlevels(target), function(seq.name) {
+        print(paste("Processing sample: ", sample.name, sep=""))
+        exomeCopy(rdata, sample.name, X.names = c("log.bg", "gc", "gc.sq", "width"), S = 0:4, d = 2)
+      })
+    })
+    compiled.segments <- compileCopyCountSegments(fit.list)
+    finalcallIt <- unify_calls_format(compiled.segments, chr)$calls
+    if (nrow(finalcall)==0){finalcall <- matrix(nrow=0, ncol=ncol(finalcallIt))}
+    finalcall <- rbind(finalcall, finalcallIt)
+    print(finalcallIt)
+
+  }
+  write.csv(finalcall, output_calls_file, row.names=F)
+}
diff --git a/R/EXOMEDEPTHCOV/DESCRIPTION b/R/EXOMEDEPTHCOV/DESCRIPTION
index d4a3025..4627623 100644
--- a/R/EXOMEDEPTHCOV/DESCRIPTION
+++ b/R/EXOMEDEPTHCOV/DESCRIPTION
@@ -12,11 +12,10 @@ Description: An extended implementation of the ExomeDepth package in R. It exten
 Depends:
     R (>= 3.2.3),
     devtools (>= 1.13.2),
-    DBI (== 0.8),
+    DBI (>= 0.8),
     optparse (== 1.4.4),
     IRanges (>= 2.0.0),
     ExomeDepth (>= 1.1.10),
-    REFERENCE.SAMPLE.SET.SELECTOR (>= 0.0.1)
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
diff --git a/R/EXOMEDEPTHCOV/R/functions_EXOMEDEPTHCOV.R b/R/EXOMEDEPTHCOV/R/functions_EXOMEDEPTHCOV.R
index 2b38d6b..e69de29 100644
--- a/R/EXOMEDEPTHCOV/R/functions_EXOMEDEPTHCOV.R
+++ b/R/EXOMEDEPTHCOV/R/functions_EXOMEDEPTHCOV.R
@@ -1,15 +0,0 @@
-library(ExomeDepth)
-
-coverageObj1 <- function(cov_table, sampname, targets_for_chr, chr){
-  Y <- matrix(data=as.integer(0), nrow = nrow(targets_for_chr), ncol = 0)
-  for(sample in sampname) {
-    cov_targets_for_sample <- cov_table[cov_table[,"sample_name"] == sample,]
-    cov_targets_for_sample <- cov_targets_for_sample[with(cov_targets_for_sample, order(target_id)), ]
-    Y <- cbind(Y, cov_targets_for_sample[,"read_count"])
-  }
-  colnames(Y) <- sampname
-  rownames(Y) <- targets_for_chr[,"target_id"]
-  return(list(Y=Y))
-}
-
-
diff --git a/R/EXOMEDEPTHCOV/R/run_EXOMEDEPTHCOV.R b/R/EXOMEDEPTHCOV/R/run_EXOMEDEPTHCOV.R
index 295306e..5694e4b 100644
--- a/R/EXOMEDEPTHCOV/R/run_EXOMEDEPTHCOV.R
+++ b/R/EXOMEDEPTHCOV/R/run_EXOMEDEPTHCOV.R
@@ -1,66 +1,54 @@
 library(ExomeDepth)
 library(methods)
 
-run_EXOMEDEPTHCOV <- function(reference_set_select_method,
-                              num_of_samples_in_reference_set,
-                              cov_table){
+run_EXOMEDEPTHCOV <- function(input_cov_table,
+                              input_bed,
+                              reference_sample_set_file,
+                              output_calls_file){
 
-  sampname <- unique(cov_table[,"sample_name"])
-  targets <- cov_table[,c("target_id", "chr", "pos_min", "pos_max")]
-  targets <- targets[!duplicated(targets[,"target_id"]),]
-  targets <- targets[with(targets, order(target_id)), ]
-  
+  con <- file(reference_sample_set_file, open='r')
+  reference_sample_set <- readLines(con)
+  Y <- read.csv(input_cov_table)
+  sampname <- colnames(Y)
+  targets <- read.delim(input_bed)
+  rownames(Y) <- 1:nrow(Y)
+  rownames(targets) <- 1:nrow(targets)
   calls <- data.frame(matrix(nrow=0, ncol=13))
-  chrs <- c(1:22, "X", "Y", paste0("chr",c(1:22, "X", "Y")))
-  library(IRanges)
-  for(chr in chrs) {
-    targets_for_chr <- targets[targets[,"chr"] == chr,]
-    ref <- IRanges(start = targets_for_chr[,"pos_min"], end = targets_for_chr[,"pos_max"])
-    if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
+
+  for (i in 1:length(reference_sample_set)) {
+    if (reference_sample_set[[i]] == '') {
       next()
     }
-    Y <- coverageObj1(cov_table, sampname, targets_for_chr, chr)$Y
-
-    for (actual_sample_id in 1:length(sampname)) {
-      actual_sample <- sampname[actual_sample_id]
-      ## ----reference.selection-------------------------------------------------
-      target_length <- c()
-      for (i in 1:nrow(Y)) {
-        target_length <- c(target_length, width(ref[i]))
-      }
-      reference_samples <- run_REFERENCE.SAMPLE.SET.SELECTOR(actual_sample,
-                                                             Y,
-                                                             reference_set_select_method,
-                                                             num_of_samples_in_reference_set,
-                                                             target_length)
+    samples <- unlist(strsplit(reference_sample_set[[i]], ','))
+    actual_sample <- samples[1]
+    reference_samples <- samples[-1]
 
-      ## ----construct.ref-------------------------------------------------------
-      my.matrix <- as.matrix(Y[,reference_samples])
-      my.reference.selected <- apply(X = my.matrix, 
-                                     MAR = 1, 
-                                     FUN = sum)
+    ## ----construct.ref-------------------------------------------------------
+    my.matrix <- as.matrix(Y[,reference_samples])
+    my.reference.selected <- apply(X = my.matrix, 
+                                   MAR = 1, 
+                                   FUN = sum)
 
-      ## ----build.complete------------------------------------------------------
-      all.exons <- new('ExomeDepth',
-                       test = Y[,actual_sample_id],
-                       reference = my.reference.selected,
-                       formula = 'cbind(test, reference) ~ 1')
+    ## ----build.complete------------------------------------------------------
+    all.exons <- new('ExomeDepth',
+                     test = Y[,actual_sample],
+                     reference = my.reference.selected,
+                     formula = 'cbind(test, reference) ~ 1')
 
-      ## ----call.CNVs-----------------------------------------------------------
-      all.exons <- ExomeDepth::CallCNVs(x = all.exons, 
-                                        transition.probability = 10^-4, 
-                                        chromosome = rep(chr, nrow(Y)), 
-                                        start = start(ref), 
-                                        end = end(ref), 
-                                        name = rep('name', nrow(Y)))
-      print(all.exons@CNV.calls)
-      if (nrow(all.exons@CNV.calls) > 0) {
-        actual_sample_column <- data.frame(matrix(rep(actual_sample, nrow(all.exons@CNV.calls)), nrow=nrow(all.exons@CNV.calls))) 
-        callsIt <- cbind(actual_sample_column, all.exons@CNV.calls)
-        colnames(callsIt) <- c(c, colnames(all.exons@CNV.calls))
-        if (nrow(calls)==0){calls <- data.frame(matrix(nrow=0, ncol=ncol(callsIt)))} 
-        calls <- rbind(calls, callsIt)
-      }
+    ## ----call.CNVs-----------------------------------------------------------
+    all.exons <- ExomeDepth::CallCNVs(x = all.exons, 
+                                      transition.probability = 10^-4, 
+                                      chromosome = targets[,"chr"], 
+                                      start = targets[,"st_bp"], 
+                                      end = targets[,"ed_bp"], 
+                                      name = rep('name', nrow(Y)))
+    print(all.exons@CNV.calls)
+    if (nrow(all.exons@CNV.calls) > 0) {
+      actual_sample_column <- data.frame(matrix(rep(actual_sample, nrow(all.exons@CNV.calls)), nrow=nrow(all.exons@CNV.calls))) 
+      callsIt <- cbind(actual_sample_column, all.exons@CNV.calls)
+      colnames(callsIt) <- c(c, colnames(all.exons@CNV.calls))
+      if (nrow(calls)==0){calls <- data.frame(matrix(nrow=0, ncol=ncol(callsIt)))} 
+      calls <- rbind(calls, callsIt)
     }
   }
   # unify names of output columns
@@ -86,5 +74,5 @@ run_EXOMEDEPTHCOV <- function(reference_set_select_method,
   colnames(calls)[colnames(calls) == 'reads.observed'] <- 'raw_cov'
   colnames(calls)[colnames(calls) == 'reads.ratio'] <- 'copy_no'
   calls[colnames(calls) == 'copy_no'] <- round(calls[colnames(calls) == 'raw_cov'] / (calls[colnames(calls) == 'norm_cov'] / 2))
-  calls
+  write.csv(calls, output_calls_file, row.names=F)
 }
diff --git a/R/REFERENCE.SAMPLE.SET.SELECTOR/R/functions_REFERENCE.SAMPLE.SET.SELECTOR.R b/R/REFERENCE.SAMPLE.SET.SELECTOR/R/functions_REFERENCE.SAMPLE.SET.SELECTOR.R
index 6ac3736..4863a15 100644
--- a/R/REFERENCE.SAMPLE.SET.SELECTOR/R/functions_REFERENCE.SAMPLE.SET.SELECTOR.R
+++ b/R/REFERENCE.SAMPLE.SET.SELECTOR/R/functions_REFERENCE.SAMPLE.SET.SELECTOR.R
@@ -1,4 +1,3 @@
-library(ExomeDepth)
 
 canoes_method <- function(investigated_sample, Y, num_refs){
   if (num_refs == 0) {
@@ -13,7 +12,19 @@ canoes_method <- function(investigated_sample, Y, num_refs){
   return(list(reference_samples=reference_samples))
 }
 
+canoes_cov_thresh_method <- function(investigated_sample, Y, cov_thresh){
+  samples <- colnames(Y)
+  cov <- cor(Y[, samples], Y[, samples])
+  reference_samples <- setdiff(samples, investigated_sample)
+  covariances <- cov[investigated_sample, reference_samples]
+  num_refs <- sum(covariances > cov_thresh)
+  reference_samples <- names(sort(covariances, 
+          decreasing=T)[1:num_refs])
+  return(list(reference_samples=reference_samples))
+}
+
 exomedepth_method <- function(investigated_sample, Y, num_refs, target_length){
+  library(ExomeDepth)
   samples <- colnames(Y)
   reference_samples <- setdiff(samples, investigated_sample)
   reference_set <- select.reference.set(test.counts = Y[,investigated_sample],
@@ -30,3 +41,45 @@ exomedepth_method <- function(investigated_sample, Y, num_refs, target_length){
   } 
   return(list(reference_samples=reference_samples))
 }
+
+random_method <- function(investigated_sample, Y, num_refs){
+  samples <- colnames(Y)
+  reference_samples <- setdiff(samples, investigated_sample)
+  reference_samples <- reference_samples[sample(1:length(reference_samples), num_refs, replace=F)]
+  return(list(reference_samples=reference_samples))
+}
+
+kmeans_select_groups <- function(Y, number_of_clusters){
+  samples <- colnames(Y)
+  cov <- cor(Y[, samples], Y[, samples])
+  d <- cov
+  for(i in 1:nrow(d)) {
+    d[i,] <- cov[samples[i], samples]
+  }
+  d <- 1-d
+  c <- c()
+  for(i in 1:ncol(d)-1) {
+    c <- c(c, d[(i+1):nrow(d),i])
+  }
+  d <- dist(d)
+  for(i in 1:length(d)) {
+    d[i] <- c[i]
+  }
+  km1 <- kmeans(d, number_of_clusters, nstart=100)
+  return(list(clusters=km1))
+}
+
+kmeans_method <- function(investigated_sample, Y, kmeans_clusters){
+  samples <- colnames(Y)
+  cluster_id <- kmeans_clusters$cluster[investigated_sample]
+  reference_samples <- c()
+  list_index <- 1
+  for(i in kmeans_clusters$cluster) {
+    if(i == cluster_id) {
+      reference_samples <- c(reference_samples, samples[list_index])
+    }
+    list_index <- list_index + 1
+  }
+  reference_samples <- setdiff(reference_samples, investigated_sample)
+  return(list(reference_samples=reference_samples))
+}
diff --git a/R/REFERENCE.SAMPLE.SET.SELECTOR/R/run_REFERENCE.SAMPLE.SET.SELECTOR.R b/R/REFERENCE.SAMPLE.SET.SELECTOR/R/run_REFERENCE.SAMPLE.SET.SELECTOR.R
index f025b95..202c284 100644
--- a/R/REFERENCE.SAMPLE.SET.SELECTOR/R/run_REFERENCE.SAMPLE.SET.SELECTOR.R
+++ b/R/REFERENCE.SAMPLE.SET.SELECTOR/R/run_REFERENCE.SAMPLE.SET.SELECTOR.R
@@ -1,16 +1,48 @@
-run_REFERENCE.SAMPLE.SET.SELECTOR <- function(investigated_sample,
-                                              Y,
-                                              select_method,
+run_REFERENCE.SAMPLE.SET.SELECTOR <- function(select_method,
                                               num_refs,
-                                              target_length){
-  if(select_method == "canoes") {
-    reference_samples <- canoes_method(investigated_sample, Y, num_refs)$reference_samples
-  } else if(select_method == "codex") {
-    #reference_samples <- codex_method(investigated_sample, Y, num_refs)$reference_samples
-  } else if(select_method == "exomedepth") {
-    reference_samples <- exomedepth_method(investigated_sample, Y, num_refs, target_length)$reference_samples
-  } else if(select_method == "clamms") {
-    #reference_samples <- clamms_method(investigated_sample, Y, num_refs)$reference_samples
+                                              cov_thresh,
+                                              input_cov_table,
+                                              input_bed,
+                                              output_reference_file){
+
+  Y <- data.matrix(read.csv(input_cov_table))
+  sampname <- colnames(Y)
+  targets <- read.delim(input_bed)
+  target_length <- targets[,"st_bp"] - targets[,"ed_bp"]
+  reference_samples <- list()
+  if(select_method == "kmeans") {
+    kmeans_clusters <- kmeans_select_groups(Y, num_refs)$clusters
   }
-  reference_samples
+
+  for(i in 1:length(sampname)) {
+    investigated_sample <- as.character(sampname[i])
+    print(paste("Processing ", investigated_sample, " sample ...", sep=""))
+    if(select_method == "canoes") {
+      reference_samples_for_investigated_sample <- canoes_method(investigated_sample, Y, num_refs)$reference_samples
+      reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "codex") {
+      #reference_samples_for_investigated_sample <- codex_method(investigated_sample, Y, num_refs)$reference_samples
+      #reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "exomedepth") {
+      reference_samples_for_investigated_sample <- exomedepth_method(investigated_sample, Y, num_refs, target_length)$reference_samples
+      reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "clamms") {
+      #reference_samples_for_investigated_sample <- clamms_method(investigated_sample, Y, num_refs)$reference_samples
+      #reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "random") {
+      reference_samples_for_investigated_sample <- random_method(investigated_sample, Y, num_refs)$reference_samples
+      reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "canoes_cov_thresh") {
+      reference_samples_for_investigated_sample <- canoes_cov_thresh_method(investigated_sample, Y, cov_thresh)$reference_samples
+      reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    } else if(select_method == "kmeans") {
+      reference_samples_for_investigated_sample <- kmeans_method(investigated_sample, Y, kmeans_clusters)$reference_samples
+      reference_samples[[i]] <- c(investigated_sample, reference_samples_for_investigated_sample)
+    }
+  }
+  resultant_string <- ''
+  for(i in 1:length(reference_samples)) {
+    resultant_string <- paste(resultant_string, paste(reference_samples[[i]], collapse=","), '\n', sep="")
+  }
+  write(resultant_string, output_reference_file)
 }
diff --git a/R/TARGET.QC/R/functions_TARGET.QC.R b/R/TARGET.QC/R/functions_TARGET.QC.R
index 63f2278..850803e 100644
--- a/R/TARGET.QC/R/functions_TARGET.QC.R
+++ b/R/TARGET.QC/R/functions_TARGET.QC.R
@@ -1,17 +1,5 @@
 library(CODEX)
 
-coverageObj1 <- function(cov_table, sampname, targets_for_chr, chr){
-  Y <- matrix(data=as.integer(0), nrow = nrow(targets_for_chr), ncol = 0)
-  for(sample in sampname) {
-    cov_targets_for_sample <- cov_table[cov_table[,"sample_name"] == sample,]
-    cov_targets_for_sample <- cov_targets_for_sample[with(cov_targets_for_sample, order(target_id)), ]
-    Y <- cbind(Y, cov_targets_for_sample[,"read_count"])
-  }
-  colnames(Y) <- sampname
-  rownames(Y) <- targets_for_chr[,"target_id"]
-  return(list(Y=Y))
-}
-
 gcmapp1 <- function(chr, ref){
   gc <- getgc(chr, ref)
   mapp <- getmapp(chr, ref)
diff --git a/R/TARGET.QC/R/run_TARGET.QC.R b/R/TARGET.QC/R/run_TARGET.QC.R
index 23eed55..f5560c6 100644
--- a/R/TARGET.QC/R/run_TARGET.QC.R
+++ b/R/TARGET.QC/R/run_TARGET.QC.R
@@ -5,7 +5,10 @@ run_TARGET.QC <- function(mapp_thresh,
                           length_thresh_to,
                           gc_thresh_from,
                           gc_thresh_to,
-                          cov_table){
+                          input_cov_table,
+                          output_cov_table,
+                          input_bed,
+                          output_bed){
   #mapp_thresh <- 0.9
   #cov_thresh_from <- 20
   #cov_thresh_to <- 4000
@@ -13,51 +16,23 @@ run_TARGET.QC <- function(mapp_thresh,
   #length_thresh_to <- 2000
   #gc_thresh_from <- 20
   #gc_thresh_to <- 80
-  #K_from <- 1
-  #K_to <- 9
-  #lmax <- 200
-  sampname <- unique(cov_table[,"sample_name"])
-  targets <- cov_table[,c("target_id", "chr", "pos_min", "pos_max")]
-  targets <- targets[!duplicated(targets[,"target_id"]),]
-  targets <- targets[with(targets, order(target_id)), ]
-  cov_table_qc <- matrix(nrow=0, ncol=6)
-  colnames(cov_table_qc) <- colnames(cov_table)
-
-  chrs <- c(1:22, "X", "Y", paste0("chr",c(1:22, "X", "Y")))
-  for(chr in chrs) {
-    targets_for_chr <- targets[targets[,"chr"] == chr,]
-    ref <- IRanges(start = targets_for_chr[,"pos_min"], end = targets_for_chr[,"pos_max"])
-    if (length(ref) == 0) {    # 0 elements for specified chromosome in bed
-      next()
-    }
-    Y <- coverageObj1(cov_table, sampname, targets_for_chr, chr)$Y
-    gcmapp1_result <- gcmapp1(chr, ref)
-    gc <- gcmapp1_result$gc
-    mapp <- gcmapp1_result$mapp
-
-    qcObj1_result <- qcObj1(Y, sampname, chr, ref, mapp, gc, cov_thresh = c(cov_thresh_from, cov_thresh_to), 
-                        length_thresh = c(length_thresh_from, length_thresh_to), mapp_thresh, 
-                        gc_thresh = c(gc_thresh_from, gc_thresh_to))
-    Y_qc <- qcObj1_result$Y_qc
-    sampname_qc <- qcObj1_result$sampname_qc
-    ref_qc <- qcObj1_result$ref_qc
-    colnames(Y_qc) <- sampname_qc
-    for(sample in colnames(Y_qc)) {
-      new_cov_table_qc_rows <- cbind(sample, rownames(Y_qc), chr, start(ref_qc), end(ref_qc), Y_qc[,sample])
-      cov_table_qc <- rbind(cov_table_qc, new_cov_table_qc_rows)
-    }
-  }
-  cov_table_qc <- as.data.frame(cov_table_qc)
-  cov_table_qc[,"pos_min"] <- strtoi(cov_table_qc[,"pos_min"])
-  cov_table_qc[,"pos_max"] <- strtoi(cov_table_qc[,"pos_max"])
-  cov_table_qc[,"target_id"] <- strtoi(cov_table_qc[,"target_id"])
-  cov_table_qc[,"read_count"] <- strtoi(cov_table_qc[,"read_count"])
-  cov_table_qc
+  Y <- read.csv(input_cov_table)
+  sampname <- colnames(Y)
+  targets <- read.delim(input_bed)
+  rownames(Y) <- 1:nrow(Y)
+  rownames(targets) <- 1:nrow(targets)
+  ref <- IRanges(start = targets[,"st_bp"], end = targets[,"ed_bp"])
+  gcmapp1_result <- gcmapp1(targets[1,"chr"], ref)
+  gc <- gcmapp1_result$gc
+  mapp <- gcmapp1_result$mapp
+  qcObj1_result <- qcObj1(Y, sampname, targets[1,"chr"], ref, mapp, gc, cov_thresh = c(cov_thresh_from, cov_thresh_to), 
+                          length_thresh = c(length_thresh_from, length_thresh_to), mapp_thresh, 
+                          gc_thresh = c(gc_thresh_from, gc_thresh_to))
+  Y_qc <- qcObj1_result$Y_qc
+  sampname_qc <- qcObj1_result$sampname_qc
+  ref_qc <- qcObj1_result$ref_qc
+  colnames(Y_qc) <- sampname_qc
+  write.csv(Y_qc, output_cov_table, row.names=F, quote=F)
+  write.table(targets[rownames(Y_qc),], output_bed, row.names=F, quote=F, sep="\t")
 }
 
-#  sample_name target_id chr  pos_min  pos_max read_count
-#1     NA19012    193524   Y 25426932 25427053          0
-#2     NA19012    193525   Y 25431556 25431676          0
-#3     NA19012    193526   Y 25535089 25535239          0
-#4     NA19012    193527   Y 25537286 25537526          0
-#5     NA19012    193528   Y 25538793 25538913          0
diff --git a/airflow/dags/canoes.py b/airflow/dags/canoes.py
new file mode 100755
index 0000000..fe08612
--- /dev/null
+++ b/airflow/dags/canoes.py
@@ -0,0 +1,59 @@
+from airflow import DAG
+from airflow.operators.bash_operator import BashOperator
+from airflow.models import Variable
+from datetime import datetime, timedelta
+
+default_args = {
+    'owner': 'biodatageeks',
+    'depends_on_past': False,
+    'start_date': datetime(2017, 10, 18),
+    'email': ['team@biodatageeks.ii.pw.edu.pl'],
+    'email_on_failure': False,
+    'email_on_retry': False,
+    'retries': 0
+}
+
+dag = DAG(
+    'canoes', default_args=default_args, schedule_interval=None)
+
+##############################################
+########## RUN RAW CANOES CNV CALLER ##########
+##############################################
+
+### target qc parameters
+mapp_thresh = '0.9'
+cov_thresh_from = '20'
+cov_thresh_to = '4000'
+length_thresh_from = '20'
+length_thresh_to = '2000'
+gc_thresh_from = '20'
+gc_thresh_to = '80'
+raw_cov_table = 'raw_cov_table.csv'
+qc_cov_table = 'qc_cov_table.csv'
+raw_bed = 'raw_bed.bed'
+qc_bed = 'qc_bed.bed'
+
+### select reference sample set parameters
+select_method = 'exomedepth' # "canoes", "codex" or "exomedepth"
+num_refs = '30'
+reference_sample_set_file = 'reference_sample_set.csv'
+
+### canoes parameters
+output_calls_file = 'calls.csv'
+
+run_canoes_caller_cmd= " \
+docker pull biodatageeks/cnv-opt-target-qc; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-target-qc Rscript -e \"library(\'TARGET.QC\');run_TARGET.QC(" + mapp_thresh + "," + cov_thresh_from + "," + cov_thresh_to + "," + length_thresh_from + "," + length_thresh_to + "," + gc_thresh_from + "," + gc_thresh_to + ",'" + raw_cov_table + "','" + qc_cov_table + "','" + raw_bed + "','" + qc_bed + "')\"; \
+docker pull biodatageeks/cnv-opt-reference-sample-set-selector; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-reference-sample-set-selector Rscript -e \"library(\'REFERENCE.SAMPLE.SET.SELECTOR\');run_REFERENCE.SAMPLE.SET.SELECTOR('" + select_method + "'," + num_refs + ",'" + qc_cov_table + "','" + qc_bed + "','" + reference_sample_set_file + "')\"; \
+docker pull biodatageeks/cnv-opt-canoescov; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-canoescov Rscript -e \"library(\'CANOESCOV\');run_CANOESCOV('" + qc_cov_table + "','" + qc_bed + "','" + reference_sample_set_file + "','" + output_calls_file + "')\"; \
+"
+
+run_canoes_caller_task= BashOperator (
+    bash_command=run_canoes_caller_cmd,
+    task_id='run_canoes_caller_task',
+    dag=dag
+)
+
+run_canoes_caller_task
diff --git a/airflow/dags/codex.py b/airflow/dags/codex.py
new file mode 100755
index 0000000..210b3bb
--- /dev/null
+++ b/airflow/dags/codex.py
@@ -0,0 +1,62 @@
+from airflow import DAG
+from airflow.operators.bash_operator import BashOperator
+from airflow.models import Variable
+from datetime import datetime, timedelta
+
+default_args = {
+    'owner': 'biodatageeks',
+    'depends_on_past': False,
+    'start_date': datetime(2017, 10, 18),
+    'email': ['team@biodatageeks.ii.pw.edu.pl'],
+    'email_on_failure': False,
+    'email_on_retry': False,
+    'retries': 0
+}
+
+dag = DAG(
+    'codex', default_args=default_args, schedule_interval=None)
+
+##############################################
+########## RUN RAW CODEX CNV CALLER ##########
+##############################################
+
+### target qc parameters
+mapp_thresh = '0.9'
+cov_thresh_from = '20'
+cov_thresh_to = '4000'
+length_thresh_from = '20'
+length_thresh_to = '2000'
+gc_thresh_from = '20'
+gc_thresh_to = '80'
+raw_cov_table = 'raw_cov_table.csv'
+qc_cov_table = 'qc_cov_table.csv'
+raw_bed = 'raw_bed.bed'
+qc_bed = 'qc_bed.bed'
+
+### select reference sample set parameters
+select_method = 'exomedepth' # "canoes", "codex" or "exomedepth"
+num_refs = '30'
+reference_sample_set_file = 'reference_sample_set.csv'
+
+### codex parameters
+k_from = '1'
+k_to = '3'
+lmax = '200'
+output_calls_file = 'calls.csv'
+
+run_codex_caller_cmd= " \
+docker pull biodatageeks/cnv-opt-target-qc; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-target-qc Rscript -e \"library(\'TARGET.QC\');run_TARGET.QC(" + mapp_thresh + "," + cov_thresh_from + "," + cov_thresh_to + "," + length_thresh_from + "," + length_thresh_to + "," + gc_thresh_from + "," + gc_thresh_to + ",'" + raw_cov_table + "','" + qc_cov_table + "','" + raw_bed + "','" + qc_bed + "')\"; \
+docker pull biodatageeks/cnv-opt-reference-sample-set-selector; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-reference-sample-set-selector Rscript -e \"library(\'REFERENCE.SAMPLE.SET.SELECTOR\');run_REFERENCE.SAMPLE.SET.SELECTOR('" + select_method + "'," + num_refs + ",'" + qc_cov_table + "','" + qc_bed + "','" + reference_sample_set_file + "')\"; \
+docker pull biodatageeks/cnv-opt-codexcov; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-codexcov Rscript -e \"library(\'CODEXCOV\');run_CODEXCOV(" + k_from + "," + k_to + "," + lmax + ",'" + qc_cov_table + "','" + qc_bed  + "','" + reference_sample_set_file + "','" + output_calls_file + "')\"; \
+"
+
+run_codex_caller_task= BashOperator (
+    bash_command=run_codex_caller_cmd,
+    task_id='run_codex_caller_task',
+    dag=dag
+)
+
+run_codex_caller_task
diff --git a/airflow/dags/exomedepth.py b/airflow/dags/exomedepth.py
new file mode 100755
index 0000000..7952c2b
--- /dev/null
+++ b/airflow/dags/exomedepth.py
@@ -0,0 +1,59 @@
+from airflow import DAG
+from airflow.operators.bash_operator import BashOperator
+from airflow.models import Variable
+from datetime import datetime, timedelta
+
+default_args = {
+    'owner': 'biodatageeks',
+    'depends_on_past': False,
+    'start_date': datetime(2017, 10, 18),
+    'email': ['team@biodatageeks.ii.pw.edu.pl'],
+    'email_on_failure': False,
+    'email_on_retry': False,
+    'retries': 0
+}
+
+dag = DAG(
+    'exomedepth', default_args=default_args, schedule_interval=None)
+
+###################################################
+########## RUN RAW EXOMEDEPTH CNV CALLER ##########
+###################################################
+
+### target qc parameters
+mapp_thresh = '0.9'
+cov_thresh_from = '20'
+cov_thresh_to = '4000'
+length_thresh_from = '20'
+length_thresh_to = '2000'
+gc_thresh_from = '20'
+gc_thresh_to = '80'
+raw_cov_table = 'raw_cov_table.csv'
+qc_cov_table = 'qc_cov_table.csv'
+raw_bed = 'raw_bed.bed'
+qc_bed = 'qc_bed.bed'
+
+### select reference sample set parameters
+select_method = 'exomedepth' # "canoes", "codex" or "exomedepth"
+num_refs = '30'
+reference_sample_set_file = 'reference_sample_set.csv'
+
+### exomedepth parameters
+output_calls_file = 'calls.csv'
+
+run_exomedepth_caller_cmd= " \
+docker pull biodatageeks/cnv-opt-target-qc; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-target-qc Rscript -e \"library(\'TARGET.QC\');run_TARGET.QC(" + mapp_thresh + "," + cov_thresh_from + "," + cov_thresh_to + "," + length_thresh_from + "," + length_thresh_to + "," + gc_thresh_from + "," + gc_thresh_to + ",'" + raw_cov_table + "','" + qc_cov_table + "','" + raw_bed + "','" + qc_bed + "')\"; \
+docker pull biodatageeks/cnv-opt-reference-sample-set-selector; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-reference-sample-set-selector Rscript -e \"library(\'REFERENCE.SAMPLE.SET.SELECTOR\');run_REFERENCE.SAMPLE.SET.SELECTOR('" + select_method + "'," + num_refs + ",'" + qc_cov_table + "','" + qc_bed + "','" + reference_sample_set_file + "')\"; \
+docker pull biodatageeks/cnv-opt-exomedepthcov; \
+docker run --rm -v /tmp:/tmp -w=\"/tmp\" biodatageeks/cnv-opt-exomedepthcov Rscript -e \"library(\'EXOMEDEPTHCOV\');run_EXOMEDEPTHCOV('" + qc_cov_table + "','" + qc_bed + "','" + reference_sample_set_file + "','" + output_calls_file + "')\"; \
+"
+
+run_exomedepth_caller_task= BashOperator (
+    bash_command=run_exomedepth_caller_cmd,
+    task_id='run_exomedepth_caller_task',
+    dag=dag
+)
+
+run_exomedepth_caller_task
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..313e5cb
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,57 @@
+#!/bin/bash -x
+
+BUILD_MODE=$1
+#only build images modified in the last 10h (10*3600s)
+MAX_COMMIT_TS_DIFF=36000
+
+bump_version () {
+  incl=0.01
+  version="0.00"
+  if [ "$(curl -L -s "https://registry.hub.docker.com/v2/repositories/${image}/tags" | jq -r ".detail")" == "Object not found" ]; then
+    version="0.01"
+  else
+    version=`curl -L -s "https://registry.hub.docker.com/v2/repositories/${image}/tags"  | jq -r '.results[0].name '`
+    version=`echo $version + $incl | bc| awk '{printf "%.2f\n", $0}'`
+  fi
+  echo $version
+}
+
+
+find Docker  -name "Dockerfile"  | sed 's/\/Dockerfile//' | while read dir;
+do
+
+  image=`echo $dir| sed 's/^Docker/biodatageeks/'`
+  version=`if [ ! -e $dir/version ]; then bump_version $image; else tail -1 $dir/version; fi`
+  if [ -e $dir/version ]; then
+    ver=`tail -1 $dir/version`;
+    if [[ $OSTYPE != "darwin17" ]]; then
+     sed -i "s/{{COMPONENT_VERSION}}/${ver}/g" $dir/Dockerfile ;
+    else
+     sed -i '' "s/{{COMPONENT_VERSION}}/${ver}/g" $dir/Dockerfile ;
+    fi
+  fi
+  echo "Building image ${image}..."
+  diffTs=`echo "$(date +%s) - $(git log -n 1 --pretty=format:%at ${dir})" | bc`
+  if [ $diffTs -lt $MAX_COMMIT_TS_DIFF ]; then
+    cd $dir
+    if [[ ${image} == "biodatageeks/cnv-opt-cnv-simulator" ]]; then
+      echo "Rebuild of ${image} image forced..."
+      docker build --no-cache --build-arg CACHE_DATE=$(date +%Y-%m-%d:%H:%M:%S) -t $image:$version .
+      docker build --no-cache --build-arg CACHE_DATE=$(date +%Y-%m-%d:%H:%M:%S) -t $image:latest .
+    else
+      docker build -t $image:$version .
+      docker build -t $image:latest .
+    fi
+    if [[ ${BUILD_MODE} != "local" ]]; then
+      docker push docker.io/$image:latest
+      docker push docker.io/$image:$version
+    fi
+    ##revert COMPONENT_VERSION variable
+    if [ -e version ]; then ver=`tail -1 version`; sed -i '' "s/${ver}/{{COMPONENT_VERSION}}/g" Dockerfile ; fi
+    #keep only last 3 versions of an image locally (2+3 in tail part)
+    docker images $image | tail -n +5 | sed 's/ \{1,\}/:/g' | cut -f1,2 -d':' | xargs -i docker rmi {}
+
+    cd ../..
+  fi
+
+done