ntmv · ntmv · Jun 19, 2023 · Jun 26, 2023 · Jun 26, 2023 · Jun 26, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,6 @@
 .DS_Store
 ../.DS_Store
+.Rproj.user
+.RData
+.Rhistory
+sample_script/results/
diff --git a/Papers/.DS_Store b/Papers/.DS_Store
diff --git a/Papers/.gitignore b/Papers/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/README.md b/README.md
@@ -1,10 +1,10 @@
 # 💡 Penalized Competing Risks Analysis using Case-base Sampling
 
-📍 Repository with my code and my final report for my MSc. thesis
+📍 Repository with the code and final report for my (Nirupama Tamvada) MSc. thesis
 
 🎯 We develop an alternative penalized cause-specific hazards model that extends on the `casebase` package for competing risks survival analysis of high-dimensional biological data
 
-## 📂 Repository organization 
+## 📂 Repository organization
 
 - `bash_script_template` - Template for bash scripts for Compute Canada. Creates individual bash scripts to be submitted to the cluster for one simulation run. Requires `runscripts` and `logs` folder to be setup
 
@@ -35,3 +35,51 @@ Genome-wide transcriptome profiling and advances in experimental technologies ha
 💻 The optimization for the stochastic gradient descent in `mtool` was written by Dr. Yi Lian.
 
 📌 The relaxed LASSO branch contains a relaxed LASSO implementation for the casebase penalized model (WIP) by Alex Romanus.
+
+
+
+## 💡 Relaxed LASSO as a Penalization Method for Competing Risks Analysis using Case-Base Sampling
+
+📍 The `relaxed_LASSO` branch adds Relaxed LASSO penalization for fitting competing risks models to high-dimensional data
+
+🎯 To address issues posed by fitting models to high-dimensional data, such as biased coefficient estimates, we developed a version of Relaxed LASSO regularization for efficient variable selection during model fitting
+
+### 📂 Relaxed_LASSO-specific branch organization
+
+- `practice` - Contains files created during development of relaxed LASSO method
+  - `development` - Files used to edit and check results of relaxed LASSO implementations for each data type (linear, multinomial, and Case-base) during development
+  - `simulation` - Mock simulation scripts and results
+
+- `renv` - Virtual environment directory containing all packages needed to run relaxed LASSO project files (except mtool - more on that below)
+
+- `simulation_final` - Scripts and results from final simulation used in report for each data type
+  - Note: Data in the `final_results` sub-directories are results averaged over all iterations of each simulation for each data type
+
+- `src`
+  - `final_relaxed_implementation.R` - Contains implementation of Relaxed LASSO used in final simulations in multinomial and Case-base data settings and its helper functions
+  - `practice_relaxed_implementation` - All versions of Relaxed LASSO implementations made during development and their helper functions
+  - `simulation_helper_functions.R` files - Helper functions intended for use during simulation, but replaced with scripts in final simulation runs (except in linear case)
+
+
+### 💻 Instructions on running files from Relaxed LASSO project and installing repository packages
+- To run any file and install the packages properly, please set your working directory to the project directory (casebase_relaxed_LASSO)
+
+- Installing packages:
+  - Published packages - To install most of the packages needed for this project, use renv to install the virtual environment.
+
+    1) Install renv using the following command in your RStudio console:
+
+      install.packages("renv")
+
+    2) Use the following command to install all published packages:
+
+      renv::restore()
+
+  - mtool package - mtool is the only unpublished package required in this project, and hence must be installed from source using the `mtool_1.0.tar.gz` archive in the project directory. Use the following lines to install the package:
+
+      install_path = paste(getwd(), "/mtool_1.0.tar.gz", sep = "")
+      install.packages(install_path, repos = NULL, type = "source")
+
+    Finally, restart R to run the files using the required packages with:
+
+      .rs.restartR()
diff --git a/bash_script_template/bash_scripts/linear_relaxed.sh b/bash_script_template/bash_scripts/linear_relaxed.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+RUNSCRIPTS=~/projects/def-gcohenfr/ntamvada/simulations/runscripts/linear_relaxed
+LOGS=~/projects/def-gcohenfr/ntamvada/simulations/logs/linear_relaxed
+
+#SBATCH --time=2:00:00
+#SBATCH --mem=128GB
+#SBATCH --nodes=1
+#SBATCH --cpus-per-task=8
+#SBATCH --mail-user=<aromanus@gmail.com>
+#SBATCH --output=${LOGS}/linear_relaxed.txt
+#SBATCH --job-name="linear_relaxed"
+
+module load StdEnv/2020 r/4.2.2
+
+Rscript relaxed_simulation_script.R
+
+">$RUNSCRIPTS/run.job
+echo "Submitting $RUNSCRIPTS/${ID}.run.job to the cluster"
+sbatch $RUNSCRIPTS/${ID}.run.job
+done
diff --git a/casebase_relaxed_LASSO.Rproj b/casebase_relaxed_LASSO.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/logs/mtool_error.txt b/logs/mtool_error.txt
@@ -0,0 +1,80 @@
+  ✔  checking for file ‘/private/var/folders/wd/ykcm825s1_q8b6h4l_kczjx00000gn/T/RtmpIw5JYk/file50d37cb70d53/mtool/DESCRIPTION’
+  ─  preparing ‘mtool’:
+   checking DESCRIPTION meta-information ...  ✔  checking DESCRIPTION meta-information
+─  cleaning src
+  ─  installing the package to process help pages
+           -----------------------------------
+─  installing *source* package ‘mtool’ ...
+   ** using staged installation
+   ** libs
+   clang++ -arch arm64 -std=gnu++14 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG  -I'/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/Rcpp/include' -I'/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/RcppArmadillo/include' -I/opt/R/arm64/include   -I../inst/include/ -I../inst/include/decomp -I../inst/include/dictLearn -I../inst/include/linalg -I../inst/include/prox -DNDEBUG -DREMOVE_ -fPIC  -falign-functions=64 -Wall -g -O2  -Wall -pedantic -c RcppExports.cpp -o RcppExports.o
+   clang++ -arch arm64 -std=gnu++14 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG  -I'/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/Rcpp/include' -I'/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/RcppArmadillo/include' -I/opt/R/arm64/include   -I../inst/include/ -I../inst/include/decomp -I../inst/include/dictLearn -I../inst/include/linalg -I../inst/include/prox -DNDEBUG -DREMOVE_ -fPIC  -falign-functions=64 -Wall -g -O2  -Wall -pedantic -c main.cpp -o main.o
+   In file included from main.cpp:5:
+   In file included from ../inst/include/spams.h:25:
+   In file included from ../inst/include/dictLearn/dicts.h:36:
+   In file included from ../inst/include/prox/fista.h:24:
+   ../inst/include/prox/project.h:2044:9: warning: variable 'num4' set but not used [-Wunused-but-set-variable]
+      long num4=0;
+           ^
+   ../inst/include/prox/project.h:2041:9: warning: variable 'num1' set but not used [-Wunused-but-set-variable]
+      long num1=0;
+           ^
+   ../inst/include/prox/project.h:2043:9: warning: variable 'num3' set but not used [-Wunused-but-set-variable]
+      long num3=0;
+           ^
+   ../inst/include/prox/project.h:2042:9: warning: variable 'num2' set but not used [-Wunused-but-set-variable]
+      long num2=0;
+           ^
+../inst/include/prox/project.h:2133:9: warning: variable 'num4' set but not used [     ../inst/include/prox/project.h:2133:9: warning: variable 'num4' set but not used [-Wunused-but-set-variable]
+      long num4=0;
+           ^
+   ../inst/include/prox/project.h:2130:9: warning: variable 'num1' set but not used [-Wunused-but-set-variable]
+      long num1=0;
+           ^
+   ../inst/include/prox/project.h:2131:9: warning: variable 'num2' set but not used [-Wunused-but-set-variable]
+      long num2=0;
+           ^
+   ../inst/include/prox/project.h:2132:9: warning: variable 'num3' set but not used [-Wunused-but-set-variable]
+      long num3=0;
+           ^
+   8 warnings generated.
+   clang++ -arch arm64 -std=gnu++14 -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/opt/R/arm64/lib -o mtool.so RcppExports.o main.o -L/Library/Frameworks/R.framework/Resources/lib -lRlapack -L/Library/Frameworks/R.framework/Resources/lib -lRblas -L/opt/R/arm64/gfortran/lib/gcc/aarch64-apple-darwin20.6.0/12.0.1 -L/opt/R/arm64/gfortran/lib -lgfortran -lemutls_w -lquadmath -F/Library/Frameworks/R.framework/.. -framework R -Wl,-framework -Wl,CoreFoundation
+   ld: warning: directory not found for option '-L/opt/R/arm64/gfortran/lib/gcc/aarch64-apple-darwin20.6.0/12.0.1'
+   ld: warning: directory not found for option '-L/opt/R/arm64/gfortran/lib'
+   ld: library not found for -lgfortran
+   clang: error: linker command failed with exit code 1 (use -v to see invocation)
+   make: *** [mtool.so] Error 1
+   ERROR: compilation failed for package ‘mtool’
+─  removing ‘/private/var/folders/wd/ykcm825s1_q8b6h4l_kczjx00000gn/T/RtmpiJ2JWV/Rinst7e1d419e1dc2/mtool’
+         -----------------------------------
+     ERROR: package installation failed
+
+<system_command_status_error/rlib_error_3_0/rlib_error/error>
+Error in `(function (command = NULL, args = character(), error_on_status = TRUE, …`:
+! System command 'R' failed
+---
+Exit status: 1
+stdout & stderr: <printed>
+---
+Backtrace:
+ 1. devtools::]8;;ide:help:devtools::installinstall]8;;()
+ 2. pkgbuild::build(pkg$path, dest_path = tempdir(), args = build_opts, …
+ 3. withr::with_temp_libpaths(rcmd_build_tools(options$cmd, c(options$path, …
+ 4. base::]8;;ide:help:base::forceforce]8;;(code)
+ 5. pkgbuild::rcmd_build_tools(options$cmd, c(options$path, options$args), …
+ 6. pkgbuild::with_build_tools({ …
+ 7. base::withCallingHandlers(callr::rcmd_safe(..., env = env, spinner = FALSE, …
+ 8. callr::rcmd_safe(..., env = env, spinner = FALSE, show = FALSE, …
+ 9. callr:::run_r(options)
+10. base::with(options, with_envvar(env, do.call(processx::run, c(list(bin, …
+11. base::with.default(options, with_envvar(env, do.call(processx::run, …
+12. base::]8;;ide:help:base::evaleval]8;;(]8;;ide:help:base::substitutesubstitute]8;;(expr), data, enclos = ]8;;ide:help:base::parent.frameparent.frame]8;;())
+13. base::]8;;ide:help:base::evaleval]8;;(]8;;ide:help:base::substitutesubstitute]8;;(expr), data, enclos = ]8;;ide:help:base::parent.frameparent.frame]8;;())
+14. callr:::with_envvar(env, do.call(processx::run, c(list(bin, args = real_cmdargs, …
+15. base::]8;;ide:help:base::forceforce]8;;(code)
+16. base::do.call(processx::run, c(list(bin, args = real_cmdargs, stdout_line_callback = real_callback(stdout), …
+17. (function (command = NULL, args = character(), error_on_status = TRUE, …
+18. base::throw(new_process_error(res, call = sys.call(), echo = echo, …
+19. | base::]8;;ide:help:base::signalConditionsignalCondition]8;;(cond)
+20. (function (e) …
+21. ]8;;ide:help:base::asNamespaceasNamespace]8;;("callr")$err$throw(e)

diff --git a/mtool/.Rbuildignore b/mtool/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/mtool/DESCRIPTION b/mtool/DESCRIPTION
@@ -16,3 +16,6 @@ License: GPL (>= 2)
 Depends: Matrix
 Imports: Rcpp (>= 1.0.5)
 LinkingTo: Rcpp, RcppArmadillo
+
+
+
diff --git a/mtool_1.0.tar.gz b/mtool_1.0.tar.gz
diff --git a/practice/MNLogistic.txt b/practice/MNLogistic.txt
@@ -0,0 +1,93 @@
+#' Penalized Multinomial Logistic Regression
+#'
+#' Checks that `X`, `Y` and `offset` agree in number of observations, maps
+#' `regularization` onto one of three penalty families (1, 2 or 3), fills in
+#' placeholder values for the structure arguments that family does not use,
+#' and calls `MultinomLogistic()`.
+#'
+#' @param X Design matrix; `nrow(X)` must equal the lengths of `Y` and
+#'   `offset`.
+#' @param Y Outcome; coerced with `as.vector()` when it is not already a
+#'   vector. The number of distinct values minus one is passed on as `K`.
+#' @param offset Offset; coerced with `as.vector()` when it is not already a
+#'   vector.
+#' @param N_covariates Subtracted from `ncol(X)` to give the `reg_p` argument
+#'   of `MultinomLogistic()` (presumably the number of unpenalized columns --
+#'   TODO confirm).
+#' @param regularization Name of the penalty. Must be one of the names listed
+#'   in `pen1`, `pen2` or `pen3` in the body; anything else is an error.
+#' @param transpose Passed through to `MultinomLogistic()`.
+#' @param lambda1,lambda2,lambda3 Passed on as `lam1`, `lam2`, `lam3`.
+#' @param learning_rate,tolerance,maxit,ncores Passed through unchanged.
+#' @param niter_inner_mtplyr Multiplier; `niter_inner` is set to
+#'   `niter_inner_mtplyr * nrow(X)`.
+#' @param group_id Used by penalty family 1 (defaults to `rep(0L, ncol(X))`
+#'   when missing); overwritten with that same value for families 2 and 3.
+#' @param group_weights Required for families 2 and 3; overwritten with an
+#'   empty double vector for family 1.
+#' @param groups Required for families 2 and 3; overwritten with `matrix(NA)`
+#'   for family 1.
+#' @param groups_var Required for family 2; overwritten with `matrix(NA)`
+#'   otherwise.
+#' @param own_variables,N_own_variables Required for family 3; overwritten
+#'   with empty integer vectors otherwise.
+#'
+#' @return A list with `coefficients` (the `Sparse Estimates` element of the
+#'   `MultinomLogistic()` result) and `no_non_zero` (the length of that
+#'   object's `@i` slot; presumably the number of stored non-zero entries of a
+#'   sparse matrix -- TODO confirm).
+mtool.MNlogistic <- function(X, Y, offset, N_covariates,
+                             regularization = 'l1', transpose = FALSE,
+                             lambda1, lambda2 = 0, lambda3 = 0,
+                             learning_rate = 1e-4, tolerance = 1e-4,
+                             niter_inner_mtplyr = 7, maxit = 100, ncores = -1,
+                             group_id, group_weights,
+                             groups, groups_var,
+                             own_variables, N_own_variables) {
+    ## Dimensions and checks
+    nx <- nrow(X)
+
+    if (!is.vector(Y)) {Y <- as.vector(Y)}
+    ny <- length(Y)
+
+    if (!is.vector(offset)) {offset <- as.vector(offset)}
+    noff <- length(offset)
+
+    ## scalar `&&` is the correct operator inside `if` (short-circuits, and
+    ## never yields a length > 1 condition)
+    if (!(nx == ny && nx == noff)) {
+        stop('X, Y and offset have different number of observations.')
+    }
+
+    p <- ncol(X)
+
+    K <- length(unique(Y)) - 1
+
+    ## regularization
+    pen1 <- c("l0", "l1", "l2", "linf", "l2-not-squared",
+              "elastic-net", "fused-lasso",
+              "group-lasso-l2", "group-lasso-linf",
+              "sparse-group-lasso-l2", "sparse-group-lasso-linf",
+              "l1l2", "l1linf", "l1l2+l1", "l1linf+l1", "l1linf-row-column",
+              "trace-norm", "trace-norm-vec", "rank", "rank-vec", "none")
+    pen2 <- c("graph", "graph-ridge", "graph-l2", "multi-task-graph")
+    pen3 <- c("tree-l0", "tree-l2", "tree-linf", "multi-task-tree")
+
+    ## the three name sets are disjoint, so a single chain selects the family
+    if (regularization %in% pen1) {
+        penalty <- 1
+    } else if (regularization %in% pen2) {
+        penalty <- 2
+    } else if (regularization %in% pen3) {
+        penalty <- 3
+    } else {
+        stop('The provided regularization is not supported.')
+    }
+
+    ### check regularization-specific inputs
+    if (penalty == 1) {
+        #### penalty = 1, call proximal(Flat), requires `group_id` in integer vector
+        if (missing(group_id)) { group_id <- rep(0L, p) }
+        ## unused by this family: placeholders only
+        group_weights <- vector(mode = 'double')
+        groups <- matrix(NA)
+        groups_var <- matrix(NA)
+        own_variables <- vector(mode = 'integer')
+        N_own_variables <- vector(mode = 'integer')
+    } else if (penalty == 2) {
+        #### penalty = 2, call proximalGraph
+        #### requires `groups` and `groups_var` in integer matrices and `group_weights` in double vector
+        if (missing(groups)) { stop('Required input `groups` is missing.') }
+        if (missing(groups_var)) { stop('Required input `groups_var` is missing.') }
+        if (missing(group_weights)) { stop('Required input `group_weights` is missing.') }
+        ## unused by this family: placeholders only
+        group_id <- rep(0L, p)
+        own_variables <- vector(mode = 'integer')
+        N_own_variables <- vector(mode = 'integer')
+    } else {
+        #### penalty = 3
+        #### NOTE(review): the original comment said "call proximalGraph" here
+        #### as well; for the tree penalties this is presumably proximalTree -- confirm
+        #### requires `own_variables` and `N_own_variables` in integer vectors, `group_weights` in double vector
+        #### and `groups` in integer matrix
+        if (missing(groups)) { stop('Required input `groups` is missing.') }
+        if (missing(own_variables)) { stop('Required input `own_variables` is missing.') }
+        if (missing(N_own_variables)) { stop('Required input `N_own_variables` is missing.') }
+        if (missing(group_weights)) { stop('Required input `group_weights` is missing.') }
+        ## unused by this family: placeholders only
+        group_id <- rep(0L, p)
+        groups_var <- matrix(NA)
+    }
+
+    ## call mtool main function
+    result <- MultinomLogistic(X = X, Y = Y, offset = offset, K = K, reg_p = p - N_covariates,
+                               penalty = penalty, regul = regularization, transpose = transpose,
+                               grp_id = group_id, etaG = group_weights,
+                               grp = groups, grpV = groups_var,
+                               own_var = own_variables, N_own_var = N_own_variables,
+                               lam1 = lambda1, lam2 = lambda2, lam3 = lambda3,
+                               learning_rate = learning_rate, tolerance = tolerance,
+                               niter_inner = niter_inner_mtplyr * nx, maxit = maxit,
+                               ncores = ncores)
+    nzc <- length(result$`Sparse Estimates`@i)
+    list(coefficients = result$`Sparse Estimates`,
+         no_non_zero = nzc)
+}
diff --git a/practice/MNLogistic_New.txt b/practice/MNLogistic_New.txt
@@ -0,0 +1,93 @@
+#' Penalized Multinomial Logistic Regression (LASSO), alternate learning rate
+#'
+#' Checks that `X`, `Y` and `offset` agree in number of observations, maps
+#' `regularization` onto one of three penalty families (1, 2 or 3), fills in
+#' placeholder values for the structure arguments that family does not use,
+#' and calls `MultinomLogistic()`.
+#'
+#' NOTE(review): the original header described this variant as having a
+#' "decreased learning rate", but the default here is `1e-3`, which is larger
+#' than the `1e-4` default of `mtool.MNlogistic` in practice/MNLogistic.txt.
+#' Confirm which value is intended; the default is left unchanged.
+#'
+#' @param X Design matrix; `nrow(X)` must equal the lengths of `Y` and
+#'   `offset`.
+#' @param Y Outcome; coerced with `as.vector()` when it is not already a
+#'   vector. The number of distinct values minus one is passed on as `K`.
+#' @param offset Offset; coerced with `as.vector()` when it is not already a
+#'   vector.
+#' @param N_covariates Subtracted from `ncol(X)` to give the `reg_p` argument
+#'   of `MultinomLogistic()` (presumably the number of unpenalized columns --
+#'   TODO confirm).
+#' @param regularization Name of the penalty. Must be one of the names listed
+#'   in `pen1`, `pen2` or `pen3` in the body; anything else is an error.
+#' @param transpose Passed through to `MultinomLogistic()`.
+#' @param lambda1,lambda2,lambda3 Passed on as `lam1`, `lam2`, `lam3`.
+#' @param learning_rate,tolerance,maxit,ncores Passed through unchanged.
+#' @param niter_inner_mtplyr Multiplier; `niter_inner` is set to
+#'   `niter_inner_mtplyr * nrow(X)`.
+#' @param group_id Used by penalty family 1 (defaults to `rep(0L, ncol(X))`
+#'   when missing); overwritten with that same value for families 2 and 3.
+#' @param group_weights Required for families 2 and 3; overwritten with an
+#'   empty double vector for family 1.
+#' @param groups Required for families 2 and 3; overwritten with `matrix(NA)`
+#'   for family 1.
+#' @param groups_var Required for family 2; overwritten with `matrix(NA)`
+#'   otherwise.
+#' @param own_variables,N_own_variables Required for family 3; overwritten
+#'   with empty integer vectors otherwise.
+#'
+#' @return A list with `coefficients` (the `Sparse Estimates` element of the
+#'   `MultinomLogistic()` result) and `no_non_zero` (the length of that
+#'   object's `@i` slot; presumably the number of stored non-zero entries of a
+#'   sparse matrix -- TODO confirm).
+mtool.MNlogistic_new <- function(X, Y, offset, N_covariates,
+                                 regularization = 'l1', transpose = FALSE,
+                                 lambda1, lambda2 = 0, lambda3 = 0,
+                                 learning_rate = 1e-3, tolerance = 1e-4,
+                                 niter_inner_mtplyr = 7, maxit = 100, ncores = -1,
+                                 group_id, group_weights,
+                                 groups, groups_var,
+                                 own_variables, N_own_variables) {
+  ## Dimensions and checks
+  nx <- nrow(X)
+
+  if (!is.vector(Y)) {Y <- as.vector(Y)}
+  ny <- length(Y)
+
+  if (!is.vector(offset)) {offset <- as.vector(offset)}
+  noff <- length(offset)
+
+  ## scalar `&&` is the correct operator inside `if` (short-circuits, and
+  ## never yields a length > 1 condition)
+  if (!(nx == ny && nx == noff)) {
+    stop('X, Y and offset have different number of observations.')
+  }
+
+  p <- ncol(X)
+
+  K <- length(unique(Y)) - 1
+
+  ## regularization
+  pen1 <- c("l0", "l1", "l2", "linf", "l2-not-squared",
+            "elastic-net", "fused-lasso",
+            "group-lasso-l2", "group-lasso-linf",
+            "sparse-group-lasso-l2", "sparse-group-lasso-linf",
+            "l1l2", "l1linf", "l1l2+l1", "l1linf+l1", "l1linf-row-column",
+            "trace-norm", "trace-norm-vec", "rank", "rank-vec", "none")
+  pen2 <- c("graph", "graph-ridge", "graph-l2", "multi-task-graph")
+  pen3 <- c("tree-l0", "tree-l2", "tree-linf", "multi-task-tree")
+
+  ## the three name sets are disjoint, so a single chain selects the family
+  if (regularization %in% pen1) {
+    penalty <- 1
+  } else if (regularization %in% pen2) {
+    penalty <- 2
+  } else if (regularization %in% pen3) {
+    penalty <- 3
+  } else {
+    stop('The provided regularization is not supported.')
+  }
+
+  ### check regularization-specific inputs
+  if (penalty == 1) {
+    #### penalty = 1, call proximal(Flat), requires `group_id` in integer vector
+    if (missing(group_id)) { group_id <- rep(0L, p) }
+    ## unused by this family: placeholders only
+    group_weights <- vector(mode = 'double')
+    groups <- matrix(NA)
+    groups_var <- matrix(NA)
+    own_variables <- vector(mode = 'integer')
+    N_own_variables <- vector(mode = 'integer')
+  } else if (penalty == 2) {
+    #### penalty = 2, call proximalGraph
+    #### requires `groups` and `groups_var` in integer matrices and `group_weights` in double vector
+    if (missing(groups)) { stop('Required input `groups` is missing.') }
+    if (missing(groups_var)) { stop('Required input `groups_var` is missing.') }
+    if (missing(group_weights)) { stop('Required input `group_weights` is missing.') }
+    ## unused by this family: placeholders only
+    group_id <- rep(0L, p)
+    own_variables <- vector(mode = 'integer')
+    N_own_variables <- vector(mode = 'integer')
+  } else {
+    #### penalty = 3
+    #### NOTE(review): the original comment said "call proximalGraph" here
+    #### as well; for the tree penalties this is presumably proximalTree -- confirm
+    #### requires `own_variables` and `N_own_variables` in integer vectors, `group_weights` in double vector
+    #### and `groups` in integer matrix
+    if (missing(groups)) { stop('Required input `groups` is missing.') }
+    if (missing(own_variables)) { stop('Required input `own_variables` is missing.') }
+    if (missing(N_own_variables)) { stop('Required input `N_own_variables` is missing.') }
+    if (missing(group_weights)) { stop('Required input `group_weights` is missing.') }
+    ## unused by this family: placeholders only
+    group_id <- rep(0L, p)
+    groups_var <- matrix(NA)
+  }
+
+  ## call mtool main function
+  result <- MultinomLogistic(X = X, Y = Y, offset = offset, K = K, reg_p = p - N_covariates,
+                             penalty = penalty, regul = regularization, transpose = transpose,
+                             grp_id = group_id, etaG = group_weights,
+                             grp = groups, grpV = groups_var,
+                             own_var = own_variables, N_own_var = N_own_variables,
+                             lam1 = lambda1, lam2 = lambda2, lam3 = lambda3,
+                             learning_rate = learning_rate, tolerance = tolerance,
+                             niter_inner = niter_inner_mtplyr * nx, maxit = maxit,
+                             ncores = ncores)
+  nzc <- length(result$`Sparse Estimates`@i)
+  list(coefficients = result$`Sparse Estimates`,
+       no_non_zero = nzc)
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,6 @@ License: GPL (>= 2)
		Depends: Matrix
		Imports: Rcpp (>= 1.0.5)
		LinkingTo: Rcpp, RcppArmadillo