diff --git a/.Rbuildignore b/.Rbuildignore index 291a446..c0b7d13 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,11 @@ ^\.DS_Store$ ^README\.md$ ^roadmap\.md$ +^Benchmark$ +^gedi\.Rcheck$ +^cran-comments\.md$ +^\.claude$ +^Logo\.svg$ +^check_output\.txt$ +^check_run\.log$ +^nohup\.out$ diff --git a/.gitignore b/.gitignore index 00d08a9..97139fb 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,5 @@ rsconnect/ # Temp directory for development temp/ +Benchmark/ +.claude/ diff --git a/DESCRIPTION b/DESCRIPTION index 5c54173..4f0055f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,33 +1,28 @@ Package: gedi Type: Package Title: Gene Expression Decomposition and Integration -Version: 2.3.0 -Date: 2026-01-08 +Version: 2.3.1 +Date: 2026-02-08 Authors@R: c( person("Arsham", "Mikaeili Namini", email = "arsham.mikaeilinamini@mail.mcgill.ca", role = c("aut", "cre")), person("Hamed", "S.Najafabadi", email = "hamed.najafabadi@mcgill.ca", role = c("aut")) ) -Maintainer: Arsham Mikaeili Namini Description: A memory-efficient implementation for integrating gene expression data - from single-cell RNA sequencing experiments. GEDI v2 uses a C++ backend with + from single-cell RNA sequencing experiments. Uses a C++ backend with thin R wrappers to enable analysis of large-scale single-cell datasets. The package supports multiple data modalities including count matrices, paired - data (Splicing, Velocyto, CITE-seq), and binary indicators. It implements a latent variable model - with block coordinate descent optimization for dimensionality reduction and - batch effect correction. + data (splicing, RNA velocity, CITE-seq), and binary indicators. It implements + a latent variable model with block coordinate descent optimization for + dimensionality reduction and batch effect correction. License: MIT + file LICENSE -URL: https://github.com/Arshammik/gedi -BugReports: https://github.com/Arshammik/gedi/issues +URL: https://github.com/csglab/gedi2 +BugReports: https://github.com/csglab/gedi2/issues Depends: R (>= 4.0.0) -Imports: Rcpp (>= 1.0.0), R6 (>= 2.5.0), Matrix (>= 1.3.0), hdf5r, - methods, stats, utils +Imports: Rcpp (>= 1.0.0), R6 (>= 2.5.0), Matrix (>= 1.3.0), + ggplot2, scales, methods, stats, utils LinkingTo: Rcpp, RcppEigen -Suggests: Seurat, SingleCellExperiment -SystemRequirements: C++14, GNU make, Eigen3 (>= 3.3.0) +Suggests: hdf5r, uwot, digest, glmnet, Seurat, SingleCellExperiment +SystemRequirements: C++14, GNU make Encoding: UTF-8 RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) -NeedsCompilation: yes -Packaged: 2025-10-22 18:58:46 UTC; arshammikaeili -Author: Arsham Mikaeili Namini [aut, cre], - Hamed S.Najafabadi [aut] diff --git a/LICENSE b/LICENSE index b146eb5..9253558 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) 2025 Arsham Mikaeili Namini - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2025 +COPYRIGHT HOLDER: Arsham Mikaeili Namini diff --git a/NAMESPACE b/NAMESPACE index bf5baf2..d3e2e68 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(print,gedi_dynamics) +S3method(print,gedi_dynamics_svd) +S3method(print,gedi_imputation) +S3method(print,gedi_pathway_associations) export(CreateGEDIObject) export(check_optional_dependencies) export(gedi_to_seurat) @@ -11,10 +15,6 @@ export(plot_embedding) export(plot_feature_ratio) export(plot_features) export(plot_vector_field) -export(print.gedi_dynamics) -export(print.gedi_dynamics_svd) -export(print.gedi_imputation) -export(print.gedi_pathway_associations) export(read_h5) export(read_h5ad) export(seurat_to_gedi) @@ -25,14 +25,15 @@ importFrom(Matrix,Matrix) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) -importFrom(hdf5r,"h5attr<-") -importFrom(hdf5r,H5File) -importFrom(hdf5r,existsGroup) -importFrom(hdf5r,h5attr) importFrom(methods,as) importFrom(methods,is) +importFrom(stats,coef) +importFrom(stats,median) importFrom(stats,rnorm) importFrom(stats,runif) -importFrom(utils,installed.packages) +importFrom(stats,var) +importFrom(utils,object.size) importFrom(utils,packageVersion) +importFrom(utils,setTxtProgressBar) +importFrom(utils,txtProgressBar) useDynLib(gedi, .registration = TRUE) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..c1b437f --- /dev/null +++ b/NEWS.md @@ -0,0 +1,29 @@ +# gedi 2.3.1 + +## CRAN Compliance + +* Replace all `cat()` / `print()` calls with `message()` or `warning()` for + suppressible console output. +* Add `verbose` parameters to `seurat_to_gedi()`, `gedi_to_seurat()`, + `check_optional_dependencies()`, and `install_optional_dependencies()`. +* Replace `cat()`-based progress bars with `txtProgressBar()` across imputation + and training routines. +* Replace `installed.packages()` with `requireNamespace()` for dependency + checking. +* Move `hdf5r` from Imports to Suggests (optional dependency for H5AD I/O). +* Add `ggplot2` and `scales` to Imports; add `uwot` and `digest` to Suggests. +* Use CRAN-required two-line LICENSE format. +* Add `@return` documentation tags to all exported and documented functions. +* Replace non-ASCII characters in C++ source files with ASCII equivalents. +* Remove redundant Maintainer field from DESCRIPTION. + +# gedi 2.3.0 + +* Initial public release with C++ backend and R6 interface. +* Support for multiple data modalities (count matrices, paired data, binary + indicators). +* Latent variable model with block coordinate descent optimization. +* Dimensionality reduction, batch correction, and imputation. +* Differential expression and pathway association analysis. +* H5AD file I/O for Python interoperability. +* Seurat and SingleCellExperiment integration. diff --git a/R/RcppExports.R b/R/RcppExports.R index 6d7419f..7a142be 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -216,7 +216,7 @@ run_factorized_svd_cpp <- function(Z, projDB, verbose = 0L) { #' @return Dense matrix (J x Ni) - residual Yi after removing sample effects #' #' @details -#' Computes: Yi - QiDBi - (si ⊗ 1ᵀ) - (o + oi) ⊗ 1ᵀ +#' Computes: Yi - QiDBi - (si (x) 1^T) - (o + oi) (x) 1^T #' This leaves only the ZDBi component (shared biological signal). #' #' @keywords internal @@ -238,7 +238,7 @@ Yi_resZ <- function(Yi, QiDBi, si, o, oi) { #' @return Dense matrix (J x Ni) - predicted Yi #' #' @details -#' Computes: Ŷi = ZDBi + QiDBi + (si ⊗ 1ᵀ) + (o + oi) ⊗ 1ᵀ +#' Computes: Y_hati = ZDBi + QiDBi + (si (x) 1^T) + (o + oi) (x) 1^T #' This is the full model prediction for log-expression. #' #' @keywords internal @@ -262,7 +262,7 @@ predict_Yhat <- function(ZDBi, QiDBi, si, o, oi) { #' #' This comes from the Poisson-lognormal model where: #' - Mi ~ Poisson(exp(Yi)) -#' - Yi ~ N(Ŷi, sigma2) +#' - Yi ~ N(Y_hati, sigma2) #' #' The posterior variance decreases where: #' - Counts are high (exp(Yi) large) @@ -292,7 +292,7 @@ Yi_var_M <- function(Yi, sigma2) { #' #' This comes from the binomial-logistic-normal model where: #' - M1i ~ Binomial(M1i + M2i, p) -#' - logit(p) = Yi ~ N(Ŷi, sigma2) +#' - logit(p) = Yi ~ N(Y_hati, sigma2) #' #' The variance depends on: #' - Total counts M (more counts = less variance) @@ -324,7 +324,7 @@ Yi_var_M_paired <- function(Yi, M1i, M2i, sigma2) { #' for downstream dispersion analysis (binning and aggregation done in R). #' #' Memory optimization: Works only on sparse nonzero positions (typically 5-10% of matrix). -#' For 30K genes × 10K cells with 5% nonzero: samples from ~15M positions instead of 300M. +#' For 30K genes x 10K cells with 5% nonzero: samples from ~15M positions instead of 300M. #' #' @keywords internal #' @noRd @@ -362,7 +362,7 @@ Yi_SSE_M_paired <- function(Yi, M1i, M2i, ZDBi, QiDBi, si, o, oi, sigma2) { #' #' @param feature_weights Vector of length K (factor loadings for this feature) #' @param D Scaling vector of length K -#' @param Bi_list List of sample-specific cell projection matrices (K × Ni each) +#' @param Bi_list List of sample-specific cell projection matrices (K x Ni each) #' @param verbose Integer verbosity level #' #' @return Vector of length N (total cells) with projected values @@ -378,12 +378,12 @@ compute_feature_projection <- function(feature_weights, D, Bi_list, verbose = 0L #' Projects multiple features through the GEDI model simultaneously. #' Computes: (feature_weights * D) %*% B for F features. #' -#' @param feature_weights Matrix K × F (factor loadings for F features) +#' @param feature_weights Matrix K x F (factor loadings for F features) #' @param D Scaling vector of length K -#' @param Bi_list List of sample-specific cell projection matrices (K × Ni each) +#' @param Bi_list List of sample-specific cell projection matrices (K x Ni each) #' @param verbose Integer verbosity level #' -#' @return Matrix N × F with projected values for each feature +#' @return Matrix N x F with projected values for each feature #' #' @keywords internal #' @noRd @@ -420,10 +420,10 @@ aggregate_vectors <- function(Dim1, Dim2, To1, To2, color, alpha, n_bins, min_pe #' the concatenation of all sample-specific Bi matrices. This is the main #' integrated representation of cells in the GEDI latent space. #' -#' @param Z Shared metagene matrix (J × K), where J = genes, K = latent factors +#' @param Z Shared metagene matrix (J x K), where J = genes, K = latent factors #' @param D Scaling vector (length K) representing the importance of each factor #' @param Bi_list List of sample-specific cell projection matrices, where each -#' Bi is a K × Ni matrix (Ni = number of cells in sample i) +#' Bi is a K x Ni matrix (Ni = number of cells in sample i) #' @param verbose Integer verbosity level: #' \itemize{ #' \item 0: Silent (no output) @@ -431,7 +431,7 @@ aggregate_vectors <- function(Dim1, Dim2, To1, To2, color, alpha, n_bins, min_pe #' \item 2: Detailed per-sample information #' } #' -#' @return Dense matrix ZDB of dimensions J × N, where N = sum(Ni) is the total +#' @return Dense matrix ZDB of dimensions J x N, where N = sum(Ni) is the total #' number of cells across all samples. Each column represents a cell in the #' integrated latent space. #' @@ -441,17 +441,17 @@ aggregate_vectors <- function(Dim1, Dim2, To1, To2, color, alpha, n_bins, min_pe #' #' Computational strategy: #' \enumerate{ -#' \item Pre-compute ZD = Z * diag(D) once (saves K×J×numSamples operations) +#' \item Pre-compute ZD = Z * diag(D) once (saves KxJxnumSamples operations) #' \item For each sample i: compute ZD * Bi and concatenate #' \item Use Eigen block operations for efficient memory access #' } #' -#' Memory: Allocates one J × N dense matrix. For large datasets (e.g., 20k genes, +#' Memory: Allocates one J x N dense matrix. For large datasets (e.g., 20k genes, #' 50k cells), this requires ~8 GB RAM. Consider computing projections on demand #' or working with subsets if memory is limited. #' #' Performance: OpenMP parallelization available if enabled during compilation. -#' Typical speed: ~100-200ms for 20k × 5k dataset on modern CPU. +#' Typical speed: ~100-200ms for 20k x 5k dataset on modern CPU. #' #' @keywords internal #' @noRd @@ -467,10 +467,10 @@ compute_ZDB_cpp <- function(Z, D, Bi_list, verbose = 0L) { #' #' @param D Scaling vector (length K) representing factor importance #' @param Bi_list List of sample-specific cell projection matrices, where each -#' Bi is a K × Ni matrix +#' Bi is a K x Ni matrix #' @param verbose Integer verbosity level (0 = silent, 1 = progress, 2 = detailed) #' -#' @return Dense matrix DB of dimensions K × N, where N = sum(Ni). Each column +#' @return Dense matrix DB of dimensions K x N, where N = sum(Ni). Each column #' represents a cell's coordinates in the latent factor space. #' #' @details @@ -488,7 +488,7 @@ compute_ZDB_cpp <- function(Z, D, Bi_list, verbose = 0L) { #' \item Apply diagonal scaling: diag(D) * B #' } #' -#' Memory: Much smaller than ZDB (K × N vs. J × N). For K=10 and N=50k cells, +#' Memory: Much smaller than ZDB (K x N vs. J x N). For K=10 and N=50k cells, #' requires only ~4 MB vs. ~8 GB for ZDB when J=20k. #' #' Performance: Very fast (~10-50ms) since K << J typically. diff --git a/R/gedi-package.R b/R/gedi-package.R index 76e7c28..65f8bb2 100644 --- a/R/gedi-package.R +++ b/R/gedi-package.R @@ -55,7 +55,6 @@ #' @seealso #' \itemize{ #' \item \code{\link{CreateGEDIObject}}: Create a GEDI model -#' \item \code{\link{GEDI}}: R6 class documentation #' } #' #' @examples @@ -86,5 +85,6 @@ #' @import R6 #' @import Matrix #' @importFrom methods as is -#' @importFrom stats rnorm runif +#' @importFrom stats coef median rnorm runif var +#' @importFrom utils object.size setTxtProgressBar txtProgressBar NULL diff --git a/R/gedi_class.R b/R/gedi_class.R index f599ce1..6e70046 100644 --- a/R/gedi_class.R +++ b/R/gedi_class.R @@ -358,7 +358,7 @@ GEDI <- R6Class( # =================================================================== aux <- list( - # DBi optimization: Store smaller K × Ni instead of J × Ni matrices + # DBi optimization: Store smaller K x Ni instead of J x Ni matrices DBi = lapply(Ni, function(n) matrix(0, nrow = K, ncol = n)), Qi_hat = if (L > 0) lapply(1:num_samples, function(i) matrix(0, nrow = J, ncol = K)) else list(), oi_hat = if (L > 0) lapply(1:num_samples, function(i) rep(0, J)) else list(), @@ -609,30 +609,14 @@ GEDI <- R6Class( stop("Model not setup. Call CreateGEDIObject() first.", call. = FALSE) } - # Add Method 4 progress bar for verbose=1 only + # Add progress bar for verbose=1 only if (private$.verbose == 1) { - # Print initial header (stays fixed) - cat(sprintf("GEDI Optimization (%d iterations)\n", iterations)) + message(sprintf("GEDI Optimization (%d iterations)", iterations)) - # Custom progress bar that updates on same line - cat("|", rep(" ", 50), "| 0%\r", sep = "") - flush.console() - - # Create temp file to sink C++ output - tmp <- tempfile() - sink_active <- FALSE - - on.exit({ - if (sink_active) { - sink(type = "output") - } - cat("\n") # Final newline - if (file.exists(tmp)) unlink(tmp) - }, add = TRUE) - - # Redirect C++ output to temp file - sink(tmp, type = "output") - sink_active <- TRUE + # Use txtProgressBar which writes to stderr (suppressible) + pb <- txtProgressBar(min = 0, max = iterations, style = 3, + width = 50, file = stderr()) + on.exit(close(pb), add = TRUE) # Initialize with first iteration (includes initialization) private$.lastResult <- GEDI_train( @@ -641,38 +625,18 @@ GEDI <- R6Class( track_interval = track_interval, multimodal = multimodal ) + setTxtProgressBar(pb, 1) - # Restore output temporarily to update progress bar - sink(type = "output") - sink_active <- FALSE - - # Update progress bar - pct <- round(1 / iterations * 100) - n_filled <- round(50 * 1 / iterations) - cat("|", rep("=", n_filled), rep(" ", 50 - n_filled), "| ", pct, "%\r", sep = "") - flush.console() - - # Resume sinking for remaining iterations + # Run remaining iterations if (iterations > 1) { - # Run all remaining iterations, only get results at the end - sink(tmp, type = "output", append = TRUE) - sink_active <- TRUE - # MEMORY FIX: Call GEDI_optimize once for all remaining iterations # This avoids copying GBs of data from C++ to R on every iteration private$.lastResult <- GEDI_optimize( model_ptr = private$.cppPtr, - iterations = iterations - 1, # Remaining iterations after first one + iterations = iterations - 1, track_interval = track_interval ) - - # Restore output - sink(type = "output") - sink_active <- FALSE - - # Update progress bar to 100% - cat("|", rep("=", 50), "| 100%\r", sep = "") - flush.console() + setTxtProgressBar(pb, iterations) } } else { @@ -829,7 +793,7 @@ GEDI <- R6Class( if (!is.null(private$.lastResult)) { aux <- private$.lastResult$aux - cat(sprintf("Dimensions: %d genes × %d cells\n", aux$J, aux$N)) + cat(sprintf("Dimensions: %d genes x %d cells\n", aux$J, aux$N)) cat(sprintf("Samples: %d (%s)\n", aux$numSamples, paste(head(private$.sampleNames, 3), collapse = ", "))) @@ -1034,8 +998,8 @@ GEDI <- R6Class( #' @param Y Log-transformed expression matrix (optional if M provided) #' @param X Binary indicator matrix (optional if M or Y provided) #' @param colData Optional data.frame with cell metadata -#' @param C Gene-level prior matrix (genes × pathways) -#' @param H Sample-level covariate matrix (covariates × samples) +#' @param C Gene-level prior matrix (genes x pathways) +#' @param H Sample-level covariate matrix (covariates x samples) #' @param K Number of latent factors (default: 10) #' @param mode Normalization mode: "Bl2" or "Bsphere" (default: "Bl2") #' @param adjustD Whether to adjust D based on B row norms (default: TRUE) diff --git a/R/gedi_dimred.R b/R/gedi_dimred.R index a72926f..972878d 100644 --- a/R/gedi_dimred.R +++ b/R/gedi_dimred.R @@ -63,7 +63,7 @@ EmbeddingsAccessor <- R6Class( #' Compute Factorized SVD with Caching #' #' @description -#' Computes factorized SVD preserving GEDI structure: SVD(Z) × SVD(middle) × SVD(DB). +#' Computes factorized SVD preserving GEDI structure: SVD(Z) x SVD(middle) x SVD(DB). #' This approach maintains biological interpretability by respecting the decomposition #' structure. Results are cached automatically for subsequent access. #' @@ -74,8 +74,8 @@ EmbeddingsAccessor <- R6Class( #' @return List with three components: #' \itemize{ #' \item d: Singular values (length K vector) -#' \item u: Left singular vectors (J × K matrix) with gene names -#' \item v: Right singular vectors (N × K matrix) with cell names +#' \item u: Left singular vectors (J x K matrix) with gene names +#' \item v: Right singular vectors (N x K matrix) with cell names #' } #' #' @keywords internal @@ -129,14 +129,14 @@ compute_svd_factorized <- function(self, private, force_recompute = FALSE) { #' Compute PCA Coordinates with Caching #' #' @description -#' Computes PCA coordinates as V × diag(d) from the factorized SVD. +#' Computes PCA coordinates as V x diag(d) from the factorized SVD. #' Uses cached SVD result if available. #' #' @param self Reference to GEDI R6 object #' @param private Reference to private environment #' @param force_recompute Logical, if TRUE bypasses cache and recomputes #' -#' @return Matrix (N × K) with PCA coordinates, rows = cells, columns = PCs +#' @return Matrix (N x K) with PCA coordinates, rows = cells, columns = PCs #' #' @keywords internal #' @noRd @@ -158,7 +158,7 @@ compute_pca <- function(self, private, force_recompute = FALSE) { # Get SVD (uses cache if available) svd_result <- compute_svd_factorized(self, private) - # PCA = V × diag(d) + # PCA = V x diag(d) pca_coords <- svd_result$v %*% diag(svd_result$d, nrow = length(svd_result$d)) # Set dimension names @@ -188,7 +188,7 @@ compute_pca <- function(self, private, force_recompute = FALSE) { #' @param private Reference to private environment #' @param force_recompute Logical, if TRUE bypasses cache and recomputes #' -#' @return Matrix (N × 2) with UMAP coordinates, rows = cells +#' @return Matrix (N x 2) with UMAP coordinates, rows = cells #' #' @keywords internal #' @noRd @@ -236,7 +236,7 @@ compute_umap_cached <- function(self, private, force_recompute = FALSE) { #' @param n_threads Integer, number of threads (default: 0 = auto) #' @param ... Additional arguments passed to uwot::umap() #' -#' @return Matrix (N × n_components) with UMAP coordinates, rows = cells +#' @return Matrix (N x n_components) with UMAP coordinates, rows = cells #' #' @keywords internal #' @noRd diff --git a/R/gedi_dynamics.R b/R/gedi_dynamics.R index a6654d5..f661ecd 100644 --- a/R/gedi_dynamics.R +++ b/R/gedi_dynamics.R @@ -6,14 +6,14 @@ #' Compute Differential Q in Z-space (JxK) #' #' @description -#' Computes sample-variable effects on Qi, returning a J × K matrix. +#' Computes sample-variable effects on Qi, returning a J x K matrix. #' This is the R wrapper for the C++ function getDiffQ_cpp(). #' #' @param self Reference to GEDI R6 object #' @param private Reference to private environment #' @param contrast Contrast vector (length L) #' -#' @return Matrix J × K showing differential effect in Z-space +#' @return Matrix J x K showing differential effect in Z-space #' #' @keywords internal #' @noRd @@ -364,7 +364,7 @@ run_joint_svd <- function(self, private, start.cond, end.cond, C_index, #' @param self Reference to GEDI R6 object #' @param private Reference to private environment #' -#' @return Matrix (J × num_pathways) of gene expression gradients +#' @return Matrix (J x num_pathways) of gene expression gradients #' #' @keywords internal #' @noRd @@ -385,11 +385,11 @@ get_pathway_gradients <- function(self, private) { A_full <- private$.aux_static$C.rotation %*% self$params$A # Gradient is transpose of A - gradient <- t(A_full) # K × num_pathways + gradient <- t(A_full) # K x num_pathways # Project to gene space Z <- self$params$Z - gradient <- Z %*% gradient # J × num_pathways + gradient <- Z %*% gradient # J x num_pathways # Set dimension names rownames(gradient) <- private$.geneIDs @@ -441,6 +441,8 @@ create_dynamics_accessor <- function(self, private) { #' @param x Object of class gedi_dynamics #' @param ... Additional arguments (ignored) #' +#' @return Invisibly returns \code{x}. +#' @method print gedi_dynamics #' @keywords internal #' @export print.gedi_dynamics <- function(x, ...) { @@ -476,12 +478,14 @@ print.gedi_dynamics <- function(x, ...) { #' @param x Object of class gedi_dynamics_svd #' @param ... Additional arguments (ignored) #' +#' @return Invisibly returns \code{x}. +#' @method print gedi_dynamics_svd #' @keywords internal #' @export print.gedi_dynamics_svd <- function(x, ...) { cat("\n\n") cat(sprintf("Method: %s\n", x$metadata$method)) - cat(sprintf("Dimensions: %d genes × %d components\n", nrow(x$u), ncol(x$u))) + cat(sprintf("Dimensions: %d genes x %d components\n", nrow(x$u), ncol(x$u))) cat(sprintf("Cells: %d\n", length(x$indices$embedding))) if (x$metadata$method == "vector_field") { diff --git a/R/gedi_imputation.R b/R/gedi_imputation.R index 785ca9b..914c6c1 100644 --- a/R/gedi_imputation.R +++ b/R/gedi_imputation.R @@ -215,7 +215,7 @@ validate_M_single <- function(M, fp, label) { #' @param private Reference to private environment #' @param sample_idx Integer, which sample to compute (1 to numSamples) #' -#' @return Dense matrix (J × Ni) - fitted Yi for sample i +#' @return Dense matrix (J x Ni) - fitted Yi for sample i #' #' @keywords internal #' @noRd @@ -263,12 +263,12 @@ compute_Y_fitted <- function(self, private, sample_idx) { #' Removes sample-specific effects from Yi_fitted to get shared biological signal. #' This is the "imputed" expression that can be compared across samples. #' -#' @param Yi_fitted Dense matrix (J × Ni) - fitted Yi for sample i +#' @param Yi_fitted Dense matrix (J x Ni) - fitted Yi for sample i #' @param params List with model parameters #' @param sample_idx Integer, which sample this is #' @param rowCentre Logical, if TRUE removes global gene offset o #' -#' @return Dense matrix (J × Ni) - imputed Y with sample effects removed +#' @return Dense matrix (J x Ni) - imputed Y with sample effects removed #' #' @keywords internal #' @noRd @@ -319,7 +319,7 @@ compute_Y_imputed <- function(Yi_fitted, params, sample_idx, rowCentre) { #' @param private Reference to private environment #' @param M Count matrix (must match original), or list for M_paired #' -#' @return Dense matrix (J × N) - variance of imputed Y +#' @return Dense matrix (J x N) - variance of imputed Y #' #' @keywords internal #' @noRd @@ -333,11 +333,11 @@ compute_Y_variance <- function(self, private, M) { # Check observation type obs_type <- self$aux$obs.type if (obs_type == "Y") { - message("Observation type is 'Y' (not raw counts). Returning NULL.") + if (private$.verbose >= 1) message("Observation type is 'Y' (not raw counts). Returning NULL.") return(NULL) } if (obs_type == "X") { - message("Observation type is 'X' (binary). Variance calculation not applicable. Returning NULL.") + if (private$.verbose >= 1) message("Observation type is 'X' (binary). Variance calculation not applicable. Returning NULL.") return(NULL) } @@ -373,9 +373,10 @@ compute_Y_variance <- function(self, private, M) { Y_var_list <- vector("list", numSamples) if (private$.verbose == 1) { - cat(sprintf("Computing Y variance (%d samples)\n", numSamples)) - cat("|", rep(" ", 50), "| 0%\r", sep = "") - flush.console() + message(sprintf("Computing Y variance (%d samples)", numSamples)) + pb <- txtProgressBar(min = 0, max = numSamples, style = 3, + width = 50, file = stderr()) + on.exit(close(pb), add = TRUE) for (i in 1:numSamples) { idx <- cells_by_sample[[i]] @@ -394,13 +395,8 @@ compute_Y_variance <- function(self, private, M) { Y_var_list[[i]] <- Yi_var_M_paired(Yi_fitted, M1i, M2i, self$params$sigma2) } - # Update progress bar - pct <- round(i / numSamples * 100) - n_filled <- round(50 * i / numSamples) - cat("|", rep("=", n_filled), rep(" ", 50 - n_filled), "| ", pct, "%\r", sep = "") - flush.console() + setTxtProgressBar(pb, i) } - cat("\n") # Final newline } else { # Silent or debug mode - no progress bar @@ -483,9 +479,10 @@ compute_dispersion <- function(self, private, M, subsample = 1e6) { result_list <- vector("list", numSamples) if (private$.verbose == 1) { - cat(sprintf("Computing dispersion (%d samples)\n", numSamples)) - cat("|", rep(" ", 50), "| 0%\r", sep = "") - flush.console() + message(sprintf("Computing dispersion (%d samples)", numSamples)) + pb <- txtProgressBar(min = 0, max = numSamples, style = 3, + width = 50, file = stderr()) + on.exit(close(pb), add = TRUE) for (i in 1:numSamples) { idx <- cells_by_sample[[i]] @@ -532,13 +529,8 @@ compute_dispersion <- function(self, private, M, subsample = 1e6) { result_list[[i]] <- xy - # Update progress bar - pct <- round(i / numSamples * 100) - n_filled <- round(50 * i / numSamples) - cat("|", rep("=", n_filled), rep(" ", 50 - n_filled), "| ", pct, "%\r", sep = "") - flush.console() + setTxtProgressBar(pb, i) } - cat("\n") # Final newline } else { # Silent or debug mode - no progress bar diff --git a/R/gedi_pathway.R b/R/gedi_pathway.R index f177f0c..74cd6a1 100644 --- a/R/gedi_pathway.R +++ b/R/gedi_pathway.R @@ -13,7 +13,7 @@ #' @param self Reference to GEDI R6 object #' @param private Reference to private environment #' -#' @return Dense matrix (num_pathways × K) where rows correspond to original +#' @return Dense matrix (num_pathways x K) where rows correspond to original #' pathways and columns to latent factors. Positive values indicate pathways #' that are enriched in a factor, negative values indicate depletion. #' @@ -55,13 +55,13 @@ compute_dense_A <- function(self, private) { } if (private$.verbose >= 1) { - cat("Computing dense pathway-factor associations...\n") + message("Computing dense pathway-factor associations...") } # Compute A in original pathway space: C.rotation %*% A - # C.rotation is num_pathways × P - # A is P × K - # Result is num_pathways × K + # C.rotation is num_pathways x P + # A is P x K + # Result is num_pathways x K A_full <- private$.aux_static$C.rotation %*% self$params$A # Add row and column names @@ -69,7 +69,7 @@ compute_dense_A <- function(self, private) { colnames(A_full) <- paste0("LV", seq_len(self$aux$K)) if (private$.verbose >= 1) { - cat(sprintf(" Computed %d pathways × %d factors\n", + message(sprintf(" Computed %d pathways x %d factors", nrow(A_full), ncol(A_full))) } @@ -87,11 +87,11 @@ compute_dense_A <- function(self, private) { #' #' @param self Reference to GEDI R6 object #' @param private Reference to private environment -#' @param C Optional gene × pathway matrix. If NULL (default), uses the original +#' @param C Optional gene x pathway matrix. If NULL (default), uses the original #' C matrix provided during model setup. Can provide a different pathway #' database for post-hoc interpretation. #' -#' @return Sparse matrix (num_pathways × K) with many zero entries. Non-zero +#' @return Sparse matrix (num_pathways x K) with many zero entries. Non-zero #' values indicate pathways that are strongly associated with each factor. #' #' @details @@ -151,7 +151,7 @@ compute_sparse_A <- function(self, private, C = NULL) { C <- private$.aux_static$inputC if (private$.verbose >= 1) { - cat("Computing sparse pathway-factor associations (using original C)...\n") + message("Computing sparse pathway-factor associations (using original C)...") } } else { @@ -171,7 +171,7 @@ compute_sparse_A <- function(self, private, C = NULL) { } if (private$.verbose >= 1) { - cat(sprintf("Computing sparse pathway-factor associations (custom C: %d pathways)...\n", + message(sprintf("Computing sparse pathway-factor associations (custom C: %d pathways)...", ncol(C))) } } @@ -186,12 +186,12 @@ compute_sparse_A <- function(self, private, C = NULL) { # For each latent factor, run LASSO regression if (private$.verbose >= 1) { - cat(sprintf(" Running LASSO for %d factors...\n", K)) + message(sprintf(" Running LASSO for %d factors...", K)) } for (k in 1:K) { if (private$.verbose >= 2) { - cat(sprintf(" Factor %d/%d\n", k, K)) + message(sprintf(" Factor %d/%d", k, K)) } # Run cross-validated LASSO @@ -235,9 +235,9 @@ compute_sparse_A <- function(self, private, C = NULL) { total_possible <- num_pathways * K sparsity_pct <- 100 * (1 - total_nonzero / total_possible) - cat(sprintf(" Sparsity: %.1f%% (%d / %d nonzero)\n", + message(sprintf(" Sparsity: %.1f%% (%d / %d nonzero)", sparsity_pct, total_nonzero, total_possible)) - cat(sprintf(" Nonzero per factor: min=%d, median=%d, max=%d\n", + message(sprintf(" Nonzero per factor: min=%d, median=%d, max=%d", min(nonzero_per_factor), median(nonzero_per_factor), max(nonzero_per_factor))) @@ -287,6 +287,8 @@ create_pathway_associations_accessor <- function(self, private) { #' @param x Object of class gedi_pathway_associations #' @param ... Additional arguments (ignored) #' +#' @return Invisibly returns \code{x}. +#' @method print gedi_pathway_associations #' @keywords internal #' @export print.gedi_pathway_associations <- function(x, ...) { diff --git a/R/gedi_plot_utils.R b/R/gedi_plot_utils.R index a8e7a5c..71b9101 100644 --- a/R/gedi_plot_utils.R +++ b/R/gedi_plot_utils.R @@ -1,6 +1,7 @@ # R/gedi_plot_utils.R #' GEDI Plot Theme +#' @return A \code{ggplot2} theme object. #' @keywords internal theme_gedi <- function(base_size = 11) { ggplot2::theme_bw(base_size = base_size) + @@ -27,6 +28,7 @@ theme_gedi <- function(base_size = 11) { #' Diverging Color Scale for GEDI Plots +#' @return A \code{ggplot2} continuous color scale. #' @keywords internal scale_color_gedi_diverging <- function(limits = NULL, name = "Value", ...) { ggplot2::scale_color_gradientn( @@ -51,6 +53,7 @@ scale_color_gedi_diverging <- function(limits = NULL, name = "Value", ...) { #' Fill Scale for GEDI Plots +#' @return A \code{ggplot2} continuous fill scale. #' @keywords internal scale_fill_gedi_diverging <- function(limits = NULL, name = "Value", ...) { ggplot2::scale_fill_gradientn( @@ -75,6 +78,7 @@ scale_fill_gedi_diverging <- function(limits = NULL, name = "Value", ...) { #' Discrete Color Palette for GEDI +#' @return A \code{ggplot2} discrete color scale. #' @keywords internal scale_color_gedi_discrete <- function(name = "Group", ...) { # Extended palette with 24 distinct colors @@ -97,6 +101,7 @@ scale_color_gedi_discrete <- function(name = "Group", ...) { #' Compute Color Limits from Data +#' @return A numeric vector of length 2 with lower and upper limits. #' @keywords internal compute_color_limits <- function(values, symmetric = TRUE, quantile = 0.99) { if (symmetric) { diff --git a/R/gedi_plot_wrappers.R b/R/gedi_plot_wrappers.R index 806e0b1..b293618 100644 --- a/R/gedi_plot_wrappers.R +++ b/R/gedi_plot_wrappers.R @@ -4,10 +4,10 @@ #' Get Embedding Coordinates with Smart Caching #' #' @param model GEDI model object -#' @param embedding_type Character: "umap", "pca", or a custom N×2 matrix +#' @param embedding_type Character: "umap", "pca", or a custom Nx2 matrix #' @param dims Integer vector of length 2 for which dimensions to use (default c(1,2)) #' @param verbose Logical, print messages about computation -#' @return N×2 matrix of embedding coordinates +#' @return Nx2 matrix of embedding coordinates #' @keywords internal .get_embedding <- function(model, embedding_type, dims = c(1, 2), verbose = TRUE) { @@ -90,7 +90,7 @@ # Compute projection on-demand for this gene only Z <- model$params$Z - feature_weights <- Z[gene_idx, ] # K × 1 + feature_weights <- Z[gene_idx, ] # K x 1 if (projection == "zdb") { expr_values <- compute_feature_projection( @@ -100,8 +100,8 @@ verbose = 0 ) } else if (projection == "db") { - DB <- model$projections$DB # K × N - expr_values <- as.vector(t(DB) %*% feature_weights) # N × 1 + DB <- model$projections$DB # K x N + expr_values <- as.vector(t(DB) %*% feature_weights) # N x 1 } else { stop("projection must be 'zdb' or 'db'", call. = FALSE) } @@ -120,7 +120,7 @@ #' Model is the first argument, and color_by handles metadata/genes automatically. #' #' @param model GEDI model object (or embedding matrix for backwards compatibility) -#' @param embedding Character ("umap", "pca") or N×2 matrix +#' @param embedding Character ("umap", "pca") or Nx2 matrix #' @param color_by Character: "sample", metadata column, gene name, or NULL #' @param color Vector for manual coloring (overrides color_by) #' @param projection Character: "zdb" or "db" for gene expression projection @@ -216,7 +216,8 @@ plot_embedding <- function(model, p <- p + ggplot2::geom_point(ggplot2::aes(color = Color), size = point_size, alpha = alpha) } - message("Note: Rasterization not implemented yet. Using regular points.") + warning("Rasterization not implemented yet. Using regular points.", + call. = FALSE) } else { if (!use_color) { p <- p + ggplot2::geom_point(size = point_size, alpha = alpha) diff --git a/R/gedi_plots.R b/R/gedi_plots.R index 933c846..91e1da1 100644 --- a/R/gedi_plots.R +++ b/R/gedi_plots.R @@ -1,11 +1,19 @@ # R/gedi_plot_embedding.R +# Suppress R CMD check NOTEs for ggplot2 NSE variables +utils::globalVariables(c( + "Dim1", "Dim2", "Color", "Value", "Feature", "Alpha", + "To1", "To2", "VF_Color", + "Expected_Var", "Observed_Var", "Sample", + "Iteration", "RMSD", "Parameter", "Factor", "Group", "sigma2" +)) + #' Plot 2D Embedding (Base Function) #' #' Base function for plotting 2D embeddings with customizable coloring. #' Supports both continuous and discrete color variables. #' -#' @param embedding Matrix (N × 2) with x and y coordinates for each cell +#' @param embedding Matrix (N x 2) with x and y coordinates for each cell #' @param color Vector of length N for coloring points, or NULL for uniform color #' @param color_limits Numeric vector c(low, high) for color scale limits, #' or NULL to auto-compute from data (uses 99th percentile for symmetric limits) @@ -91,7 +99,8 @@ p <- p + ggplot2::geom_point(ggplot2::aes(color = Color), size = point_size, alpha = alpha) } - message("Note: Rasterization not implemented yet. Using regular points.") + warning("Rasterization not implemented yet. Using regular points.", + call. = FALSE) } else { if (!use_color) { p <- p + ggplot2::geom_point(size = point_size, alpha = alpha) @@ -147,7 +156,7 @@ #' @param model GEDI model object #' @param features Character vector of gene names or integer indices #' @param embedding Character specifying embedding type ("umap", "pca") or -#' a custom N × 2 matrix +#' a custom N x 2 matrix #' @param projection Character, type of projection to compute ("zdb" or "db") #' @param color_limits Character ("global" for shared scale, "individual" for #' per-facet scale) or numeric vector c(low, high) @@ -224,8 +233,8 @@ plot_features <- function(model, # Extract feature weights from Z Z <- model$params$Z - feature_weights <- Z[feature_idx, , drop = FALSE] # F × K - feature_weights <- t(feature_weights) # K × F for C++ + feature_weights <- Z[feature_idx, , drop = FALSE] # F x K + feature_weights <- t(feature_weights) # K x F for C++ # Compute projections using C++ if (projection == "zdb") { @@ -234,36 +243,36 @@ plot_features <- function(model, D = model$params$D, Bi_list = model$params$Bi, verbose = 0 - ) # Returns N × F matrix + ) # Returns N x F matrix } else if (projection == "db") { # For DB: just use factor weights directly - DB <- model$projections$DB # K × N - projections <- t(DB) %*% feature_weights # N × F + DB <- model$projections$DB # K x N + projections <- t(DB) %*% feature_weights # N x F } else if (projection == "adb") { # For ADB: need to check if model has C/H matrices if (model$aux$P == 0) { stop("projection 'adb' requires a model with gene-level prior (C matrix). This model has P=0.", call. = FALSE) } - # Get the full ADB projection: C.rotation * A * D * B (P × N) + # Get the full ADB projection: C.rotation * A * D * B (P x N) # This gives us pathway activities across all cells - ADB <- model$projections$ADB # P × N + ADB <- model$projections$ADB # P x N # We need to project gene features through the pathway space - # feature_weights is K × F (latent factor loadings for selected genes from Z) + # feature_weights is K x F (latent factor loadings for selected genes from Z) # # Mathematical logic: - # 1. Get C.rotation[feature_idx, ] which is J_selected × P + # 1. Get C.rotation[feature_idx, ] which is J_selected x P # This tells us how each selected gene maps to pathways # 2. For each gene, compute its projection as: # sum over pathways: C.rotation[gene, pathway] * ADB[pathway, cells] # Get C.rotation and extract rows for selected genes - C_rotation <- model$.__enclos_env__$private$.aux_static$C.rotation # J × P - C_feature <- C_rotation[feature_idx, , drop = FALSE] # F × P + C_rotation <- model$.__enclos_env__$private$.aux_static$C.rotation # J x P + C_feature <- C_rotation[feature_idx, , drop = FALSE] # F x P - # Project through pathway space: (F × P) %*% (P × N) = F × N - projections <- t(C_feature %*% ADB) # N × F + # Project through pathway space: (F x P) %*% (P x N) = F x N + projections <- t(C_feature %*% ADB) # N x F } else { stop("projection must be 'zdb', 'db', or 'adb'", call. = FALSE) @@ -356,7 +365,7 @@ plot_features <- function(model, #' @param gene2 Character or integer, second gene name or index #' @param comparison Character, type of comparison ("difference" or "correlation") #' @param embedding Character specifying embedding type ("umap", "pca") or -#' a custom N × 2 matrix +#' a custom N x 2 matrix #' @param projection Character, type of projection ("zdb" or "db") #' @param color_limits Numeric vector c(low, high) or NULL for auto-compute #' @param randomize Logical, whether to randomize point order @@ -368,7 +377,7 @@ plot_features <- function(model, #' #' @details #' For comparison = "difference": -#' Computes (Z[gene1,] - Z[gene2,]) * D * B, equivalent to ZDB[gene1,] - ZDB[gene2,]. +#' Computes `(Z[gene1,] - Z[gene2,]) * D * B`, equivalent to `ZDB[gene1,] - ZDB[gene2,]`. #' In log-space, this represents log(gene1/gene2) in the original count space. #' Positive values indicate gene1 > gene2, negative indicates gene2 > gene1. #' @@ -439,7 +448,7 @@ plot_feature_ratio <- function(model, if (comparison == "difference") { # Compute (Z[gene1,] - Z[gene2,]) * D * B Z <- model$params$Z - weight_diff <- Z[gene1_idx, ] - Z[gene2_idx, ] # K × 1 + weight_diff <- Z[gene1_idx, ] - Z[gene2_idx, ] # K x 1 ratio_values <- compute_feature_projection( feature_weights = weight_diff, diff --git a/R/gedi_progress.R b/R/gedi_progress.R index d23b1e9..163e64d 100644 --- a/R/gedi_progress.R +++ b/R/gedi_progress.R @@ -23,7 +23,7 @@ create_progress_bar <- function(total, label = "Progress", verbose = 1) { } if (nchar(label) > 0) { - cat(label, "\n", sep = "") + message(label) } pb <- txtProgressBar(min = 0, max = total, style = 3, width = 50) @@ -87,8 +87,7 @@ close_progress_bar <- function(pb) { #' @noRd log_stage <- function(stage, total, message, verbose = 1) { if (verbose >= 1) { - cat(sprintf(" [%d/%d] %s\n", stage, total, message)) - flush.console() + message(sprintf(" [%d/%d] %s", stage, total, message)) } invisible(NULL) } @@ -109,8 +108,7 @@ log_stage <- function(stage, total, message, verbose = 1) { #' @noRd log_start <- function(operation, verbose = 1) { if (verbose >= 1) { - cat("Computing ", operation, "...\n", sep = "") - flush.console() + message("Computing ", operation, "...") } invisible(NULL) } @@ -133,11 +131,10 @@ log_start <- function(operation, verbose = 1) { log_complete <- function(operation, details = NULL, verbose = 1) { if (verbose >= 1) { if (is.null(details)) { - cat("[DONE] ", operation, " complete\n", sep = "") + message("[DONE] ", operation, " complete") } else { - cat("[DONE] ", operation, " computed: ", details, "\n", sep = "") + message("[DONE] ", operation, " computed: ", details) } - flush.console() } invisible(NULL) } @@ -160,11 +157,10 @@ log_complete <- function(operation, details = NULL, verbose = 1) { log_info <- function(message, verbose = 1, indent = TRUE) { if (verbose >= 1) { if (indent) { - cat(" ", message, "\n", sep = "") + message(" ", message) } else { - cat(message, "\n", sep = "") + message(message) } - flush.console() } invisible(NULL) } @@ -185,8 +181,7 @@ log_info <- function(message, verbose = 1, indent = TRUE) { #' @noRd log_cached <- function(item, verbose = 1) { if (verbose >= 1) { - cat("[CACHED] Using cached ", item, "\n", sep = "") - flush.console() + message("[CACHED] Using cached ", item) } invisible(NULL) } diff --git a/R/gedi_projections.R b/R/gedi_projections.R index 532011a..aa936dc 100644 --- a/R/gedi_projections.R +++ b/R/gedi_projections.R @@ -18,10 +18,10 @@ ProjectionsAccessor <- R6Class( print = function() { cat("\n") cat("\nAvailable projections (lazy-computed):\n") - cat(" $ZDB - Shared manifold projection (J × N)\n") - cat(" $DB - Latent factor embedding (K × N)\n") + cat(" $ZDB - Shared manifold projection (J x N)\n") + cat(" $DB - Latent factor embedding (K x N)\n") if (private$.gedi_self$aux$P > 0) { - cat(" $ADB - Pathway activity projection (P × N)\n") + cat(" $ADB - Pathway activity projection (P x N)\n") } cat("\nAccess with: model$projections$ZDB\n") invisible(self) @@ -65,7 +65,7 @@ ProjectionsAccessor <- R6Class( #' @param private Reference to private environment #' @param force_recompute Logical, if TRUE bypasses cache and recomputes #' -#' @return Dense matrix (J × N) with row/column names set to geneIDs/cellIDs +#' @return Dense matrix (J x N) with row/column names set to geneIDs/cellIDs #' #' @keywords internal #' @noRd @@ -119,7 +119,7 @@ compute_ZDB <- function(self, private, force_recompute = FALSE) { #' @param private Reference to private environment #' @param force_recompute Logical, if TRUE bypasses cache and recomputes #' -#' @return Dense matrix (K × N) with row names "LV1", "LV2", ... and +#' @return Dense matrix (K x N) with row names "LV1", "LV2", ... and #' column names set to cellIDs #' #' @keywords internal @@ -173,7 +173,7 @@ compute_DB <- function(self, private, force_recompute = FALSE) { #' @param private Reference to private environment #' @param force_recompute Logical, if TRUE bypasses cache and recomputes #' -#' @return Dense matrix (P × N) with row names from original C matrix column +#' @return Dense matrix (P x N) with row names from original C matrix column #' names and column names set to cellIDs #' #' @keywords internal @@ -242,7 +242,7 @@ compute_ADB <- function(self, private, force_recompute = FALSE) { #' @param contrast Numeric vector of length L specifying the contrast #' @param include_O Logical, if TRUE adds the global offset effect (diffO) #' -#' @return Dense matrix (J × N) of differential expression values +#' @return Dense matrix (J x N) of differential expression values #' #' @keywords internal #' @noRd diff --git a/R/h5_readers.R b/R/h5_readers.R index dc9a879..0c19750 100644 --- a/R/h5_readers.R +++ b/R/h5_readers.R @@ -48,7 +48,6 @@ #' gedi_obj$setup(Y = expr_matrix, sample_id = samples, K = 10) #' } #' -#' @importFrom hdf5r H5File #' @importFrom Matrix sparseMatrix t Matrix #' @export read_h5ad <- function(file_path, @@ -455,7 +454,6 @@ read_h5ad <- function(file_path, #' gedi_obj$setup(Y = expr_matrix, K = 10) #' } #' -#' @importFrom hdf5r H5File existsGroup #' @importFrom Matrix sparseMatrix #' @export read_h5 <- function(file_path, @@ -696,7 +694,6 @@ read_h5 <- function(file_path, #' list_h5_structure("filtered_feature_bc_matrix.h5") #' } #' -#' @importFrom hdf5r H5File #' @export list_h5_structure <- function(file_path, recursive = TRUE) { @@ -726,14 +723,14 @@ list_h5_structure <- function(file_path, recursive = TRUE) { " Error: ", conditionMessage(e)) }) - cat("\n") - cat("Structure of:", basename(file_path), "\n") - cat(paste(rep("=", nchar(basename(file_path)) + 14), collapse = ""), "\n") - cat("Full path:", file_path, "\n") - cat(paste(rep("-", nchar(file_path) + 11), collapse = ""), "\n\n") + message("") + message("Structure of: ", basename(file_path)) + message(paste(rep("=", nchar(basename(file_path)) + 14), collapse = "")) + message("Full path: ", file_path) + message(paste(rep("-", nchar(file_path) + 11), collapse = ""), "\n") print(structure_info) - cat("\n") + message("") return(invisible(structure_info)) } diff --git a/R/h5_writers.R b/R/h5_writers.R index 986d6de..3946847 100644 --- a/R/h5_writers.R +++ b/R/h5_writers.R @@ -66,7 +66,6 @@ #' # adata.obsm['X_gedi'] # GEDI embeddings #' } #' -#' @importFrom hdf5r H5File h5attr h5attr<- #' @importFrom Matrix t #' @export write_h5ad <- function(model, @@ -116,7 +115,7 @@ write_h5ad <- function(model, if (!is.null(private_env$.M_fingerprint)) { if (verbose) message("[write_h5ad] Validating M matrix identity...") validate_M_identity(M, private_env$.M_fingerprint) - if (verbose) message("[write_h5ad] ✓ M matrix validated successfully") + if (verbose) message("[write_h5ad] M matrix validated successfully") } else { warning("[write_h5ad] No M fingerprint stored in model. Skipping validation.") } @@ -458,7 +457,7 @@ write_h5ad <- function(model, ) # Write attributes for AnnData compatibility - h5attr(mat_group, "shape") <- as.integer(c(nrow(matrix), ncol(matrix))) + hdf5r::h5attr(mat_group, "shape") <- as.integer(c(nrow(matrix), ncol(matrix))) # Write string attributes as scalars for Python compatibility .write_h5ad_scalar_string_attr(mat_group, "encoding-type", "csr_matrix") @@ -536,7 +535,7 @@ write_h5ad <- function(model, ) # Note: encoding attributes removed for AnnData compatibility - h5attr(col_group, "ordered") <- FALSE + hdf5r::h5attr(col_group, "ordered") <- FALSE } else if (is.character(col_data)) { .write_h5ad_string_array(df_group, col_name, col_data) @@ -571,7 +570,7 @@ write_h5ad <- function(model, attr$close() } else { # Empty column-order for dataframes with no columns (just index) - h5attr(df_group, "column-order") <- numeric(0) + hdf5r::h5attr(df_group, "column-order") <- numeric(0) } if (verbose) { diff --git a/R/imputation_accessors.R b/R/imputation_accessors.R index 06d9870..c404571 100644 --- a/R/imputation_accessors.R +++ b/R/imputation_accessors.R @@ -49,7 +49,7 @@ create_imputation_accessor <- function(self, private) { #' @param logScale Logical, if TRUE returns log-scale values (default) #' @param rowCentre Logical, if TRUE removes global gene offset o (default) #' -#' @return Dense matrix (J × N) with imputed expression values +#' @return Dense matrix (J x N) with imputed expression values #' #' @keywords internal #' @noRd @@ -96,9 +96,10 @@ get_imputed_Y <- function(self, private, M = NULL, logScale = TRUE, rowCentre = Y_imputed_list <- vector("list", numSamples) if (private$.verbose == 1) { - cat(sprintf("Computing imputed Y (%d samples)\n", numSamples)) - cat("|", rep(" ", 50), "| 0%\r", sep = "") - flush.console() + message(sprintf("Computing imputed Y (%d samples)", numSamples)) + pb <- txtProgressBar(min = 0, max = numSamples, style = 3, + width = 50, file = stderr()) + on.exit(close(pb), add = TRUE) for (i in 1:numSamples) { # Reconstruct Yi_fitted from parameters (NO M needed!) @@ -112,13 +113,8 @@ get_imputed_Y <- function(self, private, M = NULL, logScale = TRUE, rowCentre = rowCentre = rowCentre ) - # Update progress bar - pct <- round(i / numSamples * 100) - n_filled <- round(50 * i / numSamples) - cat("|", rep("=", n_filled), rep(" ", 50 - n_filled), "| ", pct, "%\r", sep = "") - flush.console() + setTxtProgressBar(pb, i) } - cat("\n") # Final newline } else { # Silent or debug mode - no progress bar @@ -182,7 +178,7 @@ get_imputed_Y <- function(self, private, M = NULL, logScale = TRUE, rowCentre = #' @param private Reference to private environment #' @param M Count matrix (required - must match original) #' -#' @return Dense matrix (J × N) with variance values, or NULL if not applicable +#' @return Dense matrix (J x N) with variance values, or NULL if not applicable #' #' @keywords internal #' @noRd @@ -264,6 +260,8 @@ get_dispersion <- function(self, private, M, subsample = 1e6) { #' @param x Object of class gedi_imputation #' @param ... Additional arguments (ignored) #' +#' @return Invisibly returns \code{x}. +#' @method print gedi_imputation #' @keywords internal #' @export print.gedi_imputation <- function(x, ...) { diff --git a/R/seurat_integration.R b/R/seurat_integration.R index a7c42be..643742d 100644 --- a/R/seurat_integration.R +++ b/R/seurat_integration.R @@ -20,12 +20,13 @@ #' @param subset_samples Character vector, subset to specific samples (default: NULL = all) #' @param K Integer, number of latent factors (default: 10) #' @param mode Character, normalization mode: "Bl2" or "Bsphere" (default: "Bl2") -#' @param C Gene-level prior matrix (genes × pathways) (default: NULL) -#' @param H Sample-level covariate matrix (covariates × samples) (default: NULL) +#' @param C Gene-level prior matrix (genes x pathways) (default: NULL) +#' @param H Sample-level covariate matrix (covariates x samples) (default: NULL) #' @param validate_counts Logical, whether to validate data appears to be counts #' (default: TRUE) #' @param use_variable_features Logical, whether to subset to highly variable features #' (default: TRUE). If TRUE, uses genes from VariableFeatures(seurat_object). +#' @param verbose Logical, whether to print progress messages (default: TRUE) #' @param ... Additional arguments passed to CreateGEDIObject() #' #' @return GEDI R6 object @@ -75,6 +76,7 @@ seurat_to_gedi <- function(seurat_object, H = NULL, validate_counts = TRUE, use_variable_features = TRUE, + verbose = TRUE, ...) { # Check if Seurat is available @@ -108,9 +110,9 @@ seurat_to_gedi <- function(seurat_object, if (length(matching_layers) > 1) { # Multiple split layers found - need to join them - message("Detected ", length(matching_layers), " split layers: ", + if (verbose) message("Detected ", length(matching_layers), " split layers: ", paste(matching_layers, collapse = ", ")) - message("Joining layers with do.call(cbind)...") + if (verbose) message("Joining layers with do.call(cbind)...") # Extract each layer and combine layer_matrices <- lapply(matching_layers, function(layer_name) { @@ -180,7 +182,7 @@ seurat_to_gedi <- function(seurat_object, "Check that VariableFeatures match the gene names in the assay.", call. = FALSE) } - message("Subsetting to ", length(hvg_in_matrix), " highly variable features") + if (verbose) message("Subsetting to ", length(hvg_in_matrix), " highly variable features") M <- M[hvg_in_matrix, , drop = FALSE] } } @@ -228,7 +230,7 @@ seurat_to_gedi <- function(seurat_object, M <- M[, keep_cells] Samples <- Samples[keep_cells] - message("Subset to ", sum(keep_cells), " cells from ", + if (verbose) message("Subset to ", sum(keep_cells), " cells from ", length(subset_samples), " samples") } @@ -242,10 +244,10 @@ seurat_to_gedi <- function(seurat_object, colData[[sample_column]] <- NULL # Create GEDI object - message("Creating GEDI object from Seurat data...") - message(" Genes: ", nrow(M)) - message(" Cells: ", ncol(M)) - message(" Samples: ", length(unique(Samples))) + if (verbose) message("Creating GEDI object from Seurat data...") + if (verbose) message(" Genes: ", nrow(M)) + if (verbose) message(" Cells: ", ncol(M)) + if (verbose) message(" Samples: ", length(unique(Samples))) gedi_model <- CreateGEDIObject( Samples = Samples, @@ -258,7 +260,7 @@ seurat_to_gedi <- function(seurat_object, ... ) - message("GEDI object created successfully!") + if (verbose) message("GEDI object created successfully!") return(gedi_model) } @@ -283,6 +285,7 @@ seurat_to_gedi <- function(seurat_object, #' available (default: TRUE) #' @param min_cells Integer, filter genes with counts in < min_cells (default: 0) #' @param min_features Integer, filter cells with < min_features genes (default: 0) +#' @param verbose Logical, whether to print progress messages (default: TRUE) #' #' @return Seurat object with: #' \itemize{ @@ -290,9 +293,9 @@ seurat_to_gedi <- function(seurat_object, #' \item imputed assay: GEDI imputed expression (if use_imputed = TRUE) #' \item ZDB/DB/ADB assays: GEDI projections (if add_projections = TRUE) #' \itemize{ -#' \item ZDB: Batch-corrected gene expression (genes × cells) -#' \item DB: Cell embeddings in latent factor space (K × cells) -#' \item ADB: Pathway activities (pathways × cells) - only if C matrix provided +#' \item ZDB: Batch-corrected gene expression (genes x cells) +#' \item DB: Cell embeddings in latent factor space (K x cells) +#' \item ADB: Pathway activities (pathways x cells) - only if C matrix provided #' } #' \item umap/pca reductions: Embeddings (if add_embeddings = TRUE and cached) #' \item meta.data: Sample labels and colData from GEDI model @@ -328,7 +331,8 @@ gedi_to_seurat <- function(model, add_projections = TRUE, add_embeddings = TRUE, min_cells = 0, - min_features = 0) { + min_features = 0, + verbose = TRUE) { # Check if Seurat is available if (!requireNamespace("Seurat", quietly = TRUE)) { @@ -341,7 +345,7 @@ gedi_to_seurat <- function(model, stop("Model not trained. Run model$train() first.", call. = FALSE) } - message("Converting GEDI model to Seurat object...") + if (verbose) message("Converting GEDI model to Seurat object...") # Get metadata gene_ids <- model$metadata$geneIDs @@ -350,16 +354,16 @@ gedi_to_seurat <- function(model, # Prepare count matrix if (is.null(M)) { - message(" No original counts provided, using back-transformed imputed values...") + if (verbose) message(" No original counts provided, using back-transformed imputed values...") # Get imputed log-expression and back-transform to counts (approximate) Y_imputed <- model$imputed$Y() M <- Matrix::Matrix(expm1(Y_imputed), sparse = TRUE) } else { - message(" Using provided count matrix...") + if (verbose) message(" Using provided count matrix...") # Ensure M has correct dimensions and ordering if (!identical(dim(M), c(length(gene_ids), length(cell_ids)))) { stop("M dimensions don't match model (expected ", length(gene_ids), - " × ", length(cell_ids), ")", call. = FALSE) + " x ", length(cell_ids), ")", call. = FALSE) } } @@ -368,7 +372,7 @@ gedi_to_seurat <- function(model, colnames(M) <- cell_ids # Create Seurat object - message(" Creating Seurat object...") + if (verbose) message(" Creating Seurat object...") seurat_obj <- Seurat::CreateSeuratObject( counts = M, project = project, @@ -398,7 +402,7 @@ gedi_to_seurat <- function(model, # Add imputed data as separate assay if (use_imputed) { - message(" Adding imputed assay...") + if (verbose) message(" Adding imputed assay...") Y_imputed <- model$imputed$Y() rownames(Y_imputed) <- gene_ids colnames(Y_imputed) <- cell_ids @@ -416,9 +420,9 @@ gedi_to_seurat <- function(model, # Add projections as separate assays if (add_projections) { - message(" Adding projection assays...") + if (verbose) message(" Adding projection assays...") - # ZDB projection (J genes × N cells) + # ZDB projection (J genes x N cells) ZDB <- model$projections$ZDB rownames(ZDB) <- gene_ids colnames(ZDB) <- cell_ids @@ -430,7 +434,7 @@ gedi_to_seurat <- function(model, ) seurat_obj[["ZDB"]] <- zdb_assay - # DB projection (K factors × N cells) + # DB projection (K factors x N cells) DB <- model$projections$DB K <- nrow(DB) rownames(DB) <- paste0("LV", 1:K) @@ -442,7 +446,7 @@ gedi_to_seurat <- function(model, ) seurat_obj[["DB"]] <- db_assay - # ADB projection (P pathways × N cells) - only if model has C matrix + # ADB projection (P pathways x N cells) - only if model has C matrix if (model$aux$P > 0) { ADB <- model$projections$ADB P <- nrow(ADB) @@ -463,7 +467,7 @@ gedi_to_seurat <- function(model, # Add UMAP if cached if (cache_status["umap"]) { - message(" Adding UMAP embedding...") + if (verbose) message(" Adding UMAP embedding...") umap_coords <- model$embeddings$umap common_cells <- intersect(cell_ids, colnames(seurat_obj)) umap_coords <- umap_coords[match(common_cells, cell_ids), , drop = FALSE] @@ -479,7 +483,7 @@ gedi_to_seurat <- function(model, # Add PCA if cached if (cache_status["pca"]) { - message(" Adding PCA embedding...") + if (verbose) message(" Adding PCA embedding...") pca_coords <- model$embeddings$pca common_cells <- intersect(cell_ids, colnames(seurat_obj)) pca_coords <- pca_coords[match(common_cells, cell_ids), , drop = FALSE] @@ -494,9 +498,9 @@ gedi_to_seurat <- function(model, } } - message("Seurat object created successfully!") - message(" Assays: ", paste(names(seurat_obj@assays), collapse = ", ")) - if (length(seurat_obj@reductions) > 0) { + if (verbose) message("Seurat object created successfully!") + if (verbose) message(" Assays: ", paste(names(seurat_obj@assays), collapse = ", ")) + if (verbose && length(seurat_obj@reductions) > 0) { message(" Reductions: ", paste(names(seurat_obj@reductions), collapse = ", ")) } diff --git a/R/zzz.R b/R/zzz.R index 0a2b805..7501a8f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,4 +1,4 @@ -#' @importFrom utils packageVersion installed.packages +#' @importFrom utils packageVersion .onAttach <- function(libname, pkgname) { # Get package version from DESCRIPTION @@ -63,6 +63,7 @@ #' Options: "h5" (hdf5r), "umap" (uwot), "validation" (digest), or "all" (default). #' @param repos Character vector. The CRAN repository to use for installation. #' Default is the user's configured repository. +#' @param verbose Logical, whether to print progress messages (default: TRUE) #' #' @return NULL (invisibly) #' @@ -79,7 +80,8 @@ #' } #' #' @export -install_optional_dependencies <- function(which = "all", repos = getOption("repos")) { +install_optional_dependencies <- function(which = "all", repos = getOption("repos"), + verbose = TRUE) { # Define package groups pkg_groups <- list( @@ -101,27 +103,31 @@ install_optional_dependencies <- function(which = "all", repos = getOption("repo packages <- unlist(pkg_groups[which], use.names = FALSE) } - message("Checking optional GEDI dependencies...") + if (verbose) message("Checking optional GEDI dependencies...") # Check which packages are not installed - installed <- packages %in% rownames(utils::installed.packages()) + installed <- vapply(packages, function(pkg) { + requireNamespace(pkg, quietly = TRUE) + }, logical(1)) to_install <- packages[!installed] if (length(to_install) > 0) { - message("Installing: ", paste(to_install, collapse = ", ")) + if (verbose) message("Installing: ", paste(to_install, collapse = ", ")) utils::install.packages(to_install, repos = repos) - message("\n=== Installation complete! ===") + if (verbose) message("\n=== Installation complete! ===") # Verify installation - newly_installed <- to_install %in% rownames(utils::installed.packages()) + newly_installed <- vapply(to_install, function(pkg) { + requireNamespace(pkg, quietly = TRUE) + }, logical(1)) if (all(newly_installed)) { - message("All packages successfully installed.") + if (verbose) message("All packages successfully installed.") } else { failed <- to_install[!newly_installed] warning("Failed to install: ", paste(failed, collapse = ", "), call. = FALSE) } } else { - message("All requested packages are already installed.") + if (verbose) message("All requested packages are already installed.") } invisible(NULL) diff --git a/README.md b/README.md index 4fe52fd..90fceaf 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ # GEDI-2 -[![R-CMD-check](https://github.com/Arshammik/gedi/workflows/R-CMD-check/badge.svg)](https://github.com/csglab/gedi2/actions) -[![test-coverage](https://github.com/Arshammik/gedi/workflows/test-coverage/badge.svg)](https://github.com/csglab/gedi2/actions) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![R-CMD-check](https://github.com/csglab/gedi2/workflows/R-CMD-check/badge.svg)](https://github.com/csglab/gedi2/actions) +[![test-coverage](https://github.com/csglab/gedi2/workflows/test-coverage/badge.svg)](https://github.com/csglab/gedi2/actions) +[![CRAN status](https://www.r-pkg.org/badges/version/gedi)](https://CRAN.R-project.org/package=gedi) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/MIT) [![Documentation](https://img.shields.io/badge/Docs-Learn%20More-blue.svg)](https://github.com/csglab/gedi2/wiki) A high-performance, memory-efficient R package for integrating gene expression data from single-cell RNA sequencing experiments. GEDI 2.0 implements a unified generative model for interpretable latent embedding of multi-sample, multi-condition single-cell data. @@ -44,7 +45,7 @@ Contributions are welcome! Please: 3. Submit a pull request ### Reporting Issues -Report bugs and request features at: [https://github.com/caglab/gedi2/issues](https://github.com/csglab/gedi2/issues) +Report bugs and request features at: [https://github.com/csglab/gedi2/issues](https://github.com/csglab/gedi2/issues) ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..ed7f1fb --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,25 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +## Test environments + +* Local Linux (AlmaLinux 9.6, R 4.4.0) +* GitHub Actions (ubuntu-latest, macOS-latest, windows-latest; R-release, R-oldrel) + +## Notes + +This is the first CRAN submission of the `gedi` package. + +### Conflicting package name + +The incoming feasibility check flags a conflicting name with Bioconductor's +`GeDi` package. Our package `gedi` (Gene Expression Decomposition and +Integration) is unrelated to `GeDi` (Gene Distance based on Enrichment and +Depletion for Inference). The names differ in capitalisation and the packages +serve entirely different purposes. + +### C++14 specification + +The package requires C++14 for the RcppEigen linear algebra backend. +OpenMP is optional and detected at compile time via Makevars. diff --git a/man/CreateGEDIObject.Rd b/man/CreateGEDIObject.Rd index 9d38725..b8af140 100644 --- a/man/CreateGEDIObject.Rd +++ b/man/CreateGEDIObject.Rd @@ -12,8 +12,8 @@ CreateGEDIObject( colData = NULL, C = NULL, H = NULL, - K = 10, - mode = "Bl2", + K = 40, + mode = "Bsphere", adjustD = TRUE, orthoZ = TRUE, Z_shrinkage = 1, @@ -27,7 +27,7 @@ CreateGEDIObject( rsvd_p = 10, rsvd_sdist = "normal", verbose = 1, - num_threads = 0 + num_threads = 1 ) } \arguments{ @@ -42,9 +42,9 @@ C++ will compute Yi = log(M+1) internally - no Yi copy stored in R!} \item{colData}{Optional data.frame with cell metadata} -\item{C}{Gene-level prior matrix (genes × pathways)} +\item{C}{Gene-level prior matrix (genes x pathways)} -\item{H}{Sample-level covariate matrix (covariates × samples)} +\item{H}{Sample-level covariate matrix (covariates x samples)} \item{K}{Number of latent factors (default: 10)} diff --git a/man/compute_color_limits.Rd b/man/compute_color_limits.Rd index c2185bf..5948b4e 100644 --- a/man/compute_color_limits.Rd +++ b/man/compute_color_limits.Rd @@ -6,6 +6,9 @@ \usage{ compute_color_limits(values, symmetric = TRUE, quantile = 0.99) } +\value{ +A numeric vector of length 2 with lower and upper limits. +} \description{ Compute Color Limits from Data } diff --git a/man/dot-get_embedding.Rd b/man/dot-get_embedding.Rd index be31573..a9e37a0 100644 --- a/man/dot-get_embedding.Rd +++ b/man/dot-get_embedding.Rd @@ -9,14 +9,14 @@ \arguments{ \item{model}{GEDI model object} -\item{embedding_type}{Character: "umap", "pca", or a custom N×2 matrix} +\item{embedding_type}{Character: "umap", "pca", or a custom Nx2 matrix} \item{dims}{Integer vector of length 2 for which dimensions to use (default c(1,2))} \item{verbose}{Logical, print messages about computation} } \value{ -N×2 matrix of embedding coordinates +Nx2 matrix of embedding coordinates } \description{ Get Embedding Coordinates with Smart Caching diff --git a/man/dot-plot_embedding_base.Rd b/man/dot-plot_embedding_base.Rd index b182b1a..2a0cd93 100644 --- a/man/dot-plot_embedding_base.Rd +++ b/man/dot-plot_embedding_base.Rd @@ -20,7 +20,7 @@ ) } \arguments{ -\item{embedding}{Matrix (N × 2) with x and y coordinates for each cell} +\item{embedding}{Matrix (N x 2) with x and y coordinates for each cell} \item{color}{Vector of length N for coloring points, or NULL for uniform color} diff --git a/man/gedi-package.Rd b/man/gedi-package.Rd index 7983735..8f576a6 100644 --- a/man/gedi-package.Rd +++ b/man/gedi-package.Rd @@ -4,9 +4,9 @@ \name{gedi-package} \alias{gedi} \alias{gedi-package} -\title{gedi: Gene Expression Data Integration} +\title{gedi: Gene Expression Decomposition and Integration} \description{ -A memory-efficient implementation for integrating gene expression data from single-cell RNA sequencing experiments. GEDI v2 uses a C++ backend with thin R wrappers to enable analysis of large-scale single-cell datasets. The package supports multiple data modalities including count matrices, paired data (Splicing, Velocyto, CITE-seq), and binary indicators. It implements a latent variable model with block coordinate descent optimization for dimensionality reduction and batch effect correction. +A memory-efficient implementation for integrating gene expression data from single-cell RNA sequencing experiments. Uses a C++ backend with thin R wrappers to enable analysis of large-scale single-cell datasets. The package supports multiple data modalities including count matrices, paired data (splicing, RNA velocity, CITE-seq), and binary indicators. It implements a latent variable model with block coordinate descent optimization for dimensionality reduction and batch effect correction. A memory-efficient implementation for integrating gene expression data from single-cell RNA sequencing experiments. GEDI v2 uses a high-performance C++ @@ -90,14 +90,13 @@ Add your publication reference here when available. \seealso{ Useful links: \itemize{ - \item \url{https://github.com/Arshammik/gedi} - \item Report bugs at \url{https://github.com/Arshammik/gedi/issues} + \item \url{https://github.com/csglab/gedi2} + \item Report bugs at \url{https://github.com/csglab/gedi2/issues} } \itemize{ \item \code{\link{CreateGEDIObject}}: Create a GEDI model -\item \code{\link{GEDI}}: R6 class documentation } } \author{ diff --git a/man/gedi_to_seurat.Rd b/man/gedi_to_seurat.Rd index 4238c6b..48f8453 100644 --- a/man/gedi_to_seurat.Rd +++ b/man/gedi_to_seurat.Rd @@ -13,7 +13,8 @@ gedi_to_seurat( add_projections = TRUE, add_embeddings = TRUE, min_cells = 0, - min_features = 0 + min_features = 0, + verbose = TRUE ) } \arguments{ @@ -38,13 +39,20 @@ available (default: TRUE)} \item{min_cells}{Integer, filter genes with counts in < min_cells (default: 0)} \item{min_features}{Integer, filter cells with < min_features genes (default: 0)} + +\item{verbose}{Logical, whether to print progress messages (default: TRUE)} } \value{ Seurat object with: \itemize{ \item RNA assay: Original or back-transformed counts \item imputed assay: GEDI imputed expression (if use_imputed = TRUE) -\item ZDB/DB assays: GEDI projections (if add_projections = TRUE) +\item ZDB/DB/ADB assays: GEDI projections (if add_projections = TRUE) +\itemize{ +\item ZDB: Batch-corrected gene expression (genes x cells) +\item DB: Cell embeddings in latent factor space (K x cells) +\item ADB: Pathway activities (pathways x cells) - only if C matrix provided +} \item umap/pca reductions: Embeddings (if add_embeddings = TRUE and cached) \item meta.data: Sample labels and colData from GEDI model } diff --git a/man/install_optional_dependencies.Rd b/man/install_optional_dependencies.Rd index 8490bd9..41b3e6e 100644 --- a/man/install_optional_dependencies.Rd +++ b/man/install_optional_dependencies.Rd @@ -4,7 +4,11 @@ \alias{install_optional_dependencies} \title{Install Optional GEDI Dependencies} \usage{ -install_optional_dependencies(which = "all", repos = getOption("repos")) +install_optional_dependencies( + which = "all", + repos = getOption("repos"), + verbose = TRUE +) } \arguments{ \item{which}{Character vector specifying which dependencies to install. @@ -12,6 +16,8 @@ Options: "h5" (hdf5r), "umap" (uwot), "validation" (digest), or "all" (default). \item{repos}{Character vector. The CRAN repository to use for installation. Default is the user's configured repository.} + +\item{verbose}{Logical, whether to print progress messages (default: TRUE)} } \value{ NULL (invisibly) diff --git a/man/plot_embedding.Rd b/man/plot_embedding.Rd index dd81635..77197af 100644 --- a/man/plot_embedding.Rd +++ b/man/plot_embedding.Rd @@ -26,7 +26,7 @@ plot_embedding( \arguments{ \item{model}{GEDI model object (or embedding matrix for backwards compatibility)} -\item{embedding}{Character ("umap", "pca") or N×2 matrix} +\item{embedding}{Character ("umap", "pca") or Nx2 matrix} \item{color_by}{Character: "sample", metadata column, gene name, or NULL} diff --git a/man/plot_feature_ratio.Rd b/man/plot_feature_ratio.Rd index 57db217..bbace9b 100644 --- a/man/plot_feature_ratio.Rd +++ b/man/plot_feature_ratio.Rd @@ -28,7 +28,7 @@ plot_feature_ratio( \item{comparison}{Character, type of comparison ("difference" or "correlation")} \item{embedding}{Character specifying embedding type ("umap", "pca") or -a custom N × 2 matrix} +a custom N x 2 matrix} \item{projection}{Character, type of projection ("zdb" or "db")} @@ -51,7 +51,7 @@ the projected space. Mathematically grounded for GEDI's log-space representation } \details{ For comparison = "difference": -Computes (Z\link{gene1,} - Z\link{gene2,}) * D * B, equivalent to ZDB\link{gene1,} - ZDB\link{gene2,}. +Computes \code{(Z[gene1,] - Z[gene2,]) * D * B}, equivalent to \code{ZDB[gene1,] - ZDB[gene2,]}. In log-space, this represents log(gene1/gene2) in the original count space. Positive values indicate gene1 > gene2, negative indicates gene2 > gene1. } diff --git a/man/plot_features.Rd b/man/plot_features.Rd index e2db274..cbf76fd 100644 --- a/man/plot_features.Rd +++ b/man/plot_features.Rd @@ -23,7 +23,7 @@ plot_features( \item{features}{Character vector of gene names or integer indices} \item{embedding}{Character specifying embedding type ("umap", "pca") or -a custom N × 2 matrix} +a custom N x 2 matrix} \item{projection}{Character, type of projection to compute ("zdb" or "db")} diff --git a/man/print.gedi_dynamics.Rd b/man/print.gedi_dynamics.Rd index d576f27..485a818 100644 --- a/man/print.gedi_dynamics.Rd +++ b/man/print.gedi_dynamics.Rd @@ -4,13 +4,16 @@ \alias{print.gedi_dynamics} \title{Print Method for Dynamics Accessor} \usage{ -print.gedi_dynamics(x, ...) +\method{print}{gedi_dynamics}(x, ...) } \arguments{ \item{x}{Object of class gedi_dynamics} \item{...}{Additional arguments (ignored)} } +\value{ +Invisibly returns \code{x}. +} \description{ Print Method for Dynamics Accessor } diff --git a/man/print.gedi_dynamics_svd.Rd b/man/print.gedi_dynamics_svd.Rd index 301618c..27a86ea 100644 --- a/man/print.gedi_dynamics_svd.Rd +++ b/man/print.gedi_dynamics_svd.Rd @@ -4,13 +4,16 @@ \alias{print.gedi_dynamics_svd} \title{Print Method for Dynamics SVD Results} \usage{ -print.gedi_dynamics_svd(x, ...) +\method{print}{gedi_dynamics_svd}(x, ...) } \arguments{ \item{x}{Object of class gedi_dynamics_svd} \item{...}{Additional arguments (ignored)} } +\value{ +Invisibly returns \code{x}. +} \description{ Print Method for Dynamics SVD Results } diff --git a/man/print.gedi_imputation.Rd b/man/print.gedi_imputation.Rd index d9a8f1b..b790769 100644 --- a/man/print.gedi_imputation.Rd +++ b/man/print.gedi_imputation.Rd @@ -4,13 +4,16 @@ \alias{print.gedi_imputation} \title{Print Method for Imputation Accessor} \usage{ -print.gedi_imputation(x, ...) +\method{print}{gedi_imputation}(x, ...) } \arguments{ \item{x}{Object of class gedi_imputation} \item{...}{Additional arguments (ignored)} } +\value{ +Invisibly returns \code{x}. +} \description{ Print Method for Imputation Accessor } diff --git a/man/print.gedi_pathway_associations.Rd b/man/print.gedi_pathway_associations.Rd index 8d9824d..544f021 100644 --- a/man/print.gedi_pathway_associations.Rd +++ b/man/print.gedi_pathway_associations.Rd @@ -4,13 +4,16 @@ \alias{print.gedi_pathway_associations} \title{Print Method for Pathway Associations Accessor} \usage{ -print.gedi_pathway_associations(x, ...) +\method{print}{gedi_pathway_associations}(x, ...) } \arguments{ \item{x}{Object of class gedi_pathway_associations} \item{...}{Additional arguments (ignored)} } +\value{ +Invisibly returns \code{x}. +} \description{ Print Method for Pathway Associations Accessor } diff --git a/man/scale_color_gedi_discrete.Rd b/man/scale_color_gedi_discrete.Rd index 5a85166..6cb4114 100644 --- a/man/scale_color_gedi_discrete.Rd +++ b/man/scale_color_gedi_discrete.Rd @@ -6,6 +6,9 @@ \usage{ scale_color_gedi_discrete(name = "Group", ...) } +\value{ +A \code{ggplot2} discrete color scale. +} \description{ Discrete Color Palette for GEDI } diff --git a/man/scale_color_gedi_diverging.Rd b/man/scale_color_gedi_diverging.Rd index 0b32271..ae1c61c 100644 --- a/man/scale_color_gedi_diverging.Rd +++ b/man/scale_color_gedi_diverging.Rd @@ -6,6 +6,9 @@ \usage{ scale_color_gedi_diverging(limits = NULL, name = "Value", ...) } +\value{ +A \code{ggplot2} continuous color scale. +} \description{ Diverging Color Scale for GEDI Plots } diff --git a/man/scale_fill_gedi_diverging.Rd b/man/scale_fill_gedi_diverging.Rd index 9f960ae..572a125 100644 --- a/man/scale_fill_gedi_diverging.Rd +++ b/man/scale_fill_gedi_diverging.Rd @@ -6,6 +6,9 @@ \usage{ scale_fill_gedi_diverging(limits = NULL, name = "Value", ...) } +\value{ +A \code{ggplot2} continuous fill scale. +} \description{ Fill Scale for GEDI Plots } diff --git a/man/seurat_to_gedi.Rd b/man/seurat_to_gedi.Rd index 2e90317..3ff2688 100644 --- a/man/seurat_to_gedi.Rd +++ b/man/seurat_to_gedi.Rd @@ -16,6 +16,7 @@ seurat_to_gedi( H = NULL, validate_counts = TRUE, use_variable_features = TRUE, + verbose = TRUE, ... ) } @@ -37,9 +38,9 @@ this will automatically detect and combine all matching layers.} \item{mode}{Character, normalization mode: "Bl2" or "Bsphere" (default: "Bl2")} -\item{C}{Gene-level prior matrix (genes × pathways) (default: NULL)} +\item{C}{Gene-level prior matrix (genes x pathways) (default: NULL)} -\item{H}{Sample-level covariate matrix (covariates × samples) (default: NULL)} +\item{H}{Sample-level covariate matrix (covariates x samples) (default: NULL)} \item{validate_counts}{Logical, whether to validate data appears to be counts (default: TRUE)} @@ -47,6 +48,8 @@ this will automatically detect and combine all matching layers.} \item{use_variable_features}{Logical, whether to subset to highly variable features (default: TRUE). If TRUE, uses genes from VariableFeatures(seurat_object).} +\item{verbose}{Logical, whether to print progress messages (default: TRUE)} + \item{...}{Additional arguments passed to CreateGEDIObject()} } \value{ diff --git a/man/theme_gedi.Rd b/man/theme_gedi.Rd index 24c25cb..8e4f11c 100644 --- a/man/theme_gedi.Rd +++ b/man/theme_gedi.Rd @@ -6,6 +6,9 @@ \usage{ theme_gedi(base_size = 11) } +\value{ +A \code{ggplot2} theme object. +} \description{ GEDI Plot Theme } diff --git a/src/Makevars b/src/Makevars index ad32432..326364b 100644 --- a/src/Makevars +++ b/src/Makevars @@ -7,12 +7,8 @@ CXX_STD = CXX14 # Compiler flags # - Disable Eigen's deprecated warnings -# - Disable unused-parameter warnings (common in Rcpp callback functions) -# - Disable ignored-attributes warnings (common with Eigen + OpenMP) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) \ - -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS \ - -Wno-unused-parameter \ - -Wno-ignored-attributes + -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS # Linker flags for OpenMP and BLAS/LAPACK PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/Makevars.win b/src/Makevars.win index 0a6ed02..56b2772 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -7,12 +7,8 @@ CXX_STD = CXX14 # Compiler flags for Windows # - Disable Eigen's deprecated warnings -# - Disable unused-parameter warnings (common in Rcpp callback functions) -# - Disable ignored-attributes warnings (common with Eigen + OpenMP) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) \ - -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS \ - -Wno-unused-parameter \ - -Wno-ignored-attributes + -DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS # Linker flags for OpenMP and BLAS/LAPACK on Windows PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/src/gedi_core.cpp b/src/gedi_core.cpp index 4da3fb5..440f154 100644 --- a/src/gedi_core.cpp +++ b/src/gedi_core.cpp @@ -98,7 +98,7 @@ class GEDI { MatrixXd params_Ro; double params_sigma2; - std::vector aux_DBi; // K × Ni per sample (memory optimized) + std::vector aux_DBi; // K x Ni per sample (memory optimized) std::vector aux_Qi_hat; std::vector aux_oi_hat; MatrixXd aux_C; @@ -239,7 +239,7 @@ class GEDI { params_Ro = MatrixXd(0, 0); } - // Initialize DBi from R's initial values (K × Ni per sample) + // Initialize DBi from R's initial values (K x Ni per sample) List DBi_list = aux["DBi"]; aux_DBi.resize(numSamples); for (int i = 0; i < numSamples; ++i) { diff --git a/src/gedi_differential.cpp b/src/gedi_differential.cpp index 51c2944..c04447e 100644 --- a/src/gedi_differential.cpp +++ b/src/gedi_differential.cpp @@ -22,8 +22,8 @@ using namespace Eigen; * offsets: diffO = Ro * H.rotation * contrast. * This is the C++ implementation of the old getDiffO.gedi() * - * @param Ro Matrix (J × L) representing effect of sample variables on gene offsets - * @param H_rotation Rotation matrix (L × num_covariates) + * @param Ro Matrix (J x L) representing effect of sample variables on gene offsets + * @param H_rotation Rotation matrix (L x num_covariates) * @param contrast Vector of length L specifying the contrast * @param verbose Integer verbosity level * @@ -79,16 +79,16 @@ Eigen::VectorXd getDiffO_cpp( /** * Compute Differential Q in Z-space (Internal C++) * - * Computes sample-variable effects on Qi, returning a J × K matrix. + * Computes sample-variable effects on Qi, returning a J x K matrix. * This is the C++ implementation of the old getDiffQ.gedi() * - * @param Rk_list List of K matrices (each J × L), representing the effect of + * @param Rk_list List of K matrices (each J x L), representing the effect of * sample-level variables on each latent factor k - * @param H_rotation Rotation matrix (L × num_covariates) + * @param H_rotation Rotation matrix (L x num_covariates) * @param contrast Vector of length L specifying the contrast * @param verbose Integer verbosity level * - * @return Dense matrix diffQ of dimensions J × K representing the predicted + * @return Dense matrix diffQ of dimensions J x K representing the predicted * differential effect in Z-space. * * @keywords internal @@ -123,11 +123,11 @@ Eigen::MatrixXd getDiffQ_cpp( } if (verbose >= 1) { - Rcout << "Computing diffQ (Z-space): " << J << " genes × " << K << " factors" << std::endl; + Rcout << "Computing diffQ (Z-space): " << J << " genes x " << K << " factors" << std::endl; } // Pre-compute H.rotation * contrast - VectorXd H_contrast = H_rotation * contrast; // L × 1 + VectorXd H_contrast = H_rotation * contrast; // L x 1 // Allocate result matrix MatrixXd diffQ_Z_space(J, K); @@ -137,19 +137,19 @@ Eigen::MatrixXd getDiffQ_cpp( // Loop for k = 2 to K for (int k = 1; k < K; ++k) { - Eigen::MatrixXd Rk = as(Rk_list[k]); // J × L + Eigen::MatrixXd Rk = as(Rk_list[k]); // J x L // Validate dimensions for subsequent Rk if (Rk.rows() != J || Rk.cols() != L) { stop("Dimension mismatch: all Rk matrices must have the same JxL dimensions"); } - // Compute effect for this factor: Rk * H_contrast (J × 1) + // Compute effect for this factor: Rk * H_contrast (J x 1) diffQ_Z_space.col(k) = Rk * H_contrast; } if (verbose >= 1) { - Rcout << "✓ diffQ (Z-space) computed" << std::endl; + Rcout << "[OK] diffQ (Z-space) computed" << std::endl; } return diffQ_Z_space; @@ -162,14 +162,14 @@ Eigen::MatrixXd getDiffQ_cpp( * Computes the cell-specific differential expression effect (J x N). * This is the C++ implementation of the old getDiffExp.gedi() * - * @param Rk_list List of K matrices (each J × L) - * @param H_rotation Rotation matrix (L × num_covariates) + * @param Rk_list List of K matrices (each J x L) + * @param H_rotation Rotation matrix (L x num_covariates) * @param contrast Vector of length L specifying the contrast * @param D Scaling vector (length K) - * @param Bi_list List of sample-specific cell projection matrices (K × Ni each) + * @param Bi_list List of sample-specific cell projection matrices (K x Ni each) * @param verbose Integer verbosity level * - * @return Dense matrix diffExp of dimensions J × N representing the predicted + * @return Dense matrix diffExp of dimensions J x N representing the predicted * differential expression effect for each gene in each cell. * * @keywords internal @@ -221,7 +221,7 @@ Eigen::MatrixXd getDiffExp_cpp( } if (verbose >= 1) { - Rcout << "Computing diffExp (Cell-space): " << J << " genes × " << N << " cells" << std::endl; + Rcout << "Computing diffExp (Cell-space): " << J << " genes x " << N << " cells" << std::endl; if (verbose >= 2) { Rcout << " Contrast dimension: " << L << ", Factors: " << K << std::endl; } @@ -239,12 +239,12 @@ Eigen::MatrixXd getDiffExp_cpp( col_offset += Ni_current; } - MatrixXd DB = D.asDiagonal() * B; // K × N + MatrixXd DB = D.asDiagonal() * B; // K x N // === Compute: diffExp = sum_k (Rk * H.rotation * contrast) * DB[k, :] === if (verbose >= 2) Rcout << " Computing sum of outer products..." << std::endl; - // Pre-compute H.rotation * contrast (L × 1 vector) + // Pre-compute H.rotation * contrast (L x 1 vector) VectorXd H_contrast = H_rotation * contrast; MatrixXd diffExp = MatrixXd::Zero(J, N); @@ -259,19 +259,19 @@ Eigen::MatrixXd getDiffExp_cpp( Rcout << " Factor " << (k + 1) << "/" << K << std::endl; } - Eigen::MatrixXd Rk = as(Rk_list[k]); // J × L + Eigen::MatrixXd Rk = as(Rk_list[k]); // J x L - // Compute effect for this factor: Rk * H_contrast (J × 1) + // Compute effect for this factor: Rk * H_contrast (J x 1) VectorXd effect_k = Rk * H_contrast; - // Add outer product: effect_k (J×1) with DB.row(k) (1×N) + // Add outer product: effect_k (Jx1) with DB.row(k) (1xN) diffExp += effect_k * DB.row(k); } if (verbose >= 1) { double mean_val = diffExp.mean(); double std_val = std::sqrt((diffExp.array() - mean_val).square().mean()); - Rcout << "✓ diffExp computed: mean = " << mean_val + Rcout << "[OK] diffExp computed: mean = " << mean_val << ", std = " << std_val << std::endl; } diff --git a/src/gedi_dimred.cpp b/src/gedi_dimred.cpp index 4833b3a..bb3e0f9 100644 --- a/src/gedi_dimred.cpp +++ b/src/gedi_dimred.cpp @@ -8,20 +8,20 @@ using namespace Eigen; /** * Compute Factorized SVD (Internal C++) * - * Performs factorized SVD preserving GEDI structure: SVD(Z) × SVD(middle) × SVD(DB). + * Performs factorized SVD preserving GEDI structure: SVD(Z) x SVD(middle) x SVD(DB). * This function computes DB internally from the model parameters (D, Bi_list). * This is used for the standard, cached SVD/PCA embeddings. * - * @param Z Shared metagene matrix (J × K) + * @param Z Shared metagene matrix (J x K) * @param D Scaling vector (length K) - * @param Bi_list List of sample-specific cell projection matrices (K × Ni each) + * @param Bi_list List of sample-specific cell projection matrices (K x Ni each) * @param verbose Integer verbosity level (0 = silent, 1 = progress, 2 = detailed) * * @return List with three components: * \itemize{ * \item d: Singular values (length K vector) - * \item u: Left singular vectors (J × K matrix, genes × factors) - * \item v: Right singular vectors (N × K matrix, cells × factors) + * \item u: Left singular vectors (J x K matrix, genes x factors) + * \item v: Right singular vectors (N x K matrix, cells x factors) * } * * @keywords internal @@ -71,9 +71,9 @@ Rcpp::List compute_svd_factorized_cpp( // ======================================================================== JacobiSVD svd_Z(Z, ComputeThinU | ComputeThinV); - MatrixXd U_Z = svd_Z.matrixU(); // J × K + MatrixXd U_Z = svd_Z.matrixU(); // J x K VectorXd S_Z = svd_Z.singularValues(); // K - MatrixXd V_Z = svd_Z.matrixV(); // K × K + MatrixXd V_Z = svd_Z.matrixV(); // K x K // ======================================================================== // Step 2: Concatenate B and compute DB @@ -90,16 +90,16 @@ Rcpp::List compute_svd_factorized_cpp( } // Apply diagonal scaling: DB = diag(D) * B - MatrixXd DB = D.asDiagonal() * B; // K × N + MatrixXd DB = D.asDiagonal() * B; // K x N // ======================================================================== // Step 3: SVD of DB // ======================================================================== JacobiSVD svd_DB(DB, ComputeThinU | ComputeThinV); - MatrixXd U_DB = svd_DB.matrixU(); // K × K + MatrixXd U_DB = svd_DB.matrixU(); // K x K VectorXd S_DB = svd_DB.singularValues(); // K - MatrixXd V_DB = svd_DB.matrixV(); // N × K + MatrixXd V_DB = svd_DB.matrixV(); // N x K // ======================================================================== // Step 4: Form and decompose middle matrix @@ -113,16 +113,16 @@ Rcpp::List compute_svd_factorized_cpp( // ======================================================================== JacobiSVD svd_middle(middle, ComputeThinU | ComputeThinV); - MatrixXd U_middle = svd_middle.matrixU(); // K × K + MatrixXd U_middle = svd_middle.matrixU(); // K x K VectorXd S_middle = svd_middle.singularValues(); // K - MatrixXd V_middle = svd_middle.matrixV(); // K × K + MatrixXd V_middle = svd_middle.matrixV(); // K x K // ======================================================================== // Reconstruct final SVD and return // ======================================================================== - MatrixXd u = U_Z * U_middle; // J × K - MatrixXd v = V_DB * V_middle; // N × K + MatrixXd u = U_Z * U_middle; // J x K + MatrixXd v = V_DB * V_middle; // N x K VectorXd d = S_middle; // K return List::create( @@ -136,21 +136,21 @@ Rcpp::List compute_svd_factorized_cpp( /** * Run Factorized SVD on a Custom Projection (Internal C++) * - * Performs the GEDI 3-stage factorized SVD: SVD(Z) × SVD(middle) × SVD(projDB). + * Performs the GEDI 3-stage factorized SVD: SVD(Z) x SVD(middle) x SVD(projDB). * This general-purpose version takes a *pre-computed* projDB matrix as input. * This is used by all dynamics functions (vector field, gradient) to analyze * hypothetical or transition states. * - * @param Z Shared metagene matrix (J × K) - * @param projDB Custom projected cell matrix (K × N_total), where N_total + * @param Z Shared metagene matrix (J x K) + * @param projDB Custom projected cell matrix (K x N_total), where N_total * could be N, 2N, 3N, etc. depending on the dynamic analysis. * @param verbose Integer verbosity level (0 = silent, 1 = progress, 2 = detailed) * * @return List with three components: * \itemize{ * \item d: Singular values (length K vector) - * \item u: Left singular vectors (J × K matrix, genes × factors) - * \item v: Right singular vectors (N_total × K matrix, custom_cells × factors) + * \item u: Left singular vectors (J x K matrix, genes x factors) + * \item v: Right singular vectors (N_total x K matrix, custom_cells x factors) * } * * @keywords internal @@ -181,18 +181,18 @@ Rcpp::List run_factorized_svd_cpp( // ======================================================================== JacobiSVD svd_Z(Z, ComputeThinU | ComputeThinV); - MatrixXd U_Z = svd_Z.matrixU(); // J × K + MatrixXd U_Z = svd_Z.matrixU(); // J x K VectorXd S_Z = svd_Z.singularValues(); // K - MatrixXd V_Z = svd_Z.matrixV(); // K × K + MatrixXd V_Z = svd_Z.matrixV(); // K x K // ======================================================================== // Step 2: SVD of projDB (Input matrix) // ======================================================================== JacobiSVD svd_projDB(projDB, ComputeThinU | ComputeThinV); - MatrixXd U_DB = svd_projDB.matrixU(); // K × K + MatrixXd U_DB = svd_projDB.matrixU(); // K x K VectorXd S_DB = svd_projDB.singularValues(); // K - MatrixXd V_DB = svd_projDB.matrixV(); // N_total × K + MatrixXd V_DB = svd_projDB.matrixV(); // N_total x K // ======================================================================== // Step 3: Form and decompose middle matrix @@ -206,16 +206,16 @@ Rcpp::List run_factorized_svd_cpp( // ======================================================================== JacobiSVD svd_middle(middle, ComputeThinU | ComputeThinV); - MatrixXd U_middle = svd_middle.matrixU(); // K × K + MatrixXd U_middle = svd_middle.matrixU(); // K x K VectorXd S_middle = svd_middle.singularValues(); // K - MatrixXd V_middle = svd_middle.matrixV(); // K × K + MatrixXd V_middle = svd_middle.matrixV(); // K x K // ======================================================================== // Reconstruct final SVD and return // ======================================================================== - MatrixXd u = U_Z * U_middle; // J × K - MatrixXd v = V_DB * V_middle; // N_total × K + MatrixXd u = U_Z * U_middle; // J x K + MatrixXd v = V_DB * V_middle; // N_total x K VectorXd d = S_middle; // K return List::create( diff --git a/src/gedi_imputation.cpp b/src/gedi_imputation.cpp index 5ac39f3..6a80c60 100644 --- a/src/gedi_imputation.cpp +++ b/src/gedi_imputation.cpp @@ -27,7 +27,7 @@ using namespace Eigen; //' @return Dense matrix (J x Ni) - residual Yi after removing sample effects //' //' @details -//' Computes: Yi - QiDBi - (si ⊗ 1ᵀ) - (o + oi) ⊗ 1ᵀ +//' Computes: Yi - QiDBi - (si (x) 1^T) - (o + oi) (x) 1^T //' This leaves only the ZDBi component (shared biological signal). //' //' @keywords internal @@ -66,7 +66,7 @@ SEXP Yi_resZ( //' @return Dense matrix (J x Ni) - predicted Yi //' //' @details -//' Computes: Ŷi = ZDBi + QiDBi + (si ⊗ 1ᵀ) + (o + oi) ⊗ 1ᵀ +//' Computes: Y_hati = ZDBi + QiDBi + (si (x) 1^T) + (o + oi) (x) 1^T //' This is the full model prediction for log-expression. //' //' @keywords internal @@ -111,7 +111,7 @@ SEXP predict_Yhat( //' //' This comes from the Poisson-lognormal model where: //' - Mi ~ Poisson(exp(Yi)) -//' - Yi ~ N(Ŷi, sigma2) +//' - Yi ~ N(Y_hati, sigma2) //' //' The posterior variance decreases where: //' - Counts are high (exp(Yi) large) @@ -151,7 +151,7 @@ SEXP Yi_var_M( //' //' This comes from the binomial-logistic-normal model where: //' - M1i ~ Binomial(M1i + M2i, p) -//' - logit(p) = Yi ~ N(Ŷi, sigma2) +//' - logit(p) = Yi ~ N(Y_hati, sigma2) //' //' The variance depends on: //' - Total counts M (more counts = less variance) @@ -213,7 +213,7 @@ SEXP Yi_var_M_paired( //' for downstream dispersion analysis (binning and aggregation done in R). //' //' Memory optimization: Works only on sparse nonzero positions (typically 5-10% of matrix). -//' For 30K genes × 10K cells with 5% nonzero: samples from ~15M positions instead of 300M. +//' For 30K genes x 10K cells with 5% nonzero: samples from ~15M positions instead of 300M. //' //' @keywords internal //' @noRd @@ -314,7 +314,7 @@ double Yi_SSE_M_paired( Eigen::VectorXd oi, double sigma2) { - // Compute residual: Yi - Ŷi + // Compute residual: Yi - Y_hati Eigen::MatrixXd residual = Yi - ZDBi - QiDBi; residual.rowwise() -= si.transpose(); residual.colwise() -= (o + oi); diff --git a/src/gedi_plots.cpp b/src/gedi_plots.cpp index f02667e..7711d17 100644 --- a/src/gedi_plots.cpp +++ b/src/gedi_plots.cpp @@ -15,7 +15,7 @@ using namespace Eigen; //' //' @param feature_weights Vector of length K (factor loadings for this feature) //' @param D Scaling vector of length K -//' @param Bi_list List of sample-specific cell projection matrices (K × Ni each) +//' @param Bi_list List of sample-specific cell projection matrices (K x Ni each) //' @param verbose Integer verbosity level //' //' @return Vector of length N (total cells) with projected values @@ -78,12 +78,12 @@ Eigen::VectorXd compute_feature_projection( //' Projects multiple features through the GEDI model simultaneously. //' Computes: (feature_weights * D) %*% B for F features. //' -//' @param feature_weights Matrix K × F (factor loadings for F features) +//' @param feature_weights Matrix K x F (factor loadings for F features) //' @param D Scaling vector of length K -//' @param Bi_list List of sample-specific cell projection matrices (K × Ni each) +//' @param Bi_list List of sample-specific cell projection matrices (K x Ni each) //' @param verbose Integer verbosity level //' -//' @return Matrix N × F with projected values for each feature +//' @return Matrix N x F with projected values for each feature //' //' @keywords internal //' @noRd @@ -114,7 +114,7 @@ Eigen::MatrixXd compute_multi_feature_projection( N += Bi.cols(); } - // Allocate result: N × F + // Allocate result: N x F MatrixXd result(N, F); int offset = 0; @@ -133,7 +133,7 @@ Eigen::MatrixXd compute_multi_feature_projection( } if (verbose >= 1) { - Rcout << "Multi-feature projection computed: " << N << " cells × " + Rcout << "Multi-feature projection computed: " << N << " cells x " << F << " features" << std::endl; } diff --git a/src/gedi_projections.cpp b/src/gedi_projections.cpp index 6ba0edd..5e76e5c 100644 --- a/src/gedi_projections.cpp +++ b/src/gedi_projections.cpp @@ -21,10 +21,10 @@ using namespace Eigen; //' the concatenation of all sample-specific Bi matrices. This is the main //' integrated representation of cells in the GEDI latent space. //' -//' @param Z Shared metagene matrix (J × K), where J = genes, K = latent factors +//' @param Z Shared metagene matrix (J x K), where J = genes, K = latent factors //' @param D Scaling vector (length K) representing the importance of each factor //' @param Bi_list List of sample-specific cell projection matrices, where each -//' Bi is a K × Ni matrix (Ni = number of cells in sample i) +//' Bi is a K x Ni matrix (Ni = number of cells in sample i) //' @param verbose Integer verbosity level: //' \itemize{ //' \item 0: Silent (no output) @@ -32,7 +32,7 @@ using namespace Eigen; //' \item 2: Detailed per-sample information //' } //' -//' @return Dense matrix ZDB of dimensions J × N, where N = sum(Ni) is the total +//' @return Dense matrix ZDB of dimensions J x N, where N = sum(Ni) is the total //' number of cells across all samples. Each column represents a cell in the //' integrated latent space. //' @@ -42,17 +42,17 @@ using namespace Eigen; //' //' Computational strategy: //' \enumerate{ -//' \item Pre-compute ZD = Z * diag(D) once (saves K×J×numSamples operations) +//' \item Pre-compute ZD = Z * diag(D) once (saves KxJxnumSamples operations) //' \item For each sample i: compute ZD * Bi and concatenate //' \item Use Eigen block operations for efficient memory access //' } //' -//' Memory: Allocates one J × N dense matrix. For large datasets (e.g., 20k genes, +//' Memory: Allocates one J x N dense matrix. For large datasets (e.g., 20k genes, //' 50k cells), this requires ~8 GB RAM. Consider computing projections on demand //' or working with subsets if memory is limited. //' //' Performance: OpenMP parallelization available if enabled during compilation. -//' Typical speed: ~100-200ms for 20k × 5k dataset on modern CPU. +//' Typical speed: ~100-200ms for 20k x 5k dataset on modern CPU. //' //' @keywords internal //' @noRd @@ -130,10 +130,10 @@ Eigen::MatrixXd compute_ZDB_cpp( //' //' @param D Scaling vector (length K) representing factor importance //' @param Bi_list List of sample-specific cell projection matrices, where each -//' Bi is a K × Ni matrix +//' Bi is a K x Ni matrix //' @param verbose Integer verbosity level (0 = silent, 1 = progress, 2 = detailed) //' -//' @return Dense matrix DB of dimensions K × N, where N = sum(Ni). Each column +//' @return Dense matrix DB of dimensions K x N, where N = sum(Ni). Each column //' represents a cell's coordinates in the latent factor space. //' //' @details @@ -151,7 +151,7 @@ Eigen::MatrixXd compute_ZDB_cpp( //' \item Apply diagonal scaling: diag(D) * B //' } //' -//' Memory: Much smaller than ZDB (K × N vs. J × N). For K=10 and N=50k cells, +//' Memory: Much smaller than ZDB (K x N vs. J x N). For K=10 and N=50k cells, //' requires only ~4 MB vs. ~8 GB for ZDB when J=20k. //' //' Performance: Very fast (~10-50ms) since K << J typically.