diff --git a/.gitignore b/.gitignore index bd1be61b0..8db067bcb 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,8 @@ docs/ .DS_Store mia.BiocCheck/ +src/*.dll +inst/extdata/taxonomic_profiles_mgx.tsv +inst/extdata/ecs_relab.tsv +inst/extdata/hmp2_metadata_2018-08-20.csv +tools/cache/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 36df187b1..f806d90e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,7 +40,8 @@ Authors@R: person(given = "Danielle", family = "Callan", role=c("ctb")), person(given = "Sam", family = "Hillman", role=c("ctb")), person(given = "Jesse", family = "Pasanen", role=c("ctb")), - person(given = "Eetu", family = "Tammi", role=c("ctb"))) + person(given = "Eetu", family = "Tammi", role=c("ctb")), + person(given = "Aituar", family = "Bektanov", role=c("ctb"))) Title: Microbiome analysis Description: mia implements tools for microbiome analysis based on the diff --git a/NAMESPACE b/NAMESPACE index 851cc6637..318c0d894 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ export(getDominant) export(getExperimentCrossAssociation) export(getExperimentCrossCorrelation) export(getHierarchyTree) +export(getJointRPCA) export(getLDA) export(getLowAbundant) export(getMDS) @@ -474,6 +475,7 @@ importFrom(vegan,dbrda) importFrom(vegan,decostand) importFrom(vegan,eigenvals) importFrom(vegan,monoMDS) +importFrom(vegan,optspace) importFrom(vegan,permutest) importFrom(vegan,rrarefy) importFrom(vegan,scores) diff --git a/R/getJointRPCA.R b/R/getJointRPCA.R new file mode 100644 index 000000000..f8d8b6f55 --- /dev/null +++ b/R/getJointRPCA.R @@ -0,0 +1,1191 @@ +## Joint RPCA front-end helpers +## +## Exported: +## - jointRPCAuniversal() +## - getJointRPCA() +## + +#' Run Joint-RPCA and store embedding in reducedDim +#' @name getJointRPCA +#' +#' @details +#' Convenience wrapper that runs Joint Robust PCA on one or more compositional +#' tables and stores the resulting sample embedding in \code{reducedDim(x, name)}, +#' similar to \code{runMDS()} and \code{runPCA()}. +#' +#' @param x A \code{SummarizedExperiment}, \code{TreeSummarizedExperiment}, +#' \code{MultiAssayExperiment}, or a related object supported by +#' \code{jointRPCAuniversal()}. +#' @param experiments Optional character vector of experiment names to use when +#' \code{x} is a \code{MultiAssayExperiment} (i.e. \code{names(experiments(x))}). +#' Ignored for \code{SummarizedExperiment} inputs. +#' @param altexp Optional name of an alternative experiment. If supplied, +#' Joint-RPCA is run on \code{altExp(x, altexp)} instead of \code{x}. +#' @param name Character scalar giving the name of the \code{reducedDim} slot +#' in which to store the joint sample embedding. Defaults to \code{"JointRPCA"}. +#' @param transform Character string specifying preprocessing applied to each +#' input table before ordination. Use \code{"rclr"} to apply the robust CLR +#' transform (via \code{decostand(method = "rclr")}) or \code{"none"} to +#' disable transformation (data are used as-is after masking non-finite values). +#' @param optspace.tol Numeric tolerance passed to \code{optspace()}. +#' @param center Logical; whether to center the reconstructed low-rank matrix +#' (double-centering) prior to SVD/PCA steps. +#' @param scale Logical; whether to scale the reconstructed matrix prior to +#' SVD/PCA steps. Defaults to \code{FALSE}. +#' @param ... Additional arguments passed to \code{jointRPCAuniversal()} and then +#' to the internal \code{.joint_rpca()} engine (e.g. 
\code{n.components}, +#' \code{min.sample.count}, \code{min.feature.count}, \code{min.feature.frequency}, +#' \code{max.iterations}, \code{sample.metadata}). +#' +#' @return The input object \code{x} with a new entry in +#' \code{reducedDim(x, name)} containing the Joint-RPCA sample embedding. +#' The full Joint-RPCA result (including distances, cross-validation +#' statistics and transformed tables) is stored in +#' \code{metadata(x)$JointRPCA[[name]]}. +#' +#' @export +NULL + +getJointRPCA <- function(x, + experiments = NULL, + altexp = NULL, + name = "JointRPCA", + transform = c("rclr", "none"), + optspace.tol = 1e-5, + center = TRUE, + scale = FALSE, + ...) { + transform <- match.arg(transform) + + # Select the object to operate on + y <- x + if (!is.null(altexp)) { + y <- altExp(x, altexp) + } + + # Use universal front-end to build tables + run .joint_rpca() + res <- jointRPCAuniversal( + y, + experiments = experiments, + transform = transform, + optspace.tol = optspace.tol, + center = center, + scale = scale, + ... + ) + + # Extract sample embedding + emb <- res[["ord_res"]][["samples"]] + + if (is.null(emb)) { + stop( + "Internal error: JointRPCA did not return a sample embedding. ", + "Please report this and include sessionInfo().", + call. = FALSE + ) + } + + emb <- as.matrix(emb) + + # Ensure embedding rownames match colnames of the target object (Bioconductor requirement) + target_cols <- colnames(x) + if (!is.null(altexp)) { + target_cols <- colnames(x) + } + + if (is.null(target_cols)) { + stop("Cannot store reducedDim: 'x' has no colnames().", call. = FALSE) + } + + if (is.null(rownames(emb))) { + stop("Cannot store reducedDim: embedding has no rownames().", call. = FALSE) + } + + # Require the same set of samples/cells + if (!setequal(rownames(emb), target_cols)) { + missing_in_emb <- setdiff(target_cols, rownames(emb)) + extra_in_emb <- setdiff(rownames(emb), target_cols) + stop( + "Cannot store reducedDim: embedding rownames do not match colnames(x).\n", + "Missing in embedding: ", paste(missing_in_emb, collapse = ", "), "\n", + "Extra in embedding: ", paste(extra_in_emb, collapse = ", "), + call. = FALSE + ) + } + + # Reorder embedding to exactly match colnames(x) + emb <- emb[target_cols, , drop = FALSE] + rownames(emb) <- target_cols + + if (nrow(emb) == 0L || ncol(emb) == 0L || is.null(rownames(emb))) { + stop( + "Internal error: JointRPCA returned an invalid sample embedding. ", + "Please report this and include sessionInfo().", + call. = FALSE + ) + } + + # Store embedding in reducedDim only if supported (SCE / TreeSE / mia-specific) + cls <- class(x) + if (any(cls %in% c("SingleCellExperiment", "TreeSummarizedExperiment"))) { + reducedDim(x, name) <- emb + } + + # Store full result in metadata + if (is.null(metadata(x)$JointRPCA)) { + metadata(x)$JointRPCA <- list() + } + metadata(x)$JointRPCA[[name]] <- res + + return(x) +} + +#' Universal Joint RPCA Wrapper +#' @name getJointRPCA +#' @param x Input object: \code{MultiAssayExperiment}, \code{SummarizedExperiment} +#' (including \code{TreeSummarizedExperiment}), list of matrices, or single matrix. +#' @param experiments Character vector of experiment names to extract when \code{x} +#' is a \code{MultiAssayExperiment} (i.e. \code{names(experiments(x))}). +#' If \code{NULL}, all experiments are used. +#' +#' For \code{MultiAssayExperiment} inputs, \strong{one assay per experiment} is +#' used: by default the first assay returned by +#' \code{assayNames()} (or index \code{1L} if unnamed). 
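+#'
+#' For example (a sketch only; \code{mae} stands for a hypothetical
+#' \code{MultiAssayExperiment} whose experiments are named
+#' \code{"microbiota"} and \code{"metabolites"}):
+#' \preformatted{
+#' res <- jointRPCAuniversal(mae, experiments = c("microbiota", "metabolites"))
+#' res$assay_names_used
+#' }
+#'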
+#' The actually used assay names are recorded in \code{$assay_names_used} in +#' the result. If you need a different assay (e.g. \code{"relab"} instead of +#' \code{"counts"}), subset or reorder assays in \code{x} before calling +#' \code{jointRPCAuniversal()}. +#' @param transform Character string specifying preprocessing applied to each +#' input table before ordination. Use \code{"rclr"} to apply the robust CLR +#' transform (via \code{decostand(method = "rclr")}) or \code{"none"} to +#' disable transformation (data are used as-is after masking non-finite values). +#' @param optspace.tol Numeric tolerance passed to \code{optspace()}. +#' @param center Logical; whether to center the reconstructed low-rank matrix +#' (double-centering) prior to SVD/PCA steps. +#' @param scale Logical; whether to scale the reconstructed matrix prior to +#' SVD/PCA steps. Defaults to \code{FALSE}. +#' @param ... Additional arguments passed to \code{.joint_rpca()}. +#' +#' @return Output from \code{.joint_rpca()} with extra fields when \code{x} is a +#' \code{MultiAssayExperiment}: +#' \itemize{ +#' \item \code{$experiment_names}: character vector of experiments used. +#' \item \code{$assay_names_used}: named character vector giving, for each +#' experiment, the assay name that was used (typically the first in +#' \code{assayNames()}). +#' } +#' @importFrom SummarizedExperiment assayNames +#' @importFrom SummarizedExperiment assay +#' @importFrom MultiAssayExperiment experiments +#' @importFrom vegan decostand +#' @importFrom vegan optspace +#' @export +NULL + +jointRPCAuniversal <- function(x, experiments = NULL, + transform = c("rclr", "none"), + optspace.tol = 1e-5, + center = TRUE, + scale = FALSE, + ...) { + transform <- match.arg(transform) + + assay_names_used <- NULL + + if (inherits(x, "MultiAssayExperiment")) { + mae <- .extract_mae_tables(x, experiments) + tables <- mae$tables + experiments <- mae$experiments + assay_names_used <- mae$assay_names_used + } else if (inherits(x, "SummarizedExperiment")) { + tables <- list(assay(x)) + anm <- assayNames(x) + nm <- if (length(anm) && !is.na(anm[1])) anm[1] else "assay1" + names(tables) <- nm + } else if (is.list(x) && all(vapply(x, is.matrix, logical(1)))) { + tables <- x + if (is.null(names(tables))) { + names(tables) <- paste0("view", seq_along(tables)) + } + } else if (is.matrix(x)) { + tables <- list(x) + names(tables) <- "assay1" + } else { + stop( + "Unsupported input type for jointRPCAuniversal(): ", + paste(class(x), collapse = ", "), + call. = FALSE + ) + } + + res <- .joint_rpca( + tables = tables, + transform = transform, + optspace.tol = optspace.tol, + center = center, + scale = scale, + ... + ) + + if (inherits(x, "MultiAssayExperiment")) { + res$experiment_names <- experiments + res$assay_names_used <- assay_names_used + } + + return(res) +} + +#' Joint Robust PCA on Multiple Compositional Tables +#' +#' Internal engine for Joint Robust Principal Component Analysis (RPCA) using +#' OptSpace on multiple compositional tables. +#' +#' This function assumes a list of already extracted tables and is typically +#' called via \code{jointRPCAuniversal()} or \code{getJointRPCA()}. +#' +#' @param tables A list of compositional data tables (matrices or data frames). +#' @param n.test.samples Integer specifying the number of samples to hold out for testing +#' (only used if \code{sample.metadata} is \code{NULL}). Default is 10. +#' @param sample.metadata Optional data frame containing sample-level metadata. 
+#' @param train.test.column The name of the column in \code{sample.metadata} +#' that defines training vs test samples. +#' @param n.components Integer specifying the number of principal components to compute. +#' @param transform Character string specifying preprocessing applied to each +#' input table before ordination: \code{"rclr"} or \code{"none"}. +#' @param optspace.tol Numeric tolerance passed to \code{vegan::optspace()}. +#' @param center,scale Logical; whether to center/scale the reconstructed matrix +#' prior to SVD/PCA steps. +#' @param min.sample.count Minimum total count required for a sample to be retained. +#' @param min.feature.count Minimum total count required for a feature to be retained. +#' @param min.feature.frequency Minimum percentage (0–100) of samples in which a +#' feature must be non-zero to be retained. +#' @param max.iterations Maximum number of optimization iterations. +#' +#' @return A list with \code{ord_res}, \code{dist}, \code{cv_stats}, and +#' \code{rclr_tables}. +#' +#' @keywords internal +#' @noRd + +.joint_rpca <- function(tables, + n.test.samples = 10, + sample.metadata = NULL, + train.test.column = NULL, + n.components = 3, + transform = c("rclr", "none"), + min.sample.count = 0, + min.feature.count = 0, + min.feature.frequency = 0, + max.iterations = 5, + optspace.tol = 1e-5, + center = TRUE, + scale = FALSE) { + transform <- match.arg(transform) + + if (is.null(names(tables))) { + names(tables) <- paste0("view", seq_along(tables)) + } + + if (n.components < 2) { + stop("n.components must be at least 2.", call. = FALSE) + } + if (max.iterations < 1) { + stop("max.iterations must be at least 1.", call. = FALSE) + } + + # Filtering (always done, independent of rclr) + tables <- lapply(tables, function(tbl) { + out <- .rpca_table_processing( + tbl, + min.sample.count = min.sample.count, + min.feature.count = min.feature.count, + min.feature.frequency = min.feature.frequency + ) + if (nrow(out) == 0L || ncol(out) == 0L) { + stop( + "Filtering removed all data in at least one table (0 features or 0 samples). ", + "Try relaxing filtering thresholds (min.sample.count / min.feature.count / ", + "min.feature.frequency) or check your input.", + call. = FALSE + ) + } + out + }) + + # Find shared samples across views + sample.sets <- lapply(tables, colnames) + shared.all.samples <- Reduce(intersect, sample.sets) + if (length(shared.all.samples) == 0L) { + stop( + "No samples overlap between all tables. ", + "Check that colnames are consistent across views, or", + "if you are using pre-transformed tables set transform = 'none'.", + call. = FALSE + ) + } + if (length(shared.all.samples) < (n.components + 1L)) { + stop( + "Too few shared samples across all tables after filtering (", + length(shared.all.samples), "). Need at least n.components + 1 shared samples. ", + "Try lowering n.components or relaxing filtering thresholds.", + call. 
= FALSE + ) + } + unshared.samples <- setdiff(unique(unlist(sample.sets)), shared.all.samples) + if (length(unshared.samples) > 0) { + warning(sprintf("Removing %d sample(s) that do not overlap in tables.", length(unshared.samples))) + } + + # Restrict each table to the shared sample set + tables <- lapply(tables, function(tbl) { + tbl[, shared.all.samples, drop = FALSE] + }) + shared.all.samples <- Reduce(intersect, lapply(tables, colnames)) + + # Transform tables: rCLR or masking + rclr_tables <- lapply(tables, function(tbl) { + mat <- as.matrix(tbl) + rown <- rownames(mat) + coln <- colnames(mat) + + if (transform == "rclr") { + mat[!is.finite(mat)] <- 0 + mat[mat < 0] <- 0 + out <- vegan::decostand(mat, method = "rclr", MARGIN = 2) + dimnames(out) <- list(rown, coln) + out + } else { + out <- .mask_value_only(mat)$data + dimnames(out) <- list(rown, coln) + out + } + }) + names(rclr_tables) <- names(tables) + + # Determine train/test split + if (!is.null(sample.metadata) && !is.null(train.test.column)) { + md <- as.data.frame(sample.metadata) + md <- md[shared.all.samples, , drop = FALSE] + train.samples <- rownames(md)[md[[train.test.column]] == "train"] + test.samples <- rownames(md)[md[[train.test.column]] == "test"] + } else { + ord.tmp <- .optspace_helper( + rclr.table = t(rclr_tables[[1]]), + feature.ids = rownames(rclr_tables[[1]]), + sample.ids = colnames(rclr_tables[[1]]), + n.components = n.components, + max.iterations = max.iterations, + tol = optspace.tol, + center = center, + scale = scale + )$ord_res + sorted.ids <- rownames(ord.tmp$samples[order(ord.tmp$samples[, 1]), ]) + idx <- round(seq(1, length(sorted.ids), length.out = n.test.samples)) + test.samples <- sorted.ids[idx] + train.samples <- setdiff(shared.all.samples, test.samples) + } + + # Run joint OptSpace + result <- .joint_optspace_helper( + tables = rclr_tables, + n.components = n.components, + max.iterations = max.iterations, + test.samples = test.samples, + train.samples = train.samples, + sample.order = shared.all.samples, + tol = optspace.tol, + center = center, + scale = scale + ) + + return(list( + ord_res = result$ord_res, + dist = result$dist, + cv_stats = result$cv_stats, + rclr_tables = rclr_tables + )) +} + +#' Joint RPCA Ordination Across Multiple Compositional Tables +#' +#' Internal function that performs Robust PCA via joint OptSpace decomposition across multiple compositional tables. +#' It splits each table into train/test sets, applies joint factorization, reconstructs sample and feature embeddings, +#' optionally projects test samples, computes a sample distance matrix, and returns cross-validation error statistics. +#' +#' @param tables A list of compositional matrices or data frames with features as rows and samples as columns. +#' @param n.components Number of principal components to compute. +#' @param max.iterations Maximum number of optimization iterations for OptSpace. +#' @param test.samples Character vector of sample IDs to be projected into the ordination space. +#' @param train.samples Character vector of sample IDs used to fit the ordination. 
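+#' @param sample.order Optional character vector giving the order in which the
+#' shared samples should appear in the returned distance matrix.
+#' @param tol Numeric tolerance passed to \code{vegan::optspace()}.
+#' @param center,scale Logical; whether to center/scale the reconstructed
+#' low-rank matrix prior to the final SVD step.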
+#' +#' @return A list with: +#' \describe{ +#' \item{ord_res}{An \code{OrdinationResults} object containing embeddings, loadings, and variance explained.} +#' \item{dist}{A \code{DistanceMatrix} object for sample embeddings.} +#' \item{cv_stats}{A data frame summarizing reconstruction error across iterations and tables.} +#' } +#' +#' @keywords internal +#' @noRd + +.joint_optspace_helper <- function(tables, + n.components, + max.iterations, + test.samples, + train.samples, + sample.order = NULL, + tol = 1e-5, + center = TRUE, + scale = FALSE) { + # Coerce to matrices and enforce colnames presence + tables <- lapply(tables, function(tbl) { + mat <- as.matrix(tbl) + if (is.null(colnames(mat))) { + stop("[.joint_optspace_helper] Input table is missing column names (sample IDs).") + } + mat + }) + + # Global set of samples present in all views + all_samples <- Reduce(intersect, lapply(tables, colnames)) + + # Align train/test to actually available samples + test.samples <- intersect(test.samples, all_samples) + train.samples <- intersect(train.samples, all_samples) + + if (!length(test.samples) || !length(train.samples)) { + stop("[.joint_optspace_helper] Empty train/test split after aligning sample IDs.") + } + + # Split and transpose training/test data per table + tables.split <- lapply(tables, function(tbl) { + list( + t(tbl[, test.samples, drop = FALSE]), + t(tbl[, train.samples, drop = FALSE]) + ) + }) + + # Format input for solver + tables.for.solver <- lapply(tables.split, function(pair) { + lapply(pair, as.matrix) + }) + + # Run joint OptSpace solver + opt.result <- .joint_optspace_solve( + train.test.pairs = tables.for.solver, + n.components = n.components, + max.iter = max.iterations, + tol = tol + ) + + U <- opt.result$U + S <- opt.result$S + V_list <- opt.result$V_list + dists <- opt.result$dists + + # Assign row/column names to loadings + pc.names <- paste0("PC", seq_len(n.components)) + + # Combine feature loadings with table-derived row names + vjoint <- do.call(rbind, Map(function(tbl, V) { + rownames(V) <- rownames(tbl) + colnames(V) <- pc.names + V + }, tables, V_list)) + + U <- U[seq_along(train.samples), , drop = FALSE] + rownames(U) <- train.samples + colnames(U) <- pc.names + + # Recenter & re-factor via SVD + X <- U %*% S %*% t(vjoint) + + if (center) { + X <- sweep(X, 2, colMeans(X)) + X <- sweep(X, 1, rowMeans(X)) + } + if (scale) { + X <- scale(X, center = FALSE, scale = TRUE) + } + svd.res <- svd(X) + u <- svd.res$u[, seq_len(n.components), drop = FALSE] + v <- svd.res$v[, seq_len(n.components), drop = FALSE] + s.eig <- svd.res$d[seq_len(n.components)] + + rownames(u) <- train.samples + rownames(v) <- rownames(vjoint) + pc.names <- paste0("PC", seq_len(n.components)) + colnames(u) <- colnames(v) <- pc.names + + # Build a named per-view features list + features_list <- lapply(seq_along(tables), function(i) { + rid <- rownames(tables[[i]]) + v[rid, , drop = FALSE] + }) + names(features_list) <- names(tables) + + prop.exp <- s.eig^2 / sum(s.eig^2) + ord_res <- .ordination_results( + method = "rpca", + eigvals = setNames(s.eig, pc.names), + samples = u, + features = features_list, + proportion.explained = setNames(prop.exp, pc.names) + ) + + # Project test samples + if (length(test.samples) > 0) { + test.matrices <- lapply(tables, function(tbl) tbl[, test.samples, drop = FALSE]) + names(test.matrices) <- names(tables) + ord_res <- .transform(ord_res, test.matrices, apply.rclr = FALSE) + } + + # Compute distance matrix and CV error summary + dist.base <- 
as.matrix(dist(ord_res$samples)) + + if (!is.null(sample.order)) { + order_use <- intersect(sample.order, rownames(dist.base)) + dist.mat <- dist.base[order_use, order_use, drop = FALSE] + } else { + dist.mat <- dist.base + order_use <- rownames(dist.base) + } + + dist.res <- .distance_matrix(dist.mat, ids = order_use) + + cv.dist <- data.frame(t(dists)) + colnames(cv.dist) <- c("mean_CV", "std_CV") + cv.dist$run <- sprintf( + "tables_%d.n.components_%d.max.iterations_%d.n.test_%d", + length(tables), n.components, max.iterations, length(test.samples) + ) + cv.dist$iteration <- seq_len(nrow(cv.dist)) + rownames(cv.dist) <- seq_len(nrow(cv.dist)) + + return(list(ord_res = ord_res, dist = dist.res, cv_stats = cv.dist)) +} + +#' Apply Projection of New Compositional Tables to Existing Ordination +#' +#' Internal function that transforms and projects new sample tables into an existing Joint RPCA ordination space. +#' It handles feature alignment, optional rCLR preprocessing, padding of missing features, +#' and merges projected samples with existing ones. +#' +#' @param ordination A list containing previous ordination results: `samples`, `features`, and `eigvals`. +#' @param tables A named list of new compositional tables (matrices or data frames) +#' with features as rows and samples as columns. +#' @param apply.rclr Logical; whether to apply rCLR transformation to new input tables before projection. Default is `TRUE`. +#' +#' @return An updated ordination list with the `samples` matrix extended to include new projected samples. +#' @keywords internal +#' @noRd + +.transform <- function(ordination, tables, + apply.rclr = TRUE) { + Udf <- ordination$samples + Vobj <- ordination$features + s.eig <- ordination$eigvals + + # Ensure tables is a list of views + if (!is.list(tables)) { + stop("[.transform] 'tables' must be a list of view matrices (features x samples).") + } + + if (is.list(Vobj) && !is.null(names(Vobj))) { + if (is.null(names(tables))) { + names(tables) <- names(Vobj)[seq_along(tables)] + } + if (is.null(names(tables))) { + stop("[.transform] 'tables' must be a *named* list of view matrices (features x samples).") + } + } + + # rCLR if requested + prep_view <- function(tab) { + mat <- as.matrix(tab) + rown <- rownames(mat) + coln <- colnames(mat) + + if (apply.rclr) { + mat <- vegan::decostand(mat, method = "rclr", MARGIN = 2) + + dimnames(mat) <- list(rown, coln) + } + + storage.mode(mat) <- "double" + mat[!is.finite(mat)] <- 0 + mat + } + tables <- lapply(tables, prep_view) + + if (is.matrix(Vobj)) { + all.features <- rownames(Vobj) + tables <- lapply(tables, function(mat) { + miss <- setdiff(all.features, rownames(mat)) + if (length(miss)) { + pad <- matrix(0, + nrow = length(miss), ncol = ncol(mat), + dimnames = list(miss, colnames(mat)) + ) + mat <- rbind(mat, pad) + } + mat[all.features, , drop = FALSE] + }) + proj.mat <- do.call(cbind, tables) + colnames(proj.mat) <- make.unique(colnames(proj.mat), sep = "_") + ordination$samples <- .transform_helper(Udf, Vobj, s.eig, proj.mat) + return(ordination) + } + + # 3+-omic path: V is a named list per view + if (!is.list(Vobj) || is.null(names(Vobj))) { + stop("[.transform] ordination$features is neither a matrix nor a named list.") + } + + # Intersect views by name, preserve training order + views <- intersect(names(Vobj), names(tables)) + if (!length(views)) stop("[.transform] No overlapping view names between ordination and new tables.") + + test.matrices <- list() + for (vw in views) { + Vvw <- Vobj[[vw]] + 
stopifnot(is.matrix(Vvw), !is.null(rownames(Vvw))) + mat <- tables[[vw]] + + train_feats <- rownames(Vvw) + miss <- setdiff(train_feats, rownames(mat)) + if (length(miss)) { + pad <- matrix(0, + nrow = length(miss), ncol = ncol(mat), + dimnames = list(miss, colnames(mat)) + ) + mat <- rbind(mat, pad) + } + mat <- mat[train_feats, , drop = FALSE] + + test.matrices[[vw]] <- mat + } + + ordination$samples <- .transform_helper(Udf, Vobj, s.eig, test.matrices) + return(ordination) +} + +#' Project New Data into Existing Ordination Space +#' +#' Internal function to align rCLR-transformed samples to an existing RPCA ordination space. +#' Handles feature alignment, deduplication of sample names, double-centering normalization, +#' and projection into low-rank space using previously learned components. +#' +#' @param Udf Matrix of training sample embeddings (samples × components). +#' @param Vdf Matrix or list of feature loadings (features × components, or per-view list). +#' @param s.eig Singular values from the RPCA decomposition. +#' @param table.rclr.project New rCLR-transformed table(s) for projection (features × samples). +#' If `Vdf` is a matrix, provide a single matrix; if `Vdf` is a list, provide a named list of matrices per view. +#' @param dedup.samples Logical; whether to merge samples with identical names (e.g. suffixes like `_1`, `_2`) +#' by averaging their feature values. Default is `TRUE`. Set to `FALSE` to preserve all duplicate sample IDs. +#' +#' @return A combined matrix of training and projected samples (samples × components). +#' @keywords internal +#' @noRd + +.transform_helper <- function(Udf, Vdf, s.eig, table.rclr.project, + dedup.samples = TRUE) { + # Legacy path (single view) + if (is.matrix(Vdf)) { + stopifnot(is.matrix(table.rclr.project)) + # Align rows by name + common <- intersect(rownames(Vdf), rownames(table.rclr.project)) + if (length(common) < ncol(Udf)) { + stop(sprintf("[.transform_helper] Too few matching features: %d", length(common))) + } + + M <- t(as.matrix(table.rclr.project[common, , drop = FALSE])) + V <- as.matrix(Vdf[common, , drop = FALSE]) + + # Dedup of sample IDs + if (dedup.samples) { + sid <- sub("_\\d+$", "", rownames(M)) + if (any(duplicated(sid))) { + M <- rowsum(M, group = sid, reorder = FALSE) / as.vector(table(sid)) + } else { + rownames(M) <- sid + } + } + + # Projection (match training scaling) + Uproj <- M %*% V + # Scale by singular values + if (length(s.eig)) { + Sinv <- diag(1 / s.eig, nrow = length(s.eig)) + Uproj <- Uproj %*% Sinv + } + + colnames(Uproj) <- colnames(Udf) + U.combined <- rbind(Udf[setdiff(rownames(Udf), rownames(Uproj)), , drop = FALSE], Uproj) + return(U.combined) + } + + # Multi-view path (named lists) + stopifnot(is.list(Vdf), is.list(table.rclr.project)) + views <- intersect(names(Vdf), names(table.rclr.project)) + if (!length(views)) stop("[.transform_helper] No overlapping views.") + + # Project per view, then sum contributions in the shared latent space + Usum <- NULL + ncomp <- ncol(Udf) + for (vw in views) { + Vvw <- Vdf[[vw]] + Tvw <- table.rclr.project[[vw]] + stopifnot(is.matrix(Vvw), is.matrix(Tvw)) + + common <- intersect(rownames(Vvw), rownames(Tvw)) + if (length(common) < ncomp) { + stop(sprintf("[.transform_helper] View '%s': too few matching features (%d).", vw, length(common))) + } + + M <- t(as.matrix(Tvw[common, , drop = FALSE])) + V <- as.matrix(Vvw[common, , drop = FALSE]) + + # Accumulate per-view U + Uvw <- M %*% V + if (is.null(Usum)) { + Usum <- Uvw + } else { + # Align rows (samples) 
by name before summing + all_s <- union(rownames(Usum), rownames(Uvw)) + Utmp <- matrix(0, + nrow = length(all_s), ncol = ncol(Udf), + dimnames = list(all_s, colnames(Udf)) + ) + Utmp[rownames(Usum), ] <- Usum + Utmp[rownames(Uvw), ] <- Utmp[rownames(Uvw), ] + Uvw + Usum <- Utmp + } + } + + # Sample dedup (after combining views) + if (dedup.samples) { + sid <- sub("_\\d+$", "", rownames(Usum)) + if (any(duplicated(sid))) { + Usum <- rowsum(Usum, group = sid, reorder = FALSE) / as.vector(table(sid)) + } else { + rownames(Usum) <- sid + } + } + + # Scale by S + if (length(s.eig)) { + Sinv <- diag(1 / s.eig, nrow = length(s.eig)) + Usum <- Usum %*% Sinv + } + colnames(Usum) <- colnames(Udf) + + # Merge with training U, avoiding duplicates + keep_train <- setdiff(rownames(Udf), rownames(Usum)) + rbind(Udf[keep_train, , drop = FALSE], Usum) +} + +#' RPCA Table Filtering and Preprocessing +#' +#' Internal function that performs filtering and cleanup on a compositional data table +#' prior to Robust PCA analysis. Removes low-count features/samples, enforces non-zero frequency thresholds, +#' checks for ID duplication, and returns a matrix suitable for transformation and ordination. +#' +#' @param table A matrix or data frame with features as rows and samples as columns. +#' @param min.sample.count Minimum total count required for a sample to be retained. Default is 0. +#' @param min.feature.count Minimum total count required for a feature to be retained. Default is 0. +#' @param min.feature.frequency Minimum percentage (0–100) of samples in which a feature must be non-zero. Default is 0. +#' +#' @return A filtered numeric matrix containing non-empty features and samples. +#' @keywords internal +#' @noRd + +.rpca_table_processing <- function(table, + min.sample.count = 0, + min.feature.count = 0, + min.feature.frequency = 0) { + # Ensure the input is a matrix + if (is.data.frame(table)) { + table <- as.matrix(table) + } + + n.features <- nrow(table) + n.samples <- ncol(table) + + # Filter features by total count + if (!is.null(min.feature.count)) { + feature.totals <- rowSums(table, na.rm = TRUE) + keep.features <- feature.totals > min.feature.count + table <- table[keep.features, , drop = FALSE] + } + + # Filter features by frequency across samples + if (!is.null(min.feature.frequency)) { + freq.threshold <- min.feature.frequency / 100 + feature.freq <- rowMeans(table > 0, na.rm = TRUE) + keep.features <- feature.freq > freq.threshold + table <- table[keep.features, , drop = FALSE] + } + + # Filter samples by total count + if (!is.null(min.sample.count)) { + sample.totals <- colSums(table, na.rm = TRUE) + keep.samples <- sample.totals > min.sample.count + table <- table[, keep.samples, drop = FALSE] + } + + # Check for duplicate IDs + if (any(duplicated(colnames(table)))) { + stop("Data table contains duplicate sample (column) IDs.", call. = FALSE) + } + if (any(duplicated(rownames(table)))) { + stop("Data table contains duplicate feature (row) IDs.", call. 
= FALSE) + } + + # Remove empty rows and columns if sample filtering applied + if (!is.null(min.sample.count)) { + nonzero.features <- rowSums(table, na.rm = TRUE) > 0 + nonzero.samples <- colSums(table, na.rm = TRUE) > 0 + table <- table[nonzero.features, nonzero.samples, drop = FALSE] + } + + return(table) +} + +#' Generate a MaskedMatrix from Numeric Input +#' +#' Internal helper that ensures input is a 2D numeric matrix and returns a masked version, +#' replacing non-finite values (e.g., NA, NaN, Inf) with `NA` and recording their positions in a logical mask. +#' +#' @param mat A numeric matrix or vector. If a vector, it will be converted to a 1-row matrix. +#' +#' @return A list of class \code{"MaskedMatrix"} with two elements: +#' \describe{ +#' \item{data}{The original matrix with non-finite values replaced by \code{NA}.} +#' \item{mask}{Logical matrix indicating non-finite entries (TRUE if missing).} +#' } +#' +#' @keywords internal +#' @noRd + +.mask_value_only <- function(mat) { + # Ensure matrix is at least 2D + if (is.vector(mat)) { + mat <- matrix(mat, nrow = 1) + } + + # Ensure matrix is not more than 2D + if (length(dim(mat)) > 2) { + stop("Input matrix can only have two dimensions or less") + } + + # Generate logical mask: TRUE where values are missing + mask <- !is.finite(mat) + + # Create masked matrix + masked.mat <- mat + masked.mat[!is.finite(mat)] <- NA + + # Return as a masked matrix + return(structure(list( + data = masked.mat, + mask = mask + ), class = "MaskedMatrix")) +} + +#' Internal constructor for ordination results +#' @keywords internal +#' @noRd +.ordination_results <- function(method, eigvals, samples, features, + proportion.explained, dist = NULL, metadata = list()) { + return(structure(list( + method = method, + eigvals = eigvals, + samples = samples, + features = features, + proportion.explained = proportion.explained, + dist = dist, + metadata = metadata + ), class = "OrdinationResults")) +} + +#' Internal constructor for a distance matrix object +#' @keywords internal +#' @noRd +.distance_matrix <- function(matrix, ids = NULL, method = "euclidean") { + if (!is.matrix(matrix)) stop("Input must be a matrix.") + if (!isSymmetric(matrix)) stop("Distance matrix must be symmetric.") + if (!is.null(ids)) { + if (length(ids) != nrow(matrix)) stop("Length of 'ids' must match matrix dimensions.") + rownames(matrix) <- ids + colnames(matrix) <- ids + } + return(structure(list( + data = matrix, + ids = rownames(matrix), + method = method + ), class = "DistanceMatrix")) +} + +#' Extract per-experiment assay tables from a MultiAssayExperiment +#' +#' @param x A MultiAssayExperiment. +#' @param experiments Character vector of experiment names (or NULL for all). +#' +#' @return A list with `tables`, `experiments`, and `assay_names_used`. +#' +#' @keywords internal +#' @noRd +.extract_mae_tables <- function(x, experiments = NULL) { + exps <- experiments(x) + + if (is.null(experiments)) { + experiments <- names(exps) + } + if (length(experiments) == 0L) { + stop("No experiments found in 'x'.", call. = FALSE) + } + + assay_names_used <- setNames(character(length(experiments)), experiments) + tables <- vector("list", length(experiments)) + names(tables) <- experiments + + for (i in seq_along(experiments)) { + e <- experiments[[i]] + exp_se <- exps[[e]] + if (is.null(exp_se)) { + stop(sprintf("Experiment '%s' not found in 'x'.", e), call. 
= FALSE) + } + + anm <- assayNames(exp_se) + default_assay <- if (length(anm)) anm[[1]] else 1L + + assay_names_used[[e]] <- if (is.character(default_assay)) default_assay else as.character(default_assay) + tables[[e]] <- assay(exp_se, default_assay) + } + + list( + tables = tables, + experiments = experiments, + assay_names_used = assay_names_used + ) +} + +#' OptSpace back-end (joint & single-view) +#' +#' OptSpace-based solvers built on top of \code{vegan::optspace()}. +#' +#' This file contains: +#' \itemize{ +#' \item \code{.optspace_helper()} — single-view rCLR → OptSpace → biplot +#' \item \code{.joint_optspace_solve()} — multi-view joint factorization +#' } +#' +#' @keywords internal +#' @noRd +NULL + +#' OptSpace-Based Dimensionality Reduction and RPCA Biplot Generation +#' +#' Internal function that fits an OptSpace model to a rCLR-transformed compositional table, +#' reconstructs the low-rank matrix, applies PCA, and constructs an ordination result +#' capturing sample embeddings, feature loadings, and explained variance. A distance matrix +#' is also generated using Aitchison geometry. +#' +#' @param rclr.table A numeric matrix representing rCLR-transformed compositional data. +#' @param feature.ids Character vector of feature names (used for row labeling of loadings). +#' @param sample.ids Character vector of sample names (used for row labeling of embeddings). +#' @param n.components Integer specifying number of principal components to retain. Default is 3. +#' @param max.iterations Maximum number of iterations to run OptSpace optimization. Default is 5. +#' +#' @return A list with: +#' \describe{ +#' \item{ord_res}{An \code{OrdinationResults} object containing PCA scores, loadings, and metadata.} +#' \item{dist}{A sample-by-sample \code{DistanceMatrix} object using Aitchison geometry.} +#' \item{opt_fit}{The raw OptSpace fit result containing matrices \code{X}, \code{Y}, and \code{S}.} +#' } +#' +#' @keywords internal +#' @noRd + +.optspace_helper <- function(rclr.table, + feature.ids, + sample.ids, + n.components = 3, + max.iterations = 5, + tol = 1e-5, + center = TRUE, + scale = FALSE) { + opt.result <- vegan::optspace( + x = rclr.table, + ropt = n.components, + niter = max.iterations, + tol = tol, + verbose = FALSE + ) + + n.components <- ncol(opt.result$S) + + # Reconstruct and re-center matrix + X.hat <- opt.result$X %*% opt.result$S %*% t(opt.result$Y) + X.hat <- scale(X.hat, center = center, scale = scale) + X.hat <- t(scale(t(X.hat), center = center, scale = scale)) + + # PCA + svd.out <- svd(X.hat) + u <- svd.out$u[, 1:n.components, drop = FALSE] + s <- svd.out$d[1:n.components] + v <- svd.out$v[, 1:n.components, drop = FALSE] + + # Label loadings + rename.cols <- paste0("PC", seq_len(n.components)) + sample.scores <- u + feature.scores <- data.frame(v, row.names = feature.ids) + feature.scores <- as.matrix(feature.scores) + rownames(sample.scores) <- sample.ids + colnames(sample.scores) <- rename.cols + colnames(feature.scores) <- rename.cols + + # Proportion explained + prop.var <- s^2 / sum(svd.out$d^2) + names(prop.var) <- rename.cols + names(s) <- rename.cols + + # Add PC3 for 2D case + if (n.components == 2) { + sample.scores <- cbind(sample.scores, PC3 = 0) + feature.scores <- cbind(feature.scores, PC3 = 0) + sample.scores <- as.matrix(sample.scores) + feature.scores <- as.matrix(feature.scores) + + s <- c(s, PC3 = 0) + prop.var <- c(prop.var, PC3 = 0) + rename.cols <- c(rename.cols, "PC3") + } + + # Compute distance in sample PC-space + dist.matrix.raw <- 
as.matrix(dist(u)) + rownames(dist.matrix.raw) <- sample.ids + colnames(dist.matrix.raw) <- sample.ids + + dist.res <- .distance_matrix(dist.matrix.raw, + ids = sample.ids, + method = "aitchison" + ) + + ord_res <- .ordination_results( + method = "rpca_biplot", + eigvals = s, + samples = sample.scores, + features = feature.scores, + proportion.explained = prop.var, + dist = dist.matrix.raw, + metadata = list( + long.method.name = "(Robust Aitchison) RPCA Biplot", + run.id = sprintf( + "optspace_helper_n.components_%d.max.iterations_%d", + n.components, max.iterations + ) + ) + ) + + return(list( + ord_res = ord_res, + dist = dist.res, + opt_fit = opt.result + )) +} + +#' Joint OptSpace Optimization Across Multiple Train/Test Splits +#' +#' Internal function that performs joint matrix factorization using OptSpace across a set of paired train/test compositional tables. +#' Stacks training matrices horizontally, applies low-rank optimization, splits feature loadings per table, +#' and evaluates projection error on test data via Frobenius norm. +#' +#' @param train.test.pairs A list of paired matrices where each element is a two-item list: \code{[[test, train]]}. +#' @param n.components Integer specifying number of components to retain in the OptSpace model. +#' @param max.iter Maximum number of optimization iterations. Default is 50. +#' @param verbose Logical; whether to print progress messages. Default is \code{TRUE}. +#' +#' @return A list with: +#' \describe{ +#' \item{U}{Shared sample embedding matrix across all input tables.} +#' \item{S}{Singular values matrix from OptSpace decomposition.} +#' \item{V_list}{List of per-table feature loading matrices.} +#' \item{dists}{Matrix of reconstruction errors (rows: error type, columns: tables).} +#' } +#' +#' @keywords internal +#' @noRd + +.joint_optspace_solve <- function(train.test.pairs, n.components, + max.iter = 50, verbose = TRUE, + tol = 1e-5) { + # Prepare lists to hold training matrices and dimensions + train.matrices <- list() + test.matrices <- list() + dims <- list() + + for (pair in train.test.pairs) { + test.mat <- pair[[1]] + train.mat <- pair[[2]] + train.matrices <- append(train.matrices, list(train.mat)) + test.matrices <- append(test.matrices, list(test.mat)) + dims <- append(dims, list(dim(train.mat))) + } + + # Stack training matrices horizontally + train.stacked <- do.call(cbind, train.matrices) + + # Apply OptSpace to stacked matrix via vegan + if (verbose) { + message("Running vegan::optspace() on stacked training data...") + } + + fit <- vegan::optspace( + x = train.stacked, + ropt = n.components, + niter = max.iter, + tol = tol, + verbose = verbose + ) + + # Extract sample loadings + U.shared <- fit$X + S.shared <- fit$S + + # Split V back into per-table pieces + feat.indices <- cumsum(vapply(dims, function(d) d[2], numeric(1))) + feat.starts <- c(1, head(feat.indices, -1) + 1) + V_list <- Map( + function(start, end) fit$Y[start:end, , drop = FALSE], + feat.starts, feat.indices + ) + + # Reconstruction error per view (mean_CV / std_CV placeholder) + n_views <- length(test.matrices) + dists <- matrix(0, nrow = 2, ncol = n_views) + + for (i in seq_along(test.matrices)) { + V.k <- V_list[[i]] + test.mat <- test.matrices[[i]] + + # Project test samples: U.test = test × V + U.test <- as.matrix(test.mat) %*% V.k + U.test <- sweep(U.test, 2, diag(S.shared), "/") + recon.test <- U.test %*% S.shared %*% t(V.k) + + # Center for consistency + recon.test <- scale(recon.test, center = TRUE, scale = FALSE) + recon.test <- 
t(scale(t(recon.test), center = TRUE, scale = FALSE)) + + error <- test.mat - recon.test + error[is.na(error)] <- 0 + error.val <- norm(error, "F") / sqrt(sum(!is.na(test.mat))) + + dists[1, i] <- error.val + dists[2, i] <- 0 + } + + return(list(U = U.shared, S = S.shared, V_list = V_list, dists = dists)) +} diff --git a/R/mia.R b/R/mia.R index f0f419f19..fc62a795b 100644 --- a/R/mia.R +++ b/R/mia.R @@ -27,7 +27,7 @@ NULL #' The datasets represent instances of the TreeSummarizedExperiment and #' MultiAssayExperiment containers and can serve as tools to practice the #' mia functionality. -#' +#' #' Currently, the following datasets are available: #' \itemize{ #' \item{\code{\link{dmn_se}}: A SummarizedExperiment with 130 features and @@ -38,6 +38,8 @@ NULL #' and 3 samples} #' \item{\code{\link{GlobalPatterns}}: A TreeSummarizedExperiment with 19216 #' features and 26 samples} +#' \item{\code{\link{ibdmdb_2omic_demo}}: A compact +#' \code{MultiAssayExperiment} demo dataset with 2 experiments (MGX + MTX)} #' \item{\code{\link{HintikkaXOData}}: A MultiAssayExperiment with 3 #' experiments (microbiota, metabolites and biomarkers)} #' \item{\code{\link{peerj13075}}: A TreeSummarizedExperiment with 674 @@ -47,19 +49,19 @@ NULL #' \item{\code{\link{Tito2024QMP}}: A TreeSummarizedExperiment with 676 #' features and 589 samples} #' } -#' +#' #' @name mia-datasets #' @docType data #' @keywords datasets -#' +#' #' @examples #' # Load dataset from mia #' library(mia) #' data("GlobalPatterns", package = "mia") -#' +#' #' # In this case, the dataset is a TreeSE, so it is renamed as tse #' tse <- GlobalPatterns -#' +#' #' # Print summary #' tse NULL @@ -70,17 +72,17 @@ NULL #' GlobalPatterns compared the microbial communities from 25 environmental #' samples #' and three known "mock communities" at a an average depth of 3.1 million reads -#' per sample. Authors reproduced diversity patterns seen in many other +#' per sample. Authors reproduced diversity patterns seen in many other #' published studies, while investigating technical bias by applying the same #' techniques to simulated microbial communities of known composition. Special #' thanks are given to J. Gregory Caporaso for providing the OTU-clustered data #' files for inclusion in the \pkg{phyloseq} package, from which this data was #' converted to \code{TreeSummarizedExperiment}. -#' +#' #' @format A TreeSummarizedExperiment with 19216 features and 26 samples. The #' rowData contains taxonomic information at Kingdom, Phylum, Class, Order, #' Family, Genus and Species levels. The colData includes: -#' +#' #' \describe{ #' \item{X.SampleID}{Sample ID taken from the corresponding study} #' \item{Primer}{primer used for sequencing} @@ -102,7 +104,7 @@ NULL #' @seealso \code{\link{mia-datasets}} #' @author Caporaso, J. G., et al. #' @references -#' Caporaso, J. G., et al. (2011). +#' Caporaso, J. G., et al. (2011). #' Global patterns of 16S rRNA diversity at a depth of millions of sequences per #' sample. #' PNAS, 108, 4516-4522. \url{https://doi.org/10.1073/pnas.1000080107} @@ -121,7 +123,7 @@ NULL #' #' @format A TreeSummarizedExperiment with 553 features and 280 samples. The #' rowData contains taxonomic information at Genus level. 
The colData includes: -#' +#' #' \describe{ #' \item{Enterotype}{enterotype the sample belongs to (1, 2 and 3)} #' \item{Sample_ID}{sample ID of samples from all studies} @@ -136,7 +138,7 @@ NULL #' \item{ClinicalStatus}{participant's clinical status (healthy, obese, CD, #' UC and elderly)} #' } -#' +#' #' @name enterotype #' @docType data #' @keywords datasets @@ -150,18 +152,18 @@ NULL #' Arumugam, M., et al. (2014). Addendum: Enterotypes of the human gut #' microbiome. #' Nature 506, 516 (2014). \url{https://doi.org/10.1038/nature13075} -#' +#' #' @source \url{http://www.bork.embl.de/Docu/Arumugam_et_al_2011/downloads.html} NULL #' Human esophageal community from 3 individuals -#' +#' #' This small dataset from a human esophageal community includes 3 samples from #' 3 human adults based on biopsies analysed with 16S rDNA PCR. The 16S rRNA #' sequence processing is provided in the mothur wiki from the link below. It #' was #' converted into a TreeSummarizedExperiment from the \pkg{phyloseq} package. -#' +#' #' @format A TreeSummarizedExperiment with 58 features and 3 samples. The #' rowData contains no taxonomic information. The colData is empty. #' @@ -171,9 +173,9 @@ NULL #' @usage data(esophagus) #' @seealso \code{\link{mia-datasets}} #' @author Pei et al. \email{zhiheng.pei@@med.nyu.edu}. -#' @references +#' @references #' Pei, Z., Bini, E. J., Yang, L., Zhou, M., Francois, F., & Blaser, M. J. -#' (2004). +#' (2004). #' Bacterial biota in the human distal esophagus. #' Proceedings of the National Academy of Sciences of the United States of #' America, 101(12), 4250-4255. @@ -192,14 +194,14 @@ NULL #' dmn_se is a dataset on twins' microbiome where samples are stratified by #' their community composition through Dirichlet Multinomial Mixtures (DMM). It #' was derived from the \pkg{DirichletMultinomial} package. -#' +#' #' @format A SummarizedExperiment with 130 features and 278 samples. The #' rowData contains no taxonomic information. The colData includes: -#' +#' #' \describe{ #' \item{pheno}{participant's weight condition (Lean, Overwt and Obese)} #' } -#' +#' #' @name dmn_se #' @docType data #' @aliases twins @@ -215,23 +217,23 @@ NULL #' PLoS ONE 7(2): e30126. \url{https://doi.org/10.1371/journal.pone.0030126} #' #' Turnbaugh PJ, Hamady M, Yatsunenko T, Cantarel BL, Duncan A, et al. (2009). -#' A core gut microbiome in obese and lean twins. Nature 457: 480–484. +#' A core gut microbiome in obese and lean twins. Nature 457: 480–484. #' \url{https://doi.org/10.1038/nature07540} NULL #' Skin microbial profiles 58 genetically unrelated individuals -#' +#' #' peerj13075 includes skin microbial profiles of 58 volunteers with multiple #' factors. 16S r-RNA sequencing of V3-V4 regions was done to generate millions #' of read using illumina platform. A standard bioinformatic and statistical #' analysis done to explore skin bacterial diversity and its association with #' age, diet, geographical locations. The authors investigated significant #' association of skin microbiota with individual’s geographical location. -#' +#' #' @format A TreeSummarizedExperiment with 674 features and 58 samples. The #' rowData contains taxonomic information at kingdom, phylum, class, order, #' family and genus level. The colData includes: -#' +#' #' \describe{ #' \item{Sample}{sample ID} #' \item{Geographical_location}{city where participant lives (Ahmednagar, @@ -248,18 +250,18 @@ NULL #' @seealso \code{\link{mia-datasets}} #' @author Potbhare, R., et al. 
#' @references -#' Potbhare, R., RaviKumar, A., Munukka, E., Lahti, L., & Ashma, R. (2022). +#' Potbhare, R., RaviKumar, A., Munukka, E., Lahti, L., & Ashma, R. (2022). #' Skin microbiota diversity among genetically unrelated individuals of Indian -#' origin. +#' origin. #' PeerJ, 10, e13075. \url{https://doi.org/10.7717/peerj.13075} #' Supplemental information includes OTU table and taxonomy table -#' publicly-accessible from: +#' publicly-accessible from: #' \url{https://www.doi.org/10.7717/peerj.13075/supp-1} #' \url{https://www.doi.org/10.7717/peerj.13075/supp-2} NULL #' Multiomics dataset from 40 rat samples -#' +#' #' HintikkaXO is a multiomics dataset from a rat experiment studying effect of #' fat and prebiotics in diet. It contains high-throughput profiling data from #' 40 rat samples, including 39 biomarkers, 38 metabolites (NMR), and 12706 OTUs @@ -267,7 +269,7 @@ NULL #' High/Low fat diet and xylo-oligosaccaride supplementation. Column metadata is #' common for all experiments (microbiota, metabolites, biomarkers) and is #' described below. -#' +#' #' @format A MultiAssayExperiment with 3 experiments (microbiota, metabolites #' and #' biomarkers). rowData of the microbiota experiment contains taxonomic @@ -276,7 +278,7 @@ NULL #' metabolites #' and biomarkers experiments contain 38 NMR metabolites and 39 biomarkers, #' respectively. The colData includes: -#' +#' #' \describe{ #' \item{Sample}{Sample ID (character)} #' \item{Rat}{Rat ID (factor)} @@ -285,7 +287,7 @@ NULL #' \item{Fat}{Fat in Diet (factor; Low/High)} #' \item{XOS}{XOS Diet Supplement (numeric; 0/1)} #' } -#' +#' #' @name HintikkaXOData #' @docType data #' @keywords datasets @@ -293,16 +295,16 @@ NULL #' @seealso \code{\link{mia-datasets}} #' @author Hintikka L et al. #' @references -#' Hintikka L et al. (2021): Xylo-oligosaccharides in prevention of hepatic -#' steatosis and adipose tissue inflammation: associating taxonomic and -#' metabolomic patterns in fecal microbiota with biclustering. International +#' Hintikka L et al. (2021): Xylo-oligosaccharides in prevention of hepatic +#' steatosis and adipose tissue inflammation: associating taxonomic and +#' metabolomic patterns in fecal microbiota with biclustering. International #' Journal of Environmental Research and Public Health 18(8):4049. #' \url{https://doi.org/10.3390/ijerph18084049} -#' +#' NULL #' Gut microbiota profiles of 27 individuals with ADHD and healthy controls -#' +#' #' Tengeler2020 includes gut microbiota profiles of 27 persons with ADHD. A #' standard bioinformatic and statistical analysis done to demonstrate that #' altered microbial composition could be a driver of altered brain structure @@ -313,7 +315,7 @@ NULL #' @format A TreeSummarizedExperiment with 151 features and 27 samples. The #' rowData contains taxonomic information at Kingdom, Phylum, Class, Order, #' Family and Genus level. The colData includes: -#' +#' #' \describe{ #' \item{patient_status}{clinical status of the patient (ADHD or Control)} #' \item{cohort}{cohort to which the patient belongs (Cohort_1, Cohort_2 and @@ -321,7 +323,7 @@ NULL #' \item{patient_status_vs_cohort}{combination of patient_status and cohort} #' \item{sample_name}{unique sample ID} #' } -#' +#' #' @name Tengeler2020 #' @docType data #' @keywords datasets @@ -329,42 +331,42 @@ NULL #' @seealso \code{\link{mia-datasets}} #' @author A.C. Tengeler, et al. #' @references -#' Tengeler, A.C., Dam, S.A., Wiesmann, M. et al. +#' Tengeler, A.C., Dam, S.A., Wiesmann, M. et al. 
#' Gut microbiota from persons with attention-deficit/hyperactivity disorder -#' affects the brain in mice. +#' affects the brain in mice. #' Microbiome 8, 44 (2020). \url{https://doi.org/10.1186/s40168-020-00816-x} -#' -#' Supplemental information includes Home-cage activity, methods, results and imaging parameters and publicly-accessible from: +#' +#' Supplemental information includes Home-cage activity, methods, results and imaging parameters and publicly-accessible from: #' \url{https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-020-00816-x/MediaObjects/40168_2020_816_MOESM1_ESM.docx} #' \url{https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-020-00816-x/MediaObjects/40168_2020_816_MOESM2_ESM.docx} #' \url{https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-020-00816-x/MediaObjects/40168_2020_816_MOESM3_ESM.docx} -#' +#' NULL #' Fecal microbiota samples from 589 patients across different colorectal #' cancer stages #' -#' The study combined Quantitative Microbiome Profiling (QMP) with -#' extensive patient phenotyping from a group of 589 colorectal cancer (CRC) -#' patients, advanced adenoma (AA) patients, and healthy controls. +#' The study combined Quantitative Microbiome Profiling (QMP) with +#' extensive patient phenotyping from a group of 589 colorectal cancer (CRC) +#' patients, advanced adenoma (AA) patients, and healthy controls. #' By implementing confounder control and quantitative profiling methods, -#' the study +#' the study #' was able to reveal potential misleading associations between microbial -#' markers +#' markers #' and colorectal cancer development that were driven by other factors like -#' intestinal +#' intestinal #' inflammation, rather than the cancer diagnosis itself. #' -#' @format A TreeSummarizedExperiment with 676 features and 589 samples. +#' @format A TreeSummarizedExperiment with 676 features and 589 samples. #' The rowData contains species. The colData includes: -#' +#' #' \describe{ #' \item{sampleID}{(character) Sample ID from the corresponding study} #' \item{diagnosis}{(factor) Diagnosis type, with possible values: "ADE" -#' (advanced adenoma), +#' (advanced adenoma), #' "CRC" (colorectal cancer), "CTL" (control)} #' \item{colonoscopy}{(factor) Colonoscopy result, with possible values: -#' "FIT_Positive", +#' "FIT_Positive", #' "familial_risk_familial_CRC_FCC", "familial_risk_no", "abdomil_complaints"} #' } #' @@ -373,16 +375,59 @@ NULL #' @keywords datasets #' @usage data(Tito2024QMP) #' @seealso \code{\link{mia-datasets}} -#' @author +#' @author #' Shadman Ishraq #' @references #' Raúl Y. Tito, Sara Verbandt, Marta Aguirre Vazquez, Leo Lahti, Chloe #' Verspecht, Verónica Lloréns-Rico, Sara Vieira-Silva, #' Janine Arts, Gwen Falony, Evelien Dekker, Joke Reumers, Sabine Tejpar & -#' Jeroen Raes (2024). +#' Jeroen Raes (2024). #' Microbiome confounders and quantitative profiling challenge predicted -#' microbial targets in colorectal cancer development. -#' Nature Medicine,30, 1339-1348. +#' microbial targets in colorectal cancer development. +#' Nature Medicine,30, 1339-1348. #' \url{https://doi.org/10.1038/s41591-024-02963-2} -#' +#' +NULL + +#' IBDMDB 2-omic demo dataset (MGX + MTX) +#' +#' A compact example derived from the Integrative Human Microbiome Project (iHMP) +#' Inflammatory Bowel Disease (IBD) cohort. 
+#' +#' The dataset \code{ibdmdb_2omic_demo} is a named list with: +#' \itemize{ +#' \item \code{se_mgx}: metagenomic taxonomic profiles (MGX) +#' \item \code{se_mtx}: metatranscriptomic taxonomic profiles (MTX) +#' \item \code{mae2}: a \link[MultiAssayExperiment]{MultiAssayExperiment} +#' containing both experiments +#' } +#' +#' These compact objects are intended for quick examples and vignettes. +#' Load with: +#' \code{data("ibdmdb_2omic_demo")}. +#' +#' @name ibdmdb_2omic_demo +#' @docType data +#' @keywords datasets +#' @usage data("ibdmdb_2omic_demo") +#' +#' @format A \code{MultiAssayExperiment} with two experiments: +#' \describe{ +#' \item{se_mgx}{A \code{SummarizedExperiment} with MGX abundance data.} +#' \item{se_mtx}{A \code{SummarizedExperiment} with MTX abundance data.} +#' \item{mae2}{A \code{MultiAssayExperiment} containing both experiments.} +#' } +#' +#' @source Prepared by \code{inst/scripts/prepare_ibdmdb_demo.R}. The script is for +#' developers and downloads publicly available IBDMDB/HMP2 taxonomic profiles +#' and metadata from Zenodo into a local cache (e.g. \code{tools/cache/}), +#' then subsets and filters the raw data to create a small example suitable +#' for vignettes. +#' +#' @references +#' Lloyd-Price J, Arze C, Ananthakrishnan AN, et al. +#' \emph{Multi-omics of the gut microbial ecosystem in inflammatory bowel +#' diseases.} +#' Nature 569, 655–662 (2019). +#' \doi{10.1038/s41586-019-1237-9} NULL \ No newline at end of file diff --git a/data/ibdmdb_2omic_demo.rda b/data/ibdmdb_2omic_demo.rda new file mode 100644 index 000000000..84770ee71 Binary files /dev/null and b/data/ibdmdb_2omic_demo.rda differ diff --git a/inst/scripts/prepare_ibdmdb_demo.R b/inst/scripts/prepare_ibdmdb_demo.R new file mode 100644 index 000000000..92fd922b6 --- /dev/null +++ b/inst/scripts/prepare_ibdmdb_demo.R @@ -0,0 +1,277 @@ +# Build compact demo objects (.rda) for IBDMDB examples/vignettes. +# +# Produces: +# data/ibdmdb_2omic_demo.rda (ibdmdb_2omic_demo) +# +# Source raw inputs from inst/extdata and pre-process for speed/size. +# +# This script is for DEVELOPERS only. It is NOT run during R CMD check. + +message("== IBDMDB demo data preparation ==") + +# ------------------------------------------------------------------------------ +# Config +# ------------------------------------------------------------------------------ + +# This script is for DEVELOPERS only. +# It downloads large raw files into a local cache (NOT committed to git), +# then creates a compact demo dataset in data/ibdmdb_2omic_demo.rda. +# It is NOT run during R CMD check. 
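+# Downstream usage sketch (illustrative only, not executed here). It assumes the
+# generated object is the named list documented in ?ibdmdb_2omic_demo, with the
+# MultiAssayExperiment stored as $mae2 and experiments named "se_mgx"/"se_mtx":
+#
+#   data("ibdmdb_2omic_demo", package = "mia")
+#   mae <- ibdmdb_2omic_demo$mae2
+#   res <- jointRPCAuniversal(mae, experiments = c("se_mgx", "se_mtx"))
+#   head(res$ord_res$samples)
+#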
+ +cache_dir <- file.path("tools", "cache", "mia_ibdmdb_cache") +dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE) + +# Stable download URLs (Zenodo) +url_meta <- "https://zenodo.org/records/18280405/files/hmp2_metadata_2018-08-20.csv" +url_mtx <- "https://zenodo.org/records/18280535/files/ecs_relab.tsv" +url_mgx <- "https://zenodo.org/records/18280521/files/taxonomic_profiles_mgx.tsv" + +# Local cached file paths (downloaded if missing) +f_mgx <- file.path(cache_dir, "taxonomic_profiles_mgx.tsv") +f_mtx <- file.path(cache_dir, "ecs_relab.tsv") +f_meta <- file.path(cache_dir, "hmp2_metadata_2018-08-20.csv") + +# Prevalence thresholds (fraction of samples) +prev_mgx_frac <- 0.05 +prev_mtx_frac <- 0.02 + +# Cap feature counts for speed/size +cap_mgx <- 800L +cap_mtx <- 800L +max_samples <- 60L + +# ------------------------------------------------------------------------------ +# Dependencies +# ------------------------------------------------------------------------------ + +need <- c("data.table", "matrixStats", "SummarizedExperiment", "MultiAssayExperiment", + "S4Vectors") +miss <- setdiff(need, rownames(installed.packages())) +if (length(miss)) { + stop( + "Missing packages: ", + paste(miss, collapse = ", "), + ". Install them, then re-run this script." + ) +} + +library(data.table) +library(matrixStats) +library(SummarizedExperiment) +library(MultiAssayExperiment) +library(S4Vectors) + +# ------------------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------------------ + +download_if_missing <- function(url, dest) { + if (file.exists(dest)) return(invisible(dest)) + message("Downloading: ", basename(dest)) + utils::download.file(url, dest, mode = "wb", quiet = FALSE) + invisible(dest) +} + +read_ibdmdb_tsv <- function(path) { + stopifnot(file.exists(path)) + first <- readLines(path, n = 200L, warn = FALSE) + comment_idx <- which(grepl("^#", first)) + if (length(comment_idx) == 0L) { + stop("No commented header line found in: ", path) + } + + header_line <- first[max(comment_idx)] + header_line <- sub("^#\\s*", "", header_line) + header_line <- sub("^\ufeff", "", header_line) + header_vec <- strsplit(header_line, "\t", fixed = TRUE)[[1]] + header_vec <- gsub('^"|"$', "", header_vec) + + dt <- data.table::fread( + path, + skip = length(comment_idx), + header = FALSE, + sep = "\t", + quote = "", + fill = TRUE + ) + if (ncol(dt) < length(header_vec)) { + stop(sprintf( + "Header columns (%d) > data columns (%d) in %s", + length(header_vec), ncol(dt), path + )) + } + if (ncol(dt) > length(header_vec)) { + dt <- dt[, seq_len(length(header_vec)), with = FALSE] + } + data.table::setnames(dt, header_vec) + dt +} + +to_matrix <- function(dt) { + rn <- dt[[1]] + mat <- as.matrix(dt[, -1, with = FALSE]) + rownames(mat) <- rn + storage.mode(mat) <- "numeric" + mat[is.na(mat)] <- 0 + mat +} + +dedup_rownames <- function(mat) { + stopifnot(!is.null(rownames(mat))) + rn <- rownames(mat) + empty_or_na <- rn == "" | is.na(rn) + if (any(empty_or_na)) { + rn[empty_or_na] <- paste0("feat_", seq_len(sum(empty_or_na))) + } + rownames(mat) <- make.unique(as.character(rn), sep = "_") + mat +} + +sanitize_matrix <- function(mat) { + mat <- as.matrix(mat) + storage.mode(mat) <- "numeric" + mat[!is.finite(mat)] <- 0 + mat +} + +cap_by_var <- function(M, cap) { + if (nrow(M) <= cap) return(M) + ord <- order(matrixStats::rowVars(M), decreasing = TRUE) + M[ord[seq_len(cap)], , drop = FALSE] +} + +read_metadata <- 
function(path) { + stopifnot(file.exists(path)) + read.csv(path, stringsAsFactors = FALSE, check.names = FALSE) +} + +make_SE <- function(mat, meta_df = NULL, assay_name = "counts") { + if (is.null(meta_df)) { + return(SummarizedExperiment::SummarizedExperiment( + assays = setNames(list(mat), assay_name), + colData = S4Vectors::DataFrame(row.names = colnames(mat)) + )) + } + + md <- as.data.frame(meta_df, stringsAsFactors = FALSE, check.names = FALSE) + + overlaps <- vapply( + md, + function(col) sum(as.character(col) %in% colnames(mat)), + numeric(1) + ) + best <- names(overlaps)[which.max(overlaps)] + + if (length(best) == 0 || overlaps[[best]] == 0) { + return(SummarizedExperiment::SummarizedExperiment( + assays = setNames(list(mat), assay_name), + colData = S4Vectors::DataFrame(row.names = colnames(mat)) + )) + } + + md_sub <- md[md[[best]] %in% colnames(mat), , drop = FALSE] + md_sub <- md_sub[!duplicated(md_sub[[best]]), , drop = FALSE] + + rownames(md_sub) <- as.character(md_sub[[best]]) + md_sub <- md_sub[colnames(mat), , drop = FALSE] + + if (anyDuplicated(rownames(md_sub))) { + rownames(md_sub) <- make.unique(rownames(md_sub), sep = "_dup") + } + stopifnot(identical(rownames(md_sub), colnames(mat))) + + SummarizedExperiment::SummarizedExperiment( + assays = setNames(list(mat), assay_name), + colData = S4Vectors::DataFrame(md_sub) + ) +} + +# ------------------------------------------------------------------------------ +# I/O guards + download raw inputs +# ------------------------------------------------------------------------------ + +if (!dir.exists("data")) dir.create("data", recursive = TRUE) + +download_if_missing(url_mgx, f_mgx) +download_if_missing(url_mtx, f_mtx) +download_if_missing(url_meta, f_meta) + +missing_files <- c(f_mgx, f_mtx, f_meta)[!file.exists(c(f_mgx, f_mtx, f_meta))] +if (length(missing_files)) { + stop( + "Missing cached file(s):\n- ", + paste(missing_files, collapse = "\n- "), + "\n\nDownload failed or paths are wrong." 
+ ) +} + +# ------------------------------------------------------------------------------ +# Metadata +# ------------------------------------------------------------------------------ + +meta_full <- read_metadata(f_meta) + +# ------------------------------------------------------------------------------ +# Prepare 2-omic (MGX + MTX) +# ------------------------------------------------------------------------------ + +message("Preparing 2-omic MGX + MTX demo ...") + +dt_mgx <- read_ibdmdb_tsv(f_mgx) +dt_mtx <- read_ibdmdb_tsv(f_mtx) + +M_mgx <- sanitize_matrix(dedup_rownames(to_matrix(dt_mgx))) +M_mtx <- sanitize_matrix(dedup_rownames(to_matrix(dt_mtx))) + +shared <- intersect(colnames(M_mgx), colnames(M_mtx)) +shared <- sort(unique(shared[nchar(shared) > 0])) +if (length(shared) < 20) { + warning("Few shared samples for 2-omic: ", length(shared), " (keeping anyway).") +} +M_mgx <- M_mgx[, shared, drop = FALSE] +M_mtx <- M_mtx[, shared, drop = FALSE] + +n_samp <- ncol(M_mgx) +keep_mgx <- rowSums(M_mgx > 0) >= ceiling(prev_mgx_frac * n_samp) +keep_mtx <- rowSums(M_mtx > 0) >= ceiling(prev_mtx_frac * n_samp) +M_mgx <- M_mgx[keep_mgx, , drop = FALSE] +M_mtx <- M_mtx[keep_mtx, , drop = FALSE] + +M_mgx <- M_mgx[, colSums(M_mgx) > 0, drop = FALSE] +M_mtx <- M_mtx[, colSums(M_mtx) > 0, drop = FALSE] + +shared2 <- intersect(colnames(M_mgx), colnames(M_mtx)) +shared2 <- sort(unique(shared2)) +set.seed(1) +if (length(shared2) > max_samples) { + shared2 <- sort(sample(shared2, max_samples)) +} +M_mgx <- M_mgx[, shared2, drop = FALSE] +M_mtx <- M_mtx[, shared2, drop = FALSE] + +M_mgx <- cap_by_var(M_mgx, cap_mgx) +M_mtx <- cap_by_var(M_mtx, cap_mtx) + +meta_df <- meta_full +se_mgx <- make_SE(M_mgx, meta_df, assay_name = "mgx") +se_mtx <- make_SE(M_mtx, meta_df, assay_name = "mtx") + +mae2 <- MultiAssayExperiment::MultiAssayExperiment( + experiments = list(MGX = se_mgx, MTX = se_mtx) +) + +if (!is.null(SummarizedExperiment::colData(se_mgx))) { + MultiAssayExperiment::colData(mae2) <- SummarizedExperiment::colData(se_mgx) +} + +# Save only one dataset object +ibdmdb_2omic_demo <- mae2 + +save( + ibdmdb_2omic_demo, + file = file.path("data", "ibdmdb_2omic_demo.rda"), + compress = "xz" +) + +message("Saved: data/ibdmdb_2omic_demo.rda") +message("== Done. Re-run devtools::document(); devtools::check(); BiocCheck::BiocCheck(). ==") \ No newline at end of file diff --git a/man/getJointRPCA.Rd b/man/getJointRPCA.Rd new file mode 100644 index 000000000..ca1b1cd79 --- /dev/null +++ b/man/getJointRPCA.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getJointRPCA.R +\name{getJointRPCA} +\alias{getJointRPCA} +\title{Run Joint-RPCA and store embedding in reducedDim} +\arguments{ +\item{altexp}{Optional name of an alternative experiment. If supplied, +Joint-RPCA is run on \code{altExp(x, altexp)} instead of \code{x}.} + +\item{name}{Character scalar giving the name of the \code{reducedDim} slot +in which to store the joint sample embedding. Defaults to \code{"JointRPCA"}.} + +\item{x}{Input object: \code{MultiAssayExperiment}, \code{SummarizedExperiment} +(including \code{TreeSummarizedExperiment}), list of matrices, or single matrix.} + +\item{experiments}{Character vector of experiment names to extract when \code{x} +is a \code{MultiAssayExperiment} (i.e. \code{names(experiments(x))}). +If \code{NULL}, all experiments are used. 
+ +For \code{MultiAssayExperiment} inputs, \strong{one assay per experiment} is +used: by default the first assay returned by +\code{assayNames()} (or index \code{1L} if unnamed). +The actually used assay names are recorded in \code{$assay_names_used} in +the result. If you need a different assay (e.g. \code{"relab"} instead of +\code{"counts"}), subset or reorder assays in \code{x} before calling +\code{jointRPCAuniversal()}.} + +\item{transform}{Character string specifying preprocessing applied to each +input table before ordination. Use \code{"rclr"} to apply the robust CLR +transform (via \code{decostand(method = "rclr")}) or \code{"none"} to +disable transformation (data are used as-is after masking non-finite values).} + +\item{optspace.tol}{Numeric tolerance passed to \code{optspace()}.} + +\item{center}{Logical; whether to center the reconstructed low-rank matrix +(double-centering) prior to SVD/PCA steps.} + +\item{scale}{Logical; whether to scale the reconstructed matrix prior to +SVD/PCA steps. Defaults to \code{FALSE}.} + +\item{...}{Additional arguments passed to \code{.joint_rpca()}.} +} +\value{ +The input object \code{x} with a new entry in +\code{reducedDim(x, name)} containing the Joint-RPCA sample embedding. +The full Joint-RPCA result (including distances, cross-validation +statistics and transformed tables) is stored in +\code{metadata(x)$JointRPCA[[name]]}. + +Output from \code{.joint_rpca()} with extra fields when \code{x} is a +\code{MultiAssayExperiment}: +\itemize{ +\item \code{$experiment_names}: character vector of experiments used. +\item \code{$assay_names_used}: named character vector giving, for each +experiment, the assay name that was used (typically the first in +\code{assayNames()}). +} +} +\description{ +Run Joint-RPCA and store embedding in reducedDim + +Universal Joint RPCA Wrapper +} +\details{ +Convenience wrapper that runs Joint Robust PCA on one or more compositional +tables and stores the resulting sample embedding in \code{reducedDim(x, name)}, +similar to \code{runMDS()} and \code{runPCA()}. +} diff --git a/man/ibdmdb_2omic_demo.Rd b/man/ibdmdb_2omic_demo.Rd new file mode 100644 index 000000000..6d490683b --- /dev/null +++ b/man/ibdmdb_2omic_demo.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mia.R +\docType{data} +\name{ibdmdb_2omic_demo} +\alias{ibdmdb_2omic_demo} +\title{IBDMDB 2-omic demo dataset (MGX + MTX)} +\format{ +A \code{MultiAssayExperiment} with two experiments: +\describe{ +\item{se_mgx}{A \code{SummarizedExperiment} with MGX abundance data.} +\item{se_mtx}{A \code{SummarizedExperiment} with MTX abundance data.} +\item{mae2}{A \code{MultiAssayExperiment} containing both experiments.} +} +} +\source{ +Prepared by \code{inst/scripts/prepare_ibdmdb_demo.R}. The script is for +developers and downloads publicly available IBDMDB/HMP2 taxonomic profiles +and metadata from Zenodo into a local cache (e.g. \code{tools/cache/}), +then subsets and filters the raw data to create a small example suitable +for vignettes. +} +\usage{ +data("ibdmdb_2omic_demo") +} +\description{ +A compact example derived from the Integrative Human Microbiome Project (iHMP) +Inflammatory Bowel Disease (IBD) cohort. 
+} +\details{ +The dataset \code{ibdmdb_2omic_demo} is a named list with: +\itemize{ +\item \code{se_mgx}: metagenomic taxonomic profiles (MGX) +\item \code{se_mtx}: metatranscriptomic taxonomic profiles (MTX) +\item \code{mae2}: a \link[MultiAssayExperiment]{MultiAssayExperiment} +containing both experiments +} + +These compact objects are intended for quick examples and vignettes. +Load with: +\code{data("ibdmdb_2omic_demo")}. +} +\references{ +Lloyd-Price J, Arze C, Ananthakrishnan AN, et al. +\emph{Multi-omics of the gut microbial ecosystem in inflammatory bowel +diseases.} +Nature 569, 655–662 (2019). +\doi{10.1038/s41586-019-1237-9} +} +\keyword{datasets} diff --git a/man/mia-datasets.Rd b/man/mia-datasets.Rd index 00b590f18..48dd28a1a 100644 --- a/man/mia-datasets.Rd +++ b/man/mia-datasets.Rd @@ -21,6 +21,8 @@ features and 280 samples} and 3 samples} \item{\code{\link{GlobalPatterns}}: A TreeSummarizedExperiment with 19216 features and 26 samples} +\item{\code{\link{ibdmdb_2omic_demo}}: A compact +\code{MultiAssayExperiment} demo dataset with 2 experiments (MGX + MTX)} \item{\code{\link{HintikkaXOData}}: A MultiAssayExperiment with 3 experiments (microbiota, metabolites and biomarkers)} \item{\code{\link{peerj13075}}: A TreeSummarizedExperiment with 674 diff --git a/man/mia-package.Rd b/man/mia-package.Rd index 80908b87a..b42682002 100644 --- a/man/mia-package.Rd +++ b/man/mia-package.Rd @@ -55,6 +55,7 @@ Other contributors: \item Sam Hillman [contributor] \item Jesse Pasanen [contributor] \item Eetu Tammi [contributor] + \item Aituar Bektanov [contributor] } } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 875ec9258..f1f2887a6 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -8,6 +8,10 @@ reference: - getMediation - getPERMANOVA +- title: Joint-RPCA + contents: + - getJointRPCA + - title: Clustering - contents: - calculateDMN @@ -39,6 +43,7 @@ reference: - peerj13075 - Tengeler2020 - Tito2024QMP + - ibdmdb_2omic_demo - title: Dataset description - contents: - getPrevalence diff --git a/tests/testthat/test-getJointRPCA.R b/tests/testthat/test-getJointRPCA.R new file mode 100644 index 000000000..07eea3a63 --- /dev/null +++ b/tests/testthat/test-getJointRPCA.R @@ -0,0 +1,306 @@ +test_that("getJointRPCA stores embedding in reducedDim and metadata", { + skip_if_not(requireNamespace("SingleCellExperiment", quietly = TRUE)) + + set.seed(123) + X <- abs(matrix(rnorm(20 * 10), nrow = 20, ncol = 10)) + rownames(X) <- paste0("f", 1:20) + colnames(X) <- paste0("s", 1:10) + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = X) + ) + + sce2 <- mia:::getJointRPCA( + sce, + name = "JointRPCA_test", + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 3 + ) + + emb <- reducedDim(sce2, "JointRPCA_test") + expect_true(is.matrix(emb)) + expect_equal(nrow(emb), ncol(X)) + expect_equal(rownames(emb), colnames(X)) + + jr <- metadata(sce2)$JointRPCA[["JointRPCA_test"]] + expect_type(jr, "list") + expect_true(all(c("ord_res", "dist", "cv_stats", "rclr_tables") %in% names(jr))) +}) + +test_that("single-view .joint_rpca returns the expected structure", { + set.seed(123) + # Features x samples + X <- abs(matrix(rnorm(20 * 12), nrow = 20, ncol = 12)) + rownames(X) <- paste0("f", 1:20) + colnames(X) <- paste0("s", 1:12) + + fit <- mia:::.joint_rpca( + tables = list(assay1 = X), + n.components = 3, + max.iterations = 2, + transform = "none", + n.test.samples = 4 + ) + + # Top-level structure + expect_type(fit, "list") + 
expect_true(all(c("ord_res", "dist", "cv_stats", "rclr_tables") %in% names(fit))) + + # Ordination structure + OR <- fit$ord_res + expect_true(all(c("eigvals", "samples", "features", "proportion.explained") %in% names(OR))) + + # Samples: should include both train + projected test = all original samples + expect_equal(nrow(OR$samples), ncol(X)) + expect_equal(colnames(OR$samples), paste0("PC", 1:3)) + expect_true(all(colnames(OR$samples) %in% names(OR$proportion.explained))) + + # Features: now per-view list, even for single-view + expect_true(is.list(OR$features)) + expect_true("assay1" %in% names(OR$features)) + + F1 <- OR$features$assay1 + if (is.data.frame(F1)) F1 <- as.matrix(F1) + expect_equal(nrow(F1), nrow(X)) + expect_equal(colnames(F1), paste0("PC", 1:3)) + + # CV stats & dist + expect_true(is.data.frame(fit$cv_stats)) + expect_true(all(c("mean_CV", "std_CV", "run", "iteration") %in% names(fit$cv_stats))) + + expect_s3_class(fit$dist, "DistanceMatrix") + expect_true(is.matrix(fit$dist$data)) + expect_equal(nrow(fit$dist$data), ncol(X)) + expect_equal(colnames(fit$dist$data), colnames(X)) +}) + +test_that("multi-view .joint_rpca preserves per-view feature loadings", { + set.seed(42) + # Two views, same samples + S <- paste0("s", 1:10) + A <- abs(matrix(rnorm(25 * 10), 25, 10, + dimnames = list(paste0("a", 1:25), S))) + B <- abs(matrix(rnorm(30 * 10), 30, 10, + dimnames = list(paste0("b", 1:30), S))) + + fit <- mia:::.joint_rpca( + tables = list(MGX = A, MTX = B), + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 3 + ) + + OR <- fit$ord_res + expect_true(is.list(OR$features)) + expect_true(all(c("MGX", "MTX") %in% names(OR$features))) + + # Check each view's loading matrix dimensions & rownames + MGXv <- OR$features$MGX; MTXv <- OR$features$MTX + if (is.data.frame(MGXv)) MGXv <- as.matrix(MGXv) + if (is.data.frame(MTXv)) MTXv <- as.matrix(MTXv) + + expect_equal(nrow(MGXv), nrow(A)) + expect_equal(nrow(MTXv), nrow(B)) + expect_equal(colnames(MGXv), paste0("PC", 1:2)) + expect_equal(colnames(MTXv), paste0("PC", 1:2)) + expect_true(all(rownames(MGXv) %in% rownames(A))) + expect_true(all(rownames(MTXv) %in% rownames(B))) + + # Samples include all training + projected test + expect_equal(nrow(OR$samples), length(S)) + expect_equal(colnames(OR$samples), paste0("PC", 1:2)) +}) + +test_that("unshared samples are dropped with a warning and alignment is correct", { + set.seed(7) + S1 <- paste0("s", 1:8) + S2 <- c(paste0("s", 3:10)) + A <- abs(matrix(rnorm(20 * length(S1)), 20, length(S1), + dimnames = list(paste0("a", 1:20), S1))) + B <- abs(matrix(rnorm(15 * length(S2)), 15, length(S2), + dimnames = list(paste0("b", 1:15), S2))) + + expect_warning( + fit <- mia:::.joint_rpca( + tables = list(MGX = A, MTX = B), + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 2 + ), + regexp = "Removing.*sample\\(s\\).*overlap", + all = FALSE + ) + + OR <- fit$ord_res + expect_equal(nrow(OR$samples), 6) +}) + +test_that("projection of new samples via .transform appends rows and keeps component names", { + set.seed(99) + S_train <- paste0("s", 1:8) + featsA <- paste0("a", 1:18) + featsB <- paste0("b", 1:22) + + A <- abs(matrix(rnorm(length(featsA) * length(S_train)), length(featsA), length(S_train), + dimnames = list(featsA, S_train))) + B <- abs(matrix(rnorm(length(featsB) * length(S_train)), length(featsB), length(S_train), + dimnames = list(featsB, S_train))) + + fit <- mia:::.joint_rpca( + tables = list(MGX = A, MTX = B), + n.components = 
3, + max.iterations = 2, + transform = "none", + n.test.samples = 3 + ) + + OR <- fit$ord_res + n_before <- nrow(OR$samples) + + # Create new samples (same features, new sample IDs) + S_new <- c("s9", "s10") + A_new <- abs(matrix(rnorm(length(featsA) * length(S_new)), length(featsA), length(S_new), + dimnames = list(featsA, S_new))) + B_new <- abs(matrix(rnorm(length(featsB) * length(S_new)), length(featsB), length(S_new), + dimnames = list(featsB, S_new))) + + # Project new samples + OR2 <- mia:::.transform( + ordination = OR, + tables = list(MGX = A_new, MTX = B_new), + apply.rclr = FALSE + ) + + expect_true(is.list(OR2)) + expect_true(all(c("samples", "features", "eigvals", "proportion.explained") %in% names(OR2))) + expect_equal(colnames(OR2$samples), colnames(OR$samples)) + expect_true(all(S_new %in% rownames(OR2$samples))) + expect_equal(nrow(OR2$samples), n_before + length(S_new)) +}) + +test_that("errors surface for duplicated sample IDs during preprocessing", { + set.seed(101) + X <- abs(matrix(rpois(24, lambda = 5), 6, 4)) + rownames(X) <- paste0("f", 1:6) + colnames(X) <- c("s1", "s1", "s2", "s3") + + expect_error( + mia:::.joint_rpca( + tables = list(assay1 = X), + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 2 + ), + regexp = "duplicate sample \\(column\\) IDs", + ignore.case = TRUE + ) +}) + +test_that("jointRPCAuniversal works on MultiAssayExperiment", { + set.seed(2025) + + # Two views, same samples + S <- paste0("s", 1:8) + MGX_mat <- abs(matrix( + rnorm(15 * length(S)), + nrow = 15, ncol = length(S), + dimnames = list(paste0("mgx", 1:15), S) + )) + MTX_mat <- abs(matrix( + rnorm(10 * length(S)), + nrow = 10, ncol = length(S), + dimnames = list(paste0("mtx", 1:10), S) + )) + + se_mgx <- SummarizedExperiment::SummarizedExperiment( + assays = list(counts = MGX_mat) + ) + se_mtx <- SummarizedExperiment::SummarizedExperiment( + assays = list(counts = MTX_mat) + ) + + mae2 <- MultiAssayExperiment::MultiAssayExperiment( + experiments = list(MGX = se_mgx, MTX = se_mtx) + ) + + fit <- mia:::jointRPCAuniversal( + x = mae2, + experiments = c("MGX", "MTX"), + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 3 + ) + + # Basic structure + expect_type(fit, "list") + expect_true(all(c("ord_res", "dist", "cv_stats", "rclr_tables") %in% names(fit))) + + # rclr_tables should be named by experiment + expect_true(is.list(fit$rclr_tables)) + expect_equal(names(fit$rclr_tables), c("MGX", "MTX")) + + # Per-view feature loadings should also carry MGX/MTX names + OR <- fit$ord_res + expect_true(is.list(OR$features)) + expect_true(all(c("MGX", "MTX") %in% names(OR$features))) + + MGXv <- OR$features$MGX + MTXv <- OR$features$MTX + if (is.data.frame(MGXv)) MGXv <- as.matrix(MGXv) + if (is.data.frame(MTXv)) MTXv <- as.matrix(MTXv) + + expect_equal(nrow(MGXv), nrow(MGX_mat)) + expect_equal(nrow(MTXv), nrow(MTX_mat)) + expect_equal(colnames(MGXv), paste0("PC", 1:2)) + expect_equal(colnames(MTXv), paste0("PC", 1:2)) +}) + +test_that("jointRPCAuniversal uses default assays per experiment in a MAE and records them", { + skip_if_not_installed("MultiAssayExperiment") + skip_if_not_installed("SummarizedExperiment") + + S <- paste0("s", 1:6) + A1_counts <- matrix(abs(rnorm(10 * 6)), nrow = 10, + dimnames = list(paste0("a", 1:10), S)) + A1_alt <- A1_counts * 2 + + A2_counts <- matrix(abs(rnorm(8 * 6)), nrow = 8, + dimnames = list(paste0("b", 1:8), S)) + A2_alt <- A2_counts * 3 + + se1 <- SummarizedExperiment::SummarizedExperiment( + assays = 
list(counts = A1_counts, alt = A1_alt) + ) + se2 <- SummarizedExperiment::SummarizedExperiment( + assays = list(counts = A2_counts, alt = A2_alt) + ) + + mae <- MultiAssayExperiment::MultiAssayExperiment( + experiments = list(MGX = se1, MTX = se2) + ) + + fit <- mia:::jointRPCAuniversal( + x = mae, + experiments = c("MGX", "MTX"), + n.components = 2, + max.iterations = 2, + transform = "none", + n.test.samples = 2 + ) + + # Basic structure + expect_true(is.list(fit)) + expect_true(all(c("MGX", "MTX") %in% names(fit$rclr_tables))) + + expect_true(all(c("experiment_names", "assay_names_used") %in% names(fit))) + expect_equal(fit$experiment_names, c("MGX", "MTX")) + expect_named(fit$assay_names_used, c("MGX", "MTX")) + expect_equal(fit$assay_names_used[["MGX"]], "counts") + expect_equal(fit$assay_names_used[["MTX"]], "counts") +}) \ No newline at end of file
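
For readers of this patch, here is a minimal end-to-end sketch of the workflow the tests above exercise, assuming the package built from this branch is installed. The reducedDim name "JointRPCA_demo" and the simulated matrix are illustrative only; the call mirrors the first test case rather than defining the canonical usage.

    library(SingleCellExperiment)
    library(mia)

    # Simulate a small positive matrix: 20 features x 10 samples
    set.seed(1)
    X <- abs(matrix(rnorm(20 * 10), nrow = 20,
                    dimnames = list(paste0("f", 1:20), paste0("s", 1:10))))
    sce <- SingleCellExperiment(assays = list(counts = X))

    # Run Joint-RPCA and store the sample embedding under "JointRPCA_demo"
    sce <- getJointRPCA(sce, name = "JointRPCA_demo",
                        n.components = 2, max.iterations = 2,
                        transform = "none", n.test.samples = 3)

    # Sample scores live in reducedDim(); the full result (ord_res, dist,
    # cv_stats, rclr_tables) is kept in metadata()
    head(reducedDim(sce, "JointRPCA_demo"))
    names(metadata(sce)$JointRPCA[["JointRPCA_demo"]])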