Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: FRASER
Type: Package
Title: Find RAre Splicing Events in RNA-Seq Data
Version: 2.5.6
Date: 2025-10-24
Version: 2.5.7
Date: 2025-10-29
Authors@R: c(
person("Christian", "Mertes", role=c("aut", "cre"),
email="mertes@in.tum.de", comment=c(ORCID="0000-0002-1091-205X")),
Expand Down Expand Up @@ -120,6 +120,7 @@ Collate:
getURLs.R
helper-functions.R
mergeExternalData.R
mergeFds.R
saveHDF5Objects.R
RcppExports.R
autoencoder.R
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export(makeSimulatedFraserDataSet)
export(mapSeqlevels)
export(mergeCounts)
export(mergeExternalData)
export(mergeFDS)
export(name)
export(nonSplicedReads)
export(pVals)
Expand Down Expand Up @@ -168,6 +169,7 @@ importFrom(GenomeInfoDb,keepStandardChromosomes)
importFrom(GenomeInfoDb,seqinfo)
importFrom(GenomeInfoDb,seqlengths)
importFrom(GenomeInfoDb,seqlevels)
importFrom(GenomeInfoDb,seqlevelsInUse)
importFrom(GenomeInfoDb,seqlevelsStyle)
importFrom(GenomeInfoDb,seqnames)
importFrom(GenomeInfoDb,sortSeqlevels)
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
CHANGES IN VERSION 2.4.7
-------------------------
o Added mergeFDS function, which merges two fds FraserDataSet objects.

CHANGES IN VERSION 2.4.6
-------------------------
o Fixing a bug in the annotation, where we miss the annotation for junction,
Expand Down
144 changes: 144 additions & 0 deletions R/mergeFds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#' Merge two FraserDataSet objects with different splice sites and junctions
#'
#' This function merges two FraserDataSet objects that have different
#' splice sites and junctions but no overlapping samples. It combines
#' only the rawCountsJ and rawCountsSS assays and recalculates startID
#' and endID for the merged rowRanges.
#'
#' @param fds1 First FraserDataSet object.
#' @param fds2 Second FraserDataSet object.
#' @param join_type Type of join for rowRanges: "outer" (default) or "inner".
#' @param fds_name Name for the merged FraserDataSet.
#' @param workingDir Directory where to store HDF5 and RDS files. Defaults to
#' \code{FRASER_output} in the current working directory.
#' @return A merged FraserDataSet object containing combined samples and assays.
#' @examples
#' # merged_fds <- mergeFDS(fds1, fds2, join_type="outer")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it get saved already? if not, maybe add how to save it

#' @export
#' @importFrom GenomeInfoDb seqlevelsInUse
#' @importFrom SummarizedExperiment rowRanges rowData assays
mergeFDS <- function(fds1, fds2, join_type = c("outer", "inner"), fds_name="merged_fds", workingDir="FRASER_output") {
join_type <- match.arg(join_type)
# create a True/False flag for join types
all_flag <- if (join_type == "outer") TRUE else FALSE

create_junction_counts <- function(fds){
chr_levels <- seqlevelsInUse(fds)

row_ranges <- as.data.table(rowRanges(fds))

junction_counts <- as.data.table(assays(fds)$rawCountsJ)
junction_counts <- cbind(junction_counts, row_ranges[, c("seqnames", "start", "end", "width", "strand", "startID", "endID")])
# Enforce order
junction_counts[, seqnames := factor(as.character(seqnames), levels = chr_levels)]
setorder(junction_counts, seqnames, start, end)
return (junction_counts)
}

create_split_counts <- function(fds){

chr_levels <- seqlevelsInUse(fds)

row_ranges <- as.data.table(rowRanges(fds))

splice_sites <- rowData(nonSplicedReads(fds))
splice_site_counts <- as.data.table(assays(fds)$rawCountsSS)
splice_site_counts <- as.data.table(cbind(splice_sites, splice_site_counts))

splice_site_lookup <- unique(rbind(
row_ranges[, .(spliceSiteID = startID, seqnames, position = start - 1)], # donor positions
row_ranges[, .(spliceSiteID = endID, seqnames, position = end)] # acceptor positions
))

splice_site_lookup[, start := position]
splice_site_lookup[, end := start + 1]
splice_site_lookup[, width := 2]
splice_site_lookup[, position := NULL]

splice_site_counts <- merge(splice_site_counts, splice_site_lookup, by = "spliceSiteID", all.x = TRUE)
splice_site_counts[, seqnames := factor(as.character(seqnames), levels = chr_levels)]
setorder(splice_site_counts, seqnames, start)

return (splice_site_counts)

}

junction_counts_1 <- create_junction_counts(fds1)
junction_counts_2 <- create_junction_counts(fds2)

# Join the junction counts based on join_type
junction_counts_merged <- merge(junction_counts_1, junction_counts_2, by=c("start", "end", "seqnames", "width", "strand"), all=all_flag)

# If outer join, missing numeric counts should become 0.
# For inner join we do NOT coerce NAs to 0 (there shouldn't be rows
# coming from only one side when all=FALSE), so skip replacement.
if (all_flag) {
junction_counts_merged[is.na(junction_counts_merged)] <- 0
}
positions <- unique(rbind(
junction_counts_merged[, .(seqnames, pos = start)],
junction_counts_merged[, .(seqnames, pos = end)]
))
setorder(positions, seqnames, pos)
positions[, spliceSiteID := .I]

junction_counts_merged <- merge(junction_counts_merged, positions, by.x = c("seqnames", "start"), by.y = c("seqnames", "pos"), all.x = TRUE)
setnames(junction_counts_merged, "spliceSiteID", "startID")

junction_counts_merged <- merge(junction_counts_merged, positions, by.x = c("seqnames", "end"), by.y = c("seqnames", "pos"), all.x = TRUE)
setnames(junction_counts_merged, "spliceSiteID", "endID")

junction_counts_merged[ ,c("startID.x","startID.y", "endID.x", "endID.y") := NULL]

splice_site_counts_1 <- create_split_counts(fds1)
splice_site_counts_2 <- create_split_counts(fds2)

splice_site_counts_merged <- merge(splice_site_counts_1, splice_site_counts_2, by=c("start", "end", "seqnames", "width", "type"), all=all_flag)
splice_site_counts_merged[ ,c("spliceSiteID.x","spliceSiteID.y") := NULL]

# When outer, fill NA -> 0 (samples absent in one side)
if (all_flag) {
splice_site_counts_merged[is.na(splice_site_counts_merged)] <- 0
}

splice_site_counts_merged <- merge(splice_site_counts_merged, positions, by.x = c("seqnames", "start"), by.y = c("seqnames", "pos"), all.x = TRUE)
positions[, pos := pos - 1] # to merge donor values
splice_site_counts_merged <- merge(splice_site_counts_merged, positions, by.x = c("seqnames", "start"), by.y = c("seqnames", "pos"), all.x = TRUE)

# Combine spliceSiteID.x and spliceSiteID.y into a single column.
# For each row, use spliceSiteID.x if it’s not NA; otherwise take spliceSiteID.y.
# This keeps whichever ID exists after merging.
splice_site_counts_merged[, spliceSiteID := fcoalesce(spliceSiteID.x, spliceSiteID.y)]

# Set the rest to NULL
splice_site_counts_merged[ ,c("spliceSiteID.x","spliceSiteID.y") := NULL]

# when using inner join, ensure we only keep splice sites that are in positions
if (!all_flag) {
splice_site_counts_merged <- splice_site_counts_merged[spliceSiteID %in% positions$spliceSiteID]
junction_counts_merged <- junction_counts_merged[startID %in% positions$spliceSiteID & endID %in% positions$spliceSiteID]
}

# Subset both colData
# --- Combine colData ---
# Get intersection of column names
shared_cols <- intersect(colnames(colData(fds1)), colnames(colData(fds2)))

cd1 <- colData(fds1)[, shared_cols, drop = FALSE]
cd2 <- colData(fds2)[, shared_cols, drop = FALSE]

merged_colData <- rbind(cd1, cd2)

# --- Create merged FraserDataSet ---
merged_fds <- FraserDataSet(
colData = as.data.table(merged_colData),
junctions = junction_counts_merged,
spliceSites = splice_site_counts_merged,
name = fds_name,
workingDir = workingDir
)

merged_fds <- calculatePSIValues(merged_fds, types=fitMetrics(merged_fds))

return(merged_fds)
}
38 changes: 38 additions & 0 deletions man/mergeFDS.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions tests/testthat/test_merge_fds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
context("Test merge fds function")

test_that("mergeFDS", {
fds <- createTestFraserDataSet()

fds_1 <- fds[, 1]
fds_2 <- fds[, c(2,3)]

merged_fds <- mergeFDS(fds_1, fds_2)

# Check number of samples
expect_equal(ncol(merged_fds), ncol(fds))

# Check number of junctions (features)
expect_equal(nrow(merged_fds), nrow(fds))

# Check number of splice sites
expect_equal(nrow(assay(merged_fds, "rawCountsSS")), nrow(assay(fds, "rawCountsSS")))
})
Loading