From c094feb1d821ac768a55adacb17e9585bdf3b817 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Thu, 6 Nov 2025 16:55:05 -0500 Subject: [PATCH 01/12] restore use of mirToRanges --- R/simplifyTCGA.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 7b1ee17..b6450be 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -275,10 +275,19 @@ symbolsToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { #' #' @export mirToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { - lifeCycle( - package = "TCGAutils", - cycle = "defunct", - title = "simplifyTCGA" + can.fix <- vapply( + experiments(obj), + function(y) { + .checkHas(y, "^hsa") & .isSummarizedExperiment(y) + }, + logical(1L) + ) + .convertTo( + x = obj, + which = can.fix, + FUN = .getRangesOfMir, + keep = keep.assay, + unmap = unmapped ) } From 7c7be7dc424d9f8c96d8e7fb08221f6e4044c78b Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Wed, 5 Nov 2025 17:36:42 -0500 Subject: [PATCH 02/12] create helper to identify viable assays --- R/simplifyTCGA.R | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index b6450be..2e4534d 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -275,13 +275,8 @@ symbolsToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { #' #' @export mirToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { - can.fix <- vapply( - experiments(obj), - function(y) { - .checkHas(y, "^hsa") & .isSummarizedExperiment(y) - }, - logical(1L) - ) + can.fix <- .isFixable(mae = obj, pattern = "^hsa") + .convertTo( x = obj, which = can.fix, From 8df0b1d33dd7cd9de24fe7439b212ed8d35de35f Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Wed, 5 Nov 2025 17:53:36 -0500 Subject: [PATCH 03/12] use miRNAmeConverter to translate and map miRNA IDs --- DESCRIPTION | 1 + R/simplifyTCGA.R | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c84f83a..1b49250 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,6 +52,7 @@ Suggests: impute, knitr, magrittr, + miRNAmeConverter, org.Hs.eg.db, RColorBrewer, readr, diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 2e4534d..b41d245 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -32,7 +32,12 @@ NULL for (i in which(which)) { lookup <- FUN(rownames(x[[i]])) ranges <- lookup[["mapped"]] - rse <- x[[i]][names(ranges), ] + if (!is.null(mcols(ranges)[["rowIdx"]])) + rse <- `rownames<-`( + x[[i]][mcols(ranges)[["rowIdx"]], ], names(ranges) + ) + else + rse <- x[[i]][names(ranges), ] # rowData not merged with mcols of RHS in `rowRanges<-` method mcols(ranges) <- S4Vectors::DataFrame(rowData(rse), S4Vectors::mcols(ranges)) @@ -60,9 +65,18 @@ NULL #' `GRanges()` object with ranges of mapped symbols #' @keywords internal .makeListRanges <- function(x, gn) { - res <- list(unmapped = x[!x %in% names(gn)]) - x <- x[x %in% names(gn)] - gn <- gn[match(x, names(gn))] + checkInstalled("miRNAmeConverter") + nc <- miRNAmeConverter::MiRNANameConverter() + mirna_version <- + miRNAmeConverter::assessVersion(nc, names(gn))[1L, "version"] + trout <- miRNAmeConverter::translateMiRNAName( + nc, x, versions = mirna_version + ) + new_x <- trout[[paste0("v", mirna_version, ".0")]] + res <- list(unmapped = setdiff(x, trout[["input"]])) + rowIdx <- match(tolower(trout[["input"]]), x) + gn <- gn[match(new_x, names(gn))] + mcols(gn)[["rowIdx"]] <- rowIdx res[["mapped"]] <- gn res } From 502b76ca7f18cdece265d745dbc2662b22cdbcf7 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Wed, 5 Nov 2025 17:53:55 -0500 Subject: [PATCH 04/12] update authors in simplifyTCGA --- R/simplifyTCGA.R | 4 +--- man/simplifyTCGA.Rd | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index b41d245..ca8a5d0 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -223,9 +223,7 @@ NULL #' [`RangedSummarizedExperiment`][SummarizedExperiment::RangedSummarizedExperiment-class] #' objects #' -#' @author L. Waldron -#' -#' @md +#' @author L. Waldron, M. Ramos #' #' @examples #' diff --git a/man/simplifyTCGA.Rd b/man/simplifyTCGA.Rd index 04191e8..163c1bc 100644 --- a/man/simplifyTCGA.Rd +++ b/man/simplifyTCGA.Rd @@ -117,5 +117,5 @@ simplifyTCGA(accmae) } \author{ -L. Waldron +L. Waldron, M. Ramos } From 4c037bd942533736e987a8f4ffe7a3915e199528 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Wed, 5 Nov 2025 17:57:42 -0500 Subject: [PATCH 05/12] check for Bioc.gff installation in mirToRanges --- R/simplifyTCGA.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index ca8a5d0..0749eca 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -287,6 +287,8 @@ symbolsToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { #' #' @export mirToRanges <- function(obj, keep.assay = FALSE, unmapped = TRUE) { + checkInstalled("Bioc.gff") + can.fix <- .isFixable(mae = obj, pattern = "^hsa") .convertTo( From 53d5c13e2aaede31a27fc76cdd0ab81fb8ffb9d5 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 10 Nov 2025 15:21:12 -0500 Subject: [PATCH 06/12] fix .getRangesOfMir check --- NAMESPACE | 1 + R/simplifyTCGA.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ceb863d..34625fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -34,6 +34,7 @@ export(trimColData) export(uniformBuilds) import(methods) importFrom(BiocBaseUtils,checkInstalled) +importFrom(BiocBaseUtils,isCharacter) importFrom(BiocBaseUtils,isScalarCharacter) importFrom(BiocBaseUtils,isScalarNumber) importFrom(BiocBaseUtils,lifeCycle) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 0749eca..cfc1e13 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -1,7 +1,7 @@ #' @importFrom GenomicFeatures genes #' @importFrom GenomeInfoDb keepStandardChromosomes seqlevelsStyle #' seqlevelsStyle<- -#' @importFrom BiocBaseUtils isScalarCharacter +#' @importFrom BiocBaseUtils isScalarCharacter isCharacter NULL .checkHas <- @@ -106,7 +106,7 @@ NULL #' @rdname hidden-helpers #' @keywords internal .getRangesOfMir <- function(x) { - stopifnot(isScalarCharacter(x)) + stopifnot(isCharacter(x)) mirnas_gr <- .get_hsa_gff3(x) From 742bfb36d18a1019bd4eed263d3ebf8fee833138 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 10 Nov 2025 15:21:32 -0500 Subject: [PATCH 07/12] use default "hg19" for .get_hsa_gff3 --- R/simplifyTCGA.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index cfc1e13..1262e92 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -108,7 +108,7 @@ NULL .getRangesOfMir <- function(x) { stopifnot(isCharacter(x)) - mirnas_gr <- .get_hsa_gff3(x) + mirnas_gr <- .get_hsa_gff3("hg19") miR <- mirnas_gr[ mcols(mirnas_gr)[["type"]] %in% c("miRNA", "microRNA", "tRNA") From 5a8dc77ddb9883ca78556f7f4e6231edc44ca289 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Tue, 11 Nov 2025 09:40:41 -0500 Subject: [PATCH 08/12] match tolower for rowIdx --- R/simplifyTCGA.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 1262e92..b03e8af 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -74,7 +74,7 @@ NULL ) new_x <- trout[[paste0("v", mirna_version, ".0")]] res <- list(unmapped = setdiff(x, trout[["input"]])) - rowIdx <- match(tolower(trout[["input"]]), x) + rowIdx <- match(tolower(trout[["input"]]), tolower(x)) gn <- gn[match(new_x, names(gn))] mcols(gn)[["rowIdx"]] <- rowIdx res[["mapped"]] <- gn From c211a012dfd41e7fd8bec61bf8fdddde0451aa29 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Tue, 11 Nov 2025 09:42:49 -0500 Subject: [PATCH 09/12] separate .makeMiRNAListRanges from .makeListRanges --- R/simplifyTCGA.R | 12 +++++++++++- man/hidden-helpers.Rd | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index b03e8af..17f91f9 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -65,6 +65,16 @@ NULL #' `GRanges()` object with ranges of mapped symbols #' @keywords internal .makeListRanges <- function(x, gn) { + res <- list(unmapped = x[!x %in% names(gn)]) + x <- x[x %in% names(gn)] + gn <- gn[match(x, names(gn))] + res[["mapped"]] <- gn + res +} + +#' @name hidden-helpers +#' @keywords internal +.makeMiRNAListRanges <- function(x, gn) { checkInstalled("miRNAmeConverter") nc <- miRNAmeConverter::MiRNANameConverter() mirna_version <- @@ -117,7 +127,7 @@ NULL seqlevelsStyle(miR) <- "NCBI" names(miR) <- mcols(miR)[["Name"]] - .makeListRanges(x, miR) + .makeMiRNAListRanges(x, miR) } #' @rdname hidden-helpers diff --git a/man/hidden-helpers.Rd b/man/hidden-helpers.Rd index 9ac732b..e6525bf 100644 --- a/man/hidden-helpers.Rd +++ b/man/hidden-helpers.Rd @@ -3,6 +3,7 @@ \name{hidden-helpers} \alias{hidden-helpers} \alias{.makeListRanges} +\alias{.makeMiRNAListRanges} \alias{.getRangesOfMir} \alias{.getRangesOfSYMBOLS} \alias{.getRangesOfCpG} @@ -10,6 +11,8 @@ \usage{ .makeListRanges(x, gn) +.makeMiRNAListRanges(x, gn) + .getRangesOfMir(x) .getRangesOfSYMBOLS(x) From f086dcebbcb49b1784df6cc21231fa67ca100ff9 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Tue, 11 Nov 2025 09:49:30 -0500 Subject: [PATCH 10/12] simplify and use rowidx for mcols col --- R/simplifyTCGA.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 17f91f9..99b87ff 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -32,10 +32,10 @@ NULL for (i in which(which)) { lookup <- FUN(rownames(x[[i]])) ranges <- lookup[["mapped"]] - if (!is.null(mcols(ranges)[["rowIdx"]])) - rse <- `rownames<-`( - x[[i]][mcols(ranges)[["rowIdx"]], ], names(ranges) - ) + rowidx <- mcols(ranges)[["rowIdx"]] + rowidx <- Filter(Negate(is.na), rowidx) + if (!is.null(rowidx) && length(rowidx)) + rse <- `rownames<-`(x[[i]][rowidx, ], names(ranges)) else rse <- x[[i]][names(ranges), ] # rowData not merged with mcols of RHS in `rowRanges<-` method From 87e31a650ba1809d64ec149b19421b0104fd1fd6 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Tue, 11 Nov 2025 09:49:47 -0500 Subject: [PATCH 11/12] use assay instead of x[[i]] --- R/simplifyTCGA.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/simplifyTCGA.R b/R/simplifyTCGA.R index 99b87ff..b709052 100644 --- a/R/simplifyTCGA.R +++ b/R/simplifyTCGA.R @@ -30,14 +30,15 @@ NULL .convertTo <- function(x, which, FUN, keep, unmap) { for (i in which(which)) { - lookup <- FUN(rownames(x[[i]])) + assay <- x[[i]] + lookup <- FUN(rownames(assay)) ranges <- lookup[["mapped"]] rowidx <- mcols(ranges)[["rowIdx"]] rowidx <- Filter(Negate(is.na), rowidx) if (!is.null(rowidx) && length(rowidx)) - rse <- `rownames<-`(x[[i]][rowidx, ], names(ranges)) + rse <- `rownames<-`(assay[rowidx, ], names(ranges)) else - rse <- x[[i]][names(ranges), ] + rse <- assay[names(ranges), ] # rowData not merged with mcols of RHS in `rowRanges<-` method mcols(ranges) <- S4Vectors::DataFrame(rowData(rse), S4Vectors::mcols(ranges)) @@ -45,7 +46,7 @@ NULL x <- c(x, setNames(S4Vectors::List(rse), paste0(names(x)[i], "_ranged"))) if (length(lookup[["unmapped"]]) && unmap) { - se <- x[[i]][lookup[["unmapped"]], ] + se <- assay[lookup[["unmapped"]], ] x <- c(x, setNames(S4Vectors::List(se), paste0(names(x)[i], "_unranged"))) } From 140490ce4d972fbf2dfa45c8a5903ac787dd5bd8 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Tue, 11 Nov 2025 09:54:00 -0500 Subject: [PATCH 12/12] use utils::tail --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 67ac7af..c02b035 100644 --- a/R/utils.R +++ b/R/utils.R @@ -45,7 +45,7 @@ gff_lines <- readLines(file, n = 50) genome_line <- grepv("genome-build-id", gff_lines) gnm <- strsplit(genome_line, ":\\s+")[[1L]] |> - tail(n = 1L) |> + utils::tail(n = 1L) |> trimws() if (!length(gnm)) NA_character_