Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions R/gUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -4128,18 +4128,22 @@ parse.gr = function(...)
#'
#' @param x character vector representing a GRangesList, where each item of x holds one or more [signed] GRanges in UCSC-style coordinates (chr:start-end[+-]) and ";" separates the individual GRanges within that item
#' @param seqlengths named integer vector representing genome (default = hg_seqlengths())
#' @param separators characters used to separate distinct ranges within each item of x (default = c(';')). Either a single character or a vector of several alternative separator characters may be provided
#' @author Marcin Imielinski
#' @return GRangesList parsed from IGV-/UCSC-style strings
#' @export
parse.grl = function(x, seqlengths = hg_seqlengths(), meta = NULL)
parse.grl = function(x, seqlengths = hg_seqlengths(), meta = NULL, separators = c(';'))
{
nm = names(x)
tmp = strsplit(x, '\\s*[;\\,\\|]\\s*')
split_chr = paste0('\\s*[\\', paste(separators, collapse = '\\'), ']\\s*')
tmp = strsplit(x,split_chr)
tmp.u = unlist(tmp)
tmp.u = gsub('\\,', '', tmp.u)
tmp.id = rep(1:length(tmp), sapply(tmp, length))
str = gsub('.*([\\+\\-])$','\\1', tmp.u)
spl = strsplit(tmp.u, "[\\-\\+\\:]", perl = T)
tmp.l = strsplit(tmp.u, ':', perl = T) # treat seqnames and ranges separately in order to work well with seqnames that contain "-"
l1 = lapply(tmp.l, function(s){strsplit(s[2], "[\\-\\+]", perl = T)[[1]]}) # split according to "-" and also get rid of the trailing "-" and "+"
spl = lapply(seq_along(tmp.l), function(ix){c(tmp.l[[ix]][1], l1[[ix]])})

if (any(ix <- sapply(spl, length)==2)){
spl[ix] = lapply(which(ix), function(x) spl[[x]][c(1:2,2)])
Expand Down
6 changes: 6 additions & 0 deletions tests/testthat/test_rangeops.R
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,12 @@ test_that("parse.grl", {
expect_equal(width(grl_example[[2]][1]), 3000001)
expect_equal(width(grl_example[[2]][2]), 79)

grl_example_alt_sep = parse.grl(c('chr1:1e6-5e6+,5:10-2000', 'chr2:2e6-5e6-|chr10:100231321-100231399'),
separators = c('|', ','))
expect_equal(width(grl_example_alt_sep[[1]][1]), 4000001)
expect_equal(width(grl_example_alt_sep[[1]][2]), 1991)
expect_equal(width(grl_example_alt_sep[[2]][1]), 3000001)
expect_equal(width(grl_example_alt_sep[[2]][2]), 79)
})


Expand Down