Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions CovertingCoordinates/openASO_position.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ genseq_byaso$transcript_id <- gsub("\\..*","",genseq_byaso$hg38.knownGene.name)


# make smol
genseq_byaso <- tail(genseq_byaso,1000)
# genseq_byaso <- head(genseq_byaso,1000)
```

### Map the transcriptomic coordinates
Expand Down Expand Up @@ -81,22 +81,17 @@ aso_txt_pos <- na.omit(aso_txt_pos, cols = "start")
# convert the transcript position information into an IRanges format.
# add an ASO group number 'aso_grp' to be able to connect two genome ranges corresponding to the same ASO, but spanning exons.

# grp_list <- lapply(aso_txt_pos, function(x){lapply(x, row.names)})
# grp_list <- lapply(aso_txt_pos, function(x){lapply(x, row.names); x})

# ir <- IRanges(start = aso_txt_pos$start, width = (aso_txt_pos$end - aso_txt_pos$start), names = aso_txt_pos$transcript_id, aso_grp = rownames(aso_txt_pos))

ir <- IRanges(start = aso_txt_pos$start, width = (aso_txt_pos$end - aso_txt_pos$start), names = aso_txt_pos$transcript_id, aso_grp = list(rownames(aso_txt_pos)))

# create an EnsDB object containing genomic position information.
dbfile <- system.file("extdata/EnsDb.Hsapiens.v86.sqlite", package = "EnsDb.Hsapiens.v86")
db <- EnsDb(dbfile)

# latest version of X.toGenome tries to replace id with a group identifier. Defaults to using 'NAMES'/transcript id which is not unique enough to join back at the very end.
# latest version of X.toGenome tries to replace id with a group identifier. Defaults to using 'NAMES' transcript id which is not unique enough to join back at the very end.

# gr <- transcriptToGenome(ir, db)
gr <- transcriptToGenome(ir, db)
# gr <- transcriptToGenome(ir, db, id = mcols(ir)[,1])
gr <- transcriptToGenome(ir, db, id = mcols(ir)$aso_grp)
# gr <- transcriptToGenome(ir, db, id = mcols(ir)$aso_grp)

```

Expand All @@ -107,9 +102,9 @@ gr <- transcriptToGenome(ir, db, id = mcols(ir)$aso_grp)
# unlist the GRanges object to be able to pull transcript start/end information without direct reference to transcript.
gr_unlist <- unlist(gr, recursive = TRUE, use.names = TRUE)

gnm_df <- data.frame(loci = seqnames(gr_unlist),
starts = start(gr_unlist) - 1,
ends = end(gr_unlist),
gnm_df <- data.frame(chrom = seqnames(gr_unlist),
chromStart = start(gr_unlist),
chromEnd = end(gr_unlist) + 1,
strand = strand(gr_unlist),
gr_unlist@ranges@width,
gr_unlist@elementMetadata@listData[["tx_id"]], gr_unlist@elementMetadata@listData[["tx_start"]], gr_unlist@elementMetadata@listData[["tx_end"]])
Expand All @@ -128,13 +123,14 @@ colnames(gnm_df)[colnames(gnm_df) ==

```{r GRanges to BED format, warning = FALSE}
# create new data frame that connects ASO info to generated genome range info.

final_df <- dplyr::full_join(aso_txt_pos, gnm_df, by = c('transcript_id' = 'tx_id', 'start' = 'tx_start', 'end' = 'tx_end', ))
df <- merge(aso_txt_pos, gnm_df, by.x = c('transcript_id', 'start'), by.y = c('tx_id', 'tx_start'))

# add a unique identifier
final_df$uniq_id <- paste(final_df$GeneID, "_", final_df$ASOseq)
df$uniq_id <- paste(df$GeneID, "_", df$ASOseq)

# convert the data frame to bed format.
# reorder columns to match BED format.
col_order <- c("chrom", "chromStart", "chromEnd", "uniq_id", "transcript_id", "ASOseq", "aso_rc_seq", "ASOeffective","strand","start", "end", "tx_end", "width", "GeneID", "OriginalName", "HGNC.ID", "hg38.knownGene.name", "Sequence")
final_df <- df[ , col_order]

# write.csv(final_df, file = "openASO_gr.bed", quote = F, sep = "\t", row.names = F, col.names = F)
```
Expand Down