-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwrite.normalized.counts.R
More file actions
107 lines (95 loc) · 3.48 KB
/
write.normalized.counts.R
File metadata and controls
107 lines (95 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#' Write a table of normalized counts to a file
#'
#' @param dds Input DESeq2 object
#' @param filename Name of output file
#' @param sample.attributes Vector of names of sample attributes to be included (Default: none)
#' @param identifier The type of gene identifier ('ENTREZID','ENSEMBL'). Default is 'ENTREZID'.
#' @param overwrite Boolean: whether or not to overwrite a preexisting file. Default: FALSE
#' @export
write.normalized.counts <-
function(dds,
filename = 'data/normalized_counts.tsv',
sample.attributes = NULL,
identifier = 'ENTREZID',
overwrite = FALSE) {
suppressPackageStartupMessages(OK <- require(DESeq2))
if (!OK)
stop("Error: DESeq2 package not found")
suppressPackageStartupMessages(OK <- require(org.Hs.eg.db))
if (!OK)
stop("Error: org.Hs.eg.db package not found")
suppressPackageStartupMessages(OK <- require(dplyr))
if (!OK)
stop("Error: dplyr package not found")
if (file.exists(filename)) {
if (!overwrite) {
warning.message <- paste(
c(
'Did not write to file ',
filename,
" because it exists already. Set overwrite=TRUE if you're sure you want to overwrite."
),
sep = ''
)
stop(warning.message)
} else {
warning.message <- paste(c('Overwriting contents of ', filename))
warning(warning.message)
}
}
norm.counts.df <- as.data.frame(counts(dds, normalize = TRUE))
if (identifier == 'ENTREZID') {
entrez.ids <- rownames(norm.counts.df)
}
if (identifier == 'ENSEMBL') {
entrez.ids <- AnnotationDbi::mapIds(
org.Hs.eg.db,
keys = rownames(norm.counts.df),
keytype = 'ENSEMBL',
column = "ENTREZID",
multiVals = 'first'
)
#This line would replace one of the samples' counts with ENSEMBL IDs!
#norm.counts.df <- cbind(ENSEMBL=rownames(norm.counts.df),norm.counts.df)
}
gene.df <- suppressMessages(info.from.entrez(entrez.ids))
if (!is.null(sample.attributes)) {
sample.data.df <-
as.data.frame(colData(dds))[, sample.attributes, drop = FALSE]
sample.data.df <- mutate_all(sample.data.df, as.character)
sample.order <- do.call(order, sample.data.df)
sample.data.reordered <- sample.data.df[sample.order, ]
norm.counts.df <- norm.counts.df[, sample.order]
sample.data.t <- t(sample.data.reordered)
sample.data.plus.counts <- rbind(sample.data.t,
sapply(norm.counts.df, as.character))
padding <- as.data.frame(matrix('', nrow = length(sample.attributes), ncol =
3))
#gene.df.plus.padding <- rbind(
# d1=c('','',''),
# d2=c('','',''),
# gene.df
#)
names(padding) <- colnames(gene.df)
gene.df.plus.padding <- rbind(padding,
gene.df)
colnames(gene.df.plus.padding) <-
c('ENTREZID', 'SYMBOL', 'GENENAME')
gene.df.plus.padding <-
mutate_all(gene.df.plus.padding, as.character)
gene.df.plus.padding[1:length(sample.attributes), 'ENTREZID'] <-
paste0(sample.attributes, ' -->')
res <-
cbind(gene.df.plus.padding,
sample.data.plus.counts,
row.names = NULL)
norm.counts.df <- res
}
write.table(
norm.counts.df,
file = filename,
sep = '\t',
row.names = FALSE,
quote = FALSE
)
}