Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
4,766 changes: 2,383 additions & 2,383 deletions archive/code_raw.txt

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions data/expr_analyze.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env Rscript

library(stringr)
library(dplyr)

## This script analyzes the gene expression data saved in the file
## expr_long_split.Rdata, which holds the data frame expr_long_split

## load() restores the saved object under its original name
load("expr_long_split.Rdata")
## Shorter alias used by the rest of the script
expr <- expr_long_split

## Fits expression ~ genotype + treatment + genotype:treatment on one
## subset of the data and reports the ANOVA p-values.
##
## sub_df: data frame with columns expression, genotype and treatment.
##
## Returns a single-row data frame with one column per row of the ANOVA
## table, named after that row (the model terms followed by "Residuals",
## whose p-value is NA).
sub_df_to_pvals_df <- function(sub_df) {
  fit <- lm(expression ~ genotype + treatment + genotype:treatment,
            data = sub_df)
  aov_table <- anova(fit)

  # One list element per ANOVA row, so data.frame() yields one row
  # with one column per term
  pvals_row <- data.frame(as.list(aov_table[["Pr(>F)"]]))
  colnames(pvals_row) <- rownames(aov_table)

  pvals_row
}

## Sanity check: run the model on the rows of the first id only
uniq_ids <- unique(expr$id)
expr1 <- expr[expr$id %in% uniq_ids[1], ]

pvals_df1 <- sub_df_to_pvals_df(expr1)

## Run all data: one linear model per id, one row of p-values per id
expr_by_id <- group_by(expr, id)
pvals_df <- do(expr_by_id, sub_df_to_pvals_df(.))

## Add BY-adjusted column. p.adjust() defaults to method = "holm", so the
## Benjamini-Yekutieli correction must be requested explicitly.
pvals_df$interaction_BY <- p.adjust(pvals_df[["genotype:treatment"]],
                                    method = "BY")

## Extract with BY-adjusted FDR < 0.05. which() drops ids whose adjusted
## p-value is NA instead of returning them as all-NA rows.
pvals_interaction_sig <- pvals_df[which(pvals_df$interaction_BY < 0.05), ]
print(pvals_interaction_sig)
366,177 changes: 366,177 additions & 0 deletions data/expr_long_coded.txt

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions data/expr_preprocess.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env Rscript

library(stringr)

# Read the long-format expression table (tab-separated, with a header
# row), keeping text columns as character vectors
expr_long <- read.table("expr_long_coded.txt",
                        header = TRUE,
                        sep = "\t",
                        stringsAsFactors = FALSE)


# Split sample column into 3-column matrix on _'s
sample_split <- str_split_fixed(expr_long$sample, "_", 3)

# Turn the matrix into a data frame with appropriate column names
# and combine it with the original dataframe into a larger set.
# stringsAsFactors = FALSE matches the read.table() call above, so the
# new columns stay character on R < 4.0 as well (where the default
# would turn them into factors).
sample_split_df <- data.frame(sample_split, stringsAsFactors = FALSE)
colnames(sample_split_df) <- c("genotype", "treatment", "tissuerep")
expr_long_split <- cbind(expr_long, sample_split_df)

# Derive the tissue column from the letter found in tissuerep.
# Codes are applied in order, so if several letters occur in one value
# the last one in the list wins; rows matching none stay NA.
expr_long_split$tissue <- NA
for (tissue_code in c("A", "B", "C")) {
  has_code <- str_detect(expr_long_split$tissuerep, tissue_code)
  expr_long_split$tissue[has_code] <- tissue_code
}

# NA check, already run once and left disabled:
#print(expr_long_split[is.na(expr_long_split$tissue), ]) # should print 0 rows

# Derive the rep column from the digit found in tissuerep.
# Digits are applied in order, so if several occur in one value the
# last one in the list wins; rows matching none stay NA.
expr_long_split$rep <- NA
for (rep_code in c("1", "2", "3")) {
  has_code <- str_detect(expr_long_split$tissuerep, rep_code)
  expr_long_split$rep[has_code] <- rep_code
}

# NA check, already run once: unlike tissue, a few rows were left with
# an NA rep at this point
#print(expr_long_split[is.na(expr_long_split$rep), ]) # prints the rows lacking a rep

# Drop every row belonging to an id that has at least one row with no
# rep assigned, not just the offending rows themselves
bad_ids <- expr_long_split$id[which(is.na(expr_long_split$rep))]
bad_rows <- is.element(expr_long_split$id, bad_ids) # TRUE for rows to drop
expr_long_split <- expr_long_split[!bad_rows, ]     # keep the rest

# Persist the cleaned data frame under its own name
save(expr_long_split, file = "expr_long_split.Rdata")
11,439 changes: 11,439 additions & 0 deletions data/expr_wide.txt

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions data/p450s_blastp_yeast_top1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
sp|Q3LFU0|CP1A1_BALAC YHR007C 23.83 277 176 8 240 492 236 501 2e-12 67.4
sp|P56590|CP1A1_CANFA YHR007C 24.10 278 174 9 240 492 236 501 1e-14 74.7
sp|Q06367|CP1A1_CAVPO YDR402C 28.03 264 167 9 234 488 221 470 7e-13 68.6
sp|Q92039|CP1A1_CHACA YHR007C 23.81 231 142 7 284 492 281 499 8e-09 55.8
sp|P79716|CP1A1_DICLA YHR007C 23.34 287 175 11 242 498 236 507 3e-08 54.3
sp|Q5KQT7|CP1A1_FELCA YDR402C 28.20 266 166 10 242 497 220 470 4e-13 69.3
sp|P04798|CP1A1_HUMAN YHR007C 25.90 278 169 11 236 488 236 501 5e-13 69.3
sp|O42430|CP1A1_LIMLI YDR402C 27.80 277 152 12 243 498 221 470 8e-13 68.6
sp|O42231|CP1A1_LIZAU YDR402C 27.54 276 154 12 243 498 221 470 2e-12 67.0
sp|Q9W683|CP1A1_LIZSA YDR402C 24.13 460 276 19 66 498 57 470 8e-13 68.6
sp|P33616|CP1A1_MACFA YHR007C 24.82 278 172 10 236 488 236 501 5e-13 68.9
sp|Q6GUR1|CP1A1_MACMU YHR007C 24.82 278 172 10 236 488 236 501 3e-13 69.7
sp|Q00557|CP1A1_MESAU YHR007C 23.16 272 184 6 240 492 236 501 2e-13 70.5
sp|Q92148|CP1A1_MICTO YDR402C 27.07 266 168 12 239 494 221 470 7e-13 68.6
sp|P00184|CP1A1_MOUSE YDR402C 28.30 265 167 10 242 497 221 471 5e-14 72.0
sp|Q92110|CP1A1_ONCMY YDR402C 24.25 466 297 20 51 498 43 470 2e-12 67.4
sp|Q92095|CP1A1_OPSTA YDR402C 24.66 442 282 18 73 498 64 470 5e-13 69.3
sp|Q6JZS3|CP1A1_ORYLA YDR402C 25.48 263 176 9 243 498 221 470 7e-11 62.4
sp|P98181|CP1A1_PAGMA YDR402C 24.53 265 173 8 240 492 221 470 5e-09 56.2
sp|Q9YH64|CP1A1_PLAFE YDR402C 24.89 458 275 21 66 498 57 470 1e-13 71.2
sp|Q92100|CP1A1_PLEPL YDR402C 27.80 277 152 13 243 498 221 470 9e-13 68.2
sp|P05176|CP1A1_RABIT YDR402C 29.11 237 146 9 276 496 240 470 4e-13 69.3
sp|P00185|CP1A1_RAT YDR402C 27.55 265 169 9 242 497 221 471 1e-12 67.8
sp|P56591|CP1A1_SHEEP YDR402C 26.69 251 160 10 258 499 235 470 8e-11 62.4
sp|O42457|CP1A1_SPAAU YHR007C 22.68 291 172 10 241 497 236 507 2e-10 60.8
sp|Q92116|CP1A1_STECH YHR007C 24.22 289 170 11 241 497 236 507 6e-12 65.9
51 changes: 51 additions & 0 deletions data/states.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name population income murder hs_grad region
Alabama 3615 3624 15.1 41.3 South
Alaska 365 6315 11.3 66.7 West
Arizona 2212 4530 7.8 58.1 West
Arkansas 2110 3378 10.1 39.9 South
California 21198 5114 10.3 62.6 West
Colorado 2541 4884 6.8 63.9 West
Connecticut 3100 5348 3.1 56 Northeast
Delaware 579 4809 6.2 54.6 South
Florida 8277 4815 10.7 52.6 South
Georgia 4931 4091 13.9 40.6 South
Hawaii 868 4963 6.2 61.9 West
Idaho 813 4119 5.3 59.5 West
Illinois 11197 5107 10.3 52.6 North Central
Indiana 5313 4458 7.1 52.9 North Central
Iowa 2861 4628 2.3 59 North Central
Kansas 2280 4669 4.5 59.9 North Central
Kentucky 3387 3712 10.6 38.5 South
Louisiana 3806 3545 13.2 42.2 South
Maine 1058 3694 2.7 54.7 Northeast
Maryland 4122 5299 8.5 52.3 South
Massachusetts 5814 4755 3.3 58.5 Northeast
Michigan 9111 4751 11.1 52.8 North Central # mitten
Minnesota 3921 4675 2.3 57.6 North Central
Mississippi 2341 3098 12.5 41 South
Missouri 4767 4254 9.3 48.8 North Central
Montana 746 4347 5 59.2 West
Nebraska 1544 4508 2.9 59.3 North Central
Nevada 590 5149 11.5 65.2 West
New Hampshire 812 4281 3.3 57.6 Northeast
New Jersey 7333 5237 5.2 52.5 Northeast
New Mexico 1144 3601 9.7 55.2 West
New York 18076 4903 10.9 52.7 Northeast
North Carolina 5441 3875 11.1 38.5 South
North Dakota 637 5087 1.4 50.3 North Central
Ohio 10735 4561 7.4 53.2 North Central
Oklahoma 2715 3983 6.4 51.6 South
Oregon 2284 4660 4.2 60 West
Pennsylvania 11860 4449 6.1 50.2 Northeast
Rhode Island 931 4558 2.4 46.4 Northeast
South Carolina 2816 3635 11.6 37.8 South
South Dakota 681 4167 1.7 53.3 North Central
Tennessee 4173 3821 11 41.8 South
Texas 12237 4188 12.2 47.4 South
Utah 1203 4022 4.5 67.3 West
Vermont 472 3907 5.5 57.1 Northeast
Virginia 4981 4701 9.5 47.8 South
Washington 3559 4864 4.3 63.5 West
West Virginia 1799 3617 6.7 41.6 South
Wisconsin 4589 4468 3 54.5 North Central
Wyoming 376 4566 6.9 62.9 West
Loading