oneilsh · Leslie-C · Jun 3, 2025 · May 19, 2025 · May 19, 2025 · May 19, 2025
diff --git a/_bookdown_files/bookdown-demo_files/figure-html/nice-fig-1.png b/_bookdown_files/bookdown-demo_files/figure-html/nice-fig-1.png
diff --git a/_bookdown_files/bookdown-demo_files/figure-latex/nice-fig-1.pdf b/_bookdown_files/bookdown-demo_files/figure-latex/nice-fig-1.pdf
diff --git a/archive/code_raw.txt b/archive/code_raw.txt
diff --git a/data/expr_analyze.R b/data/expr_analyze.R
@@ -0,0 +1,42 @@
+#!/usr/bin/env Rscript
+
+library(stringr)
+library(dplyr)
+
+## This script analyzes the data stored in the data frame
+## expr_long_split.Rdata, containing gene expression data
+
+load("expr_long_split.Rdata")
+expr <- expr_long_split
+
+## Given a data frame with columns for expression,
+## genotype, and treatment, runs a linear model
+## and returns a single-row data frame with p-values in columns
+sub_df_to_pvals_df <- function(sub_df) {
+  lm1 <- lm(expression ~ genotype + treatment + genotype:treatment,
+            data = sub_df)
+  # ANOVA table for the fit; its row names become the output column names
+  anova1 <- anova(lm1)
+  pvals1 <- anova1$"Pr(>F)"
+  pvals_list1 <- as.list(pvals1)
+  pvals_df1 <- data.frame(pvals_list1)
+  colnames(pvals_df1) <- rownames(anova1)
+  # One row: each column holds the Pr(>F) value of one ANOVA table row
+  return(pvals_df1)
+}
+
+uniq_ids <- unique(expr$id)
+expr1 <- expr[expr$id %in% uniq_ids[1], ]
+## Try the function on the rows for the first id only
+pvals_df1 <- sub_df_to_pvals_df(expr1)
+
+## Run all data: apply the function to each id's rows
+expr_by_id <- group_by(expr, id)
+pvals_df <- do(expr_by_id, sub_df_to_pvals_df(.))
+
+## Add BY-adjusted column (method is required: p.adjust defaults to "holm")
+pvals_df$interaction_BY <- p.adjust(pvals_df$"genotype:treatment",
+                                    method = "BY")
+## Extract with BY-adjusted FDR < 0.05
+pvals_interaction_sig <- pvals_df[pvals_df$interaction_BY < 0.05, ]
+print(pvals_interaction_sig)
diff --git a/data/expr_long_coded.txt b/data/expr_long_coded.txt
diff --git a/data/expr_preprocess.R b/data/expr_preprocess.R
@@ -0,0 +1,43 @@
+#!/usr/bin/env Rscript
+
+library(stringr)
+
+expr_long <- read.table("expr_long_coded.txt",
+                        header = TRUE,
+                        sep = "\t",
+                        stringsAsFactors = FALSE)
+
+
+# Split the sample column on "_" into a fixed 3-column character matrix
+sample_split <- str_split_fixed(expr_long$sample, "_", 3)
+
+# Turn the matrix into a data frame with appropriate column names
+# and combine it with the original dataframe into a larger set
+sample_split_df <- data.frame(sample_split)
+colnames(sample_split_df) <- c("genotype", "treatment", "tissuerep")
+expr_long_split <- cbind(expr_long, sample_split_df)
+
+# Create a tissue column: "A", "B", or "C" if tissuerep contains that letter
+expr_long_split$tissue <- NA
+expr_long_split$tissue[str_detect(expr_long_split$tissuerep, "A")] <- "A"
+expr_long_split$tissue[str_detect(expr_long_split$tissuerep, "B")] <- "B"
+expr_long_split$tissue[str_detect(expr_long_split$tissuerep, "C")] <- "C"
+
+# We've already checked for NAs
+#print(expr_long_split[is.na(expr_long_split$tissue), ]) # should print 0 rows
+
+# Create a rep column the same way: "1", "2", or "3"; otherwise it stays NA
+expr_long_split$rep <- NA
+expr_long_split$rep[str_detect(expr_long_split$tissuerep, "1")] <- "1"
+expr_long_split$rep[str_detect(expr_long_split$tissuerep, "2")] <- "2"
+expr_long_split$rep[str_detect(expr_long_split$tissuerep, "3")] <- "3"
+
+# We've already checked for NAs, but a few were left
+#print(expr_long_split[is.na(expr_long_split$rep), ]) # should print 0 rows
+
+# So we remove every row whose id has at least one NA rep (the "bad" IDs)
+bad_ids <- expr_long_split$id[is.na(expr_long_split$rep)]
+bad_rows <- expr_long_split$id %in% bad_ids      # TRUE for rows to drop
+expr_long_split <- expr_long_split[!bad_rows, ]  # keep only the other rows
+
+save(expr_long_split, file = "expr_long_split.Rdata")
diff --git a/data/expr_wide.txt b/data/expr_wide.txt
diff --git a/data/p450s_blastp_yeast_top1.txt b/data/p450s_blastp_yeast_top1.txt
@@ -0,0 +1,26 @@
+sp|Q3LFU0|CP1A1_BALAC	YHR007C	23.83	277	176	8	240	492	236	501	2e-12	67.4
+sp|P56590|CP1A1_CANFA	YHR007C	24.10	278	174	9	240	492	236	501	1e-14	74.7
+sp|Q06367|CP1A1_CAVPO	YDR402C	28.03	264	167	9	234	488	221	470	7e-13	68.6
+sp|Q92039|CP1A1_CHACA	YHR007C	23.81	231	142	7	284	492	281	499	8e-09	55.8
+sp|P79716|CP1A1_DICLA	YHR007C	23.34	287	175	11	242	498	236	507	3e-08	54.3
+sp|Q5KQT7|CP1A1_FELCA	YDR402C	28.20	266	166	10	242	497	220	470	4e-13	69.3
+sp|P04798|CP1A1_HUMAN	YHR007C	25.90	278	169	11	236	488	236	501	5e-13	69.3
+sp|O42430|CP1A1_LIMLI	YDR402C	27.80	277	152	12	243	498	221	470	8e-13	68.6
+sp|O42231|CP1A1_LIZAU	YDR402C	27.54	276	154	12	243	498	221	470	2e-12	67.0
+sp|Q9W683|CP1A1_LIZSA	YDR402C	24.13	460	276	19	66	498	57	470	8e-13	68.6
+sp|P33616|CP1A1_MACFA	YHR007C	24.82	278	172	10	236	488	236	501	5e-13	68.9
+sp|Q6GUR1|CP1A1_MACMU	YHR007C	24.82	278	172	10	236	488	236	501	3e-13	69.7
+sp|Q00557|CP1A1_MESAU	YHR007C	23.16	272	184	6	240	492	236	501	2e-13	70.5
+sp|Q92148|CP1A1_MICTO	YDR402C	27.07	266	168	12	239	494	221	470	7e-13	68.6
+sp|P00184|CP1A1_MOUSE	YDR402C	28.30	265	167	10	242	497	221	471	5e-14	72.0
+sp|Q92110|CP1A1_ONCMY	YDR402C	24.25	466	297	20	51	498	43	470	2e-12	67.4
+sp|Q92095|CP1A1_OPSTA	YDR402C	24.66	442	282	18	73	498	64	470	5e-13	69.3
+sp|Q6JZS3|CP1A1_ORYLA	YDR402C	25.48	263	176	9	243	498	221	470	7e-11	62.4
+sp|P98181|CP1A1_PAGMA	YDR402C	24.53	265	173	8	240	492	221	470	5e-09	56.2
+sp|Q9YH64|CP1A1_PLAFE	YDR402C	24.89	458	275	21	66	498	57	470	1e-13	71.2
+sp|Q92100|CP1A1_PLEPL	YDR402C	27.80	277	152	13	243	498	221	470	9e-13	68.2
+sp|P05176|CP1A1_RABIT	YDR402C	29.11	237	146	9	276	496	240	470	4e-13	69.3
+sp|P00185|CP1A1_RAT	YDR402C	27.55	265	169	9	242	497	221	471	1e-12	67.8
+sp|P56591|CP1A1_SHEEP	YDR402C	26.69	251	160	10	258	499	235	470	8e-11	62.4
+sp|O42457|CP1A1_SPAAU	YHR007C	22.68	291	172	10	241	497	236	507	2e-10	60.8
+sp|Q92116|CP1A1_STECH	YHR007C	24.22	289	170	11	241	497	236	507	6e-12	65.9
diff --git a/data/states.txt b/data/states.txt
@@ -0,0 +1,51 @@
+name	population	income	murder	hs_grad	region
+Alabama	3615	3624	15.1	41.3	South
+Alaska	365	6315	11.3	66.7	West
+Arizona	2212	4530	7.8	58.1	West
+Arkansas	2110	3378	10.1	39.9	South
+California	21198	5114	10.3	62.6	West
+Colorado	2541	4884	6.8	63.9	West
+Connecticut	3100	5348	3.1	56	Northeast
+Delaware	579	4809	6.2	54.6	South
+Florida	8277	4815	10.7	52.6	South
+Georgia	4931	4091	13.9	40.6	South
+Hawaii	868	4963	6.2	61.9	West
+Idaho	813	4119	5.3	59.5	West
+Illinois	11197	5107	10.3	52.6	North Central
+Indiana	5313	4458	7.1	52.9	North Central
+Iowa	2861	4628	2.3	59	North Central
+Kansas	2280	4669	4.5	59.9	North Central
+Kentucky	3387	3712	10.6	38.5	South
+Louisiana	3806	3545	13.2	42.2	South
+Maine	1058	3694	2.7	54.7	Northeast
+Maryland	4122	5299	8.5	52.3	South
+Massachusetts	5814	4755	3.3	58.5	Northeast
+Michigan	9111	4751	11.1	52.8	North Central	# mitten
+Minnesota	3921	4675	2.3	57.6	North Central
+Mississippi	2341	3098	12.5	41	South
+Missouri	4767	4254	9.3	48.8	North Central
+Montana	746	4347	5	59.2	West
+Nebraska	1544	4508	2.9	59.3	North Central
+Nevada	590	5149	11.5	65.2	West
+New Hampshire	812	4281	3.3	57.6	Northeast
+New Jersey	7333	5237	5.2	52.5	Northeast
+New Mexico	1144	3601	9.7	55.2	West
+New York	18076	4903	10.9	52.7	Northeast
+North Carolina	5441	3875	11.1	38.5	South
+North Dakota	637	5087	1.4	50.3	North Central
+Ohio	10735	4561	7.4	53.2	North Central
+Oklahoma	2715	3983	6.4	51.6	South
+Oregon	2284	4660	4.2	60	West
+Pennsylvania	11860	4449	6.1	50.2	Northeast
+Rhode Island	931	4558	2.4	46.4	Northeast
+South Carolina	2816	3635	11.6	37.8	South
+South Dakota	681	4167	1.7	53.3	North Central
+Tennessee	4173	3821	11	41.8	South
+Texas	12237	4188	12.2	47.4	South
+Utah	1203	4022	4.5	67.3	West
+Vermont	472	3907	5.5	57.1	Northeast
+Virginia	4981	4701	9.5	47.8	South
+Washington	3559	4864	4.3	63.5	West
+West Virginia	1799	3617	6.7	41.6	South
+Wisconsin	4589	4468	3	54.5	North Central
+Wyoming	376	4566	6.9	62.9	West