From 2536bfe2407496642bb7ceb591c82df023267558 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Mon, 14 Apr 2025 10:12:52 -0700
Subject: [PATCH 01/10] typo

---
 vignette/SEAsnake_vignette.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignette/SEAsnake_vignette.Rmd b/vignette/SEAsnake_vignette.Rmd
index f0ab049..7e6a7c1 100755
--- a/vignette/SEAsnake_vignette.Rmd
+++ b/vignette/SEAsnake_vignette.Rmd
@@ -341,7 +341,7 @@ Your directory structure will look like
 library(DiagrammeR)
 grViz(diagram = "digraph flowchart {
   graph[rankdir = LR]
-  node [fontname = arial, shape = retangle, fontsize=10, height=0.1]
+  node [fontname = arial, shape = rectangle, fontsize=10, height=0.1]
   dir1 [label = '@@1']
   dir2 [label = '@@2']
   dir3 [label = '@@3']

From e8c54a068d386d5137fa351d13b9af54323405c9 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 5 Jun 2025 16:14:15 -0700
Subject: [PATCH 02/10] Update pipeline with cutadapt

---
 Snakefile_step2             | 9 +++------
 Snakefile_step2_single      | 8 +++-----
 environment/Hissss_env.yaml | 2 +-
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/Snakefile_step2 b/Snakefile_step2
index 7aeff73..033dba1 100644
--- a/Snakefile_step2
+++ b/Snakefile_step2
@@ -36,10 +36,7 @@ rule adapterremoval:
         R2 = lambda wildcards: config["SampleList"][wildcards.sample]["R2"]
     output:
         R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
-        R2 = "result/1_trim/{sample}_R2_trim.fastq.gz",
-        single = "result/1_trim/{sample}_singleton.truncated.gz",
-        discard = "result/1_trim/{sample}.settings",
-        set = "result/1_trim/{sample}_discarded.gz"
+        R2 = "result/1_trim/{sample}_R2_trim.fastq.gz"
     params:
         trim5p = config["trim5p"],
         trimAdapt = config["trimAdapt"],
@@ -50,9 +47,9 @@ rule adapterremoval:
 
     run: 
         if params.trimAdapt == True:
-            shell("AdapterRemoval --file1 {input.R1} --file2 {input.R2} --output1 {output.R1} --output2 {output.R2} --singleton {output.single} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p} --adapter1 {params.adapter1} --adapter2 {params.adapter2}")
+            shell("cutadapt -g {params.adapter1} -a {params.adapter2} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores={threads} {input.R1} {input.R2}")
         if params.trimAdapt == False:
-            shell("AdapterRemoval --file1 {input.R1} --file2 {input.R2} --output1 {output.R1} --output2 {output.R2} --singleton {output.single} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p}")
+            shell("cutadapt --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores={threads} {input.R1} {input.R2} ")
         if params.trimAdapt != True and params.trimAdapt != False:
             print("Error: Config trimAdapt parameter must be set to True or False.")
 
diff --git a/Snakefile_step2_single b/Snakefile_step2_single
index 1b466e7..a77e7df 100644
--- a/Snakefile_step2_single
+++ b/Snakefile_step2_single
@@ -30,9 +30,7 @@ rule adapterremoval:
     input:
         R1 = lambda wildcards: config["SampleList"][wildcards.sample]["R1"]
     output:
-        R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
-        discard = "result/1_trim/{sample}.settings",
-        set = "result/1_trim/{sample}_discarded.gz"
+        R1 = "result/1_trim/{sample}_R1_trim.fastq.gz"
     params:
         trim5p = config["trim5p"],
         trimAdapt = config["trimAdapt"],
@@ -42,9 +40,9 @@ rule adapterremoval:
 
     run: 
         if params.trimAdapt == True:
-            shell("AdapterRemoval --file1 {input.R1} --output1 {output.R1} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p} --adapter1 {params.adapter1}")
+            shell("cutadapt -g {params.adapter1} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores={threads} {input.R1}")
         if params.trimAdapt == False:
-            shell("AdapterRemoval --file1 {input.R1} --output1 {output.R1} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p}")
+            shell("cutadapt --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores={threads} {input.R1}")
         if params.trimAdapt != True and params.trimAdapt != False:
             print("Error: Config trimAdapt parameter must be set to True or False.")
 
diff --git a/environment/Hissss_env.yaml b/environment/Hissss_env.yaml
index df60ab9..0ea9690 100644
--- a/environment/Hissss_env.yaml
+++ b/environment/Hissss_env.yaml
@@ -12,7 +12,7 @@ dependencies:
   - bwa >=0.7
   - pysam >=0.15
   - fastqc >=0.11.9
-  - adapterremoval >=2.3.2
+  - cutadapt >=5.1
   - bedtools >=2.30.0
   - picard >=2.27.1
   - star >=2.7.9a

From 97b349992bb23ea0f7bdfc4eb40389156d5cc0b2 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 5 Jun 2025 16:15:39 -0700
Subject: [PATCH 03/10] update vignette

---
 vignette/SEAsnake_vignette.Rmd  |  6 +++---
 vignette/SEAsnake_vignette.html | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/vignette/SEAsnake_vignette.Rmd b/vignette/SEAsnake_vignette.Rmd
index 7e6a7c1..fc593f8 100755
--- a/vignette/SEAsnake_vignette.Rmd
+++ b/vignette/SEAsnake_vignette.Rmd
@@ -19,7 +19,7 @@ urlcolor: blue
 [SEAsnake](https://github.com/BIGslu/SEAsnake) is a [snakemake](https://snakemake.readthedocs.io/en/stable/) pipeline to process bulk RNA-seq data from fastq sequences to gene counts. It includes the following steps. You can see an in-depth example of how to run these steps separately outside SEAsnake in our [first tutorial](https://github.com/BIGslu/tutorials/blob/main/RNAseq/1.Hawn_RNAseq_fastq.to.counts.pdf).
 
 1. Quality assess sequences with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
-2. Remove adapters and filter low quality sequences with [AdapterRemoval](https://adapterremoval.readthedocs.io/en/latest/)
+2. Remove adapters and filter low quality sequences with [cutadapt](https://cutadapt.readthedocs.io/en/stable/index.html)
     - max N = 1
     - min length = 15
     - min quality = 30
@@ -60,7 +60,7 @@ SEAsnake step 1 completes 1 fastq in 30 to 45 min. Since this step is run in par
 
 SEAsnake step 2 completes 1 set of paired end fastq in roughly 10 hours. Using the same example, if you have 40 fastq from 20 paired end samples running on 14 cores, this is 20 / 14 * 10 = 14.3 hours. Non-paired end samples takes about half the time.
 
-Step 2 will take less time if you do not have adapter contamination requiring alignment in `AdapterRemoval` (- 1 hour per single fastq or paired-end sample) or if you already have an indexed reference genome for `STAR` (- 1 hour from total time).
+Step 2 will take less time if you do not have adapter contamination requiring alignment in `cutadapt` (- 1 hour per single fastq or paired-end sample) or if you already have an indexed reference genome for `STAR` (- 1 hour from total time).
 
 We are working on improving speed so please check for updates periodically.
 
@@ -545,7 +545,7 @@ STAR_align            2              3              3
 STAR_index            1              3              3
 STAR_load             1              3              3
 STAR_remove           1              1              1
-adapterremoval        2              1              1
+cutadapt              2              1              1
 align_filter          2              1              1
 all                   1              1              1
 combine               1              1              1
diff --git a/vignette/SEAsnake_vignette.html b/vignette/SEAsnake_vignette.html
index 70ddf98..9b4a9db 100755
--- a/vignette/SEAsnake_vignette.html
+++ b/vignette/SEAsnake_vignette.html
@@ -2721,7 +2721,7 @@
 <h1 class="title toc-ignore">SEAsnake vignette</h1>
 <h3 class="subtitle">snakemake pipeline from fastq to counts</h3>
 <h4 class="author">Kim Dill-McFarland, <a href="mailto:kadm@uw.edu" class="email">kadm@uw.edu</a></h4>
-<h4 class="date">version February 10, 2025</h4>
+<h4 class="date">version June 05, 2025</h4>
 
 </div>
 
@@ -2735,7 +2735,7 @@ <h1>Introduction</h1>
 tutorial</a>.</p>
 <ol style="list-style-type: decimal">
 <li>Quality assess sequences with <a href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/">FastQC</a></li>
-<li>Remove adapters and filter low quality sequences with <a href="https://adapterremoval.readthedocs.io/en/latest/">AdapterRemoval</a>
+<li>Remove adapters and filter low quality sequences with <a href="https://cutadapt.readthedocs.io/en/stable/index.html">cutadapt</a>
 <ul>
 <li>max N = 1</li>
 <li>min length = 15</li>
@@ -2817,9 +2817,9 @@ <h3>Time</h3>
 samples running on 14 cores, this is 20 / 14 * 10 = 14.3 hours.
 Non-paired end samples takes about half the time.</p>
 <p>Step 2 will take less time if you do not have adapter contamination
-requiring alignment in <code>AdapterRemoval</code> (- 1 hour per single
-fastq or paired-end sample) or if you already have an indexed reference
-genome for <code>STAR</code> (- 1 hour from total time).</p>
+requiring alignment in <code>cutadapt</code> (- 1 hour per single fastq
+or paired-end sample) or if you already have an indexed reference genome
+for <code>STAR</code> (- 1 hour from total time).</p>
 <p>We are working on improving speed so please check for updates
 periodically.</p>
 </div>
@@ -3089,8 +3089,8 @@ <h3>Step 1</h3>
 retains all messages and errors in the log file.</em></p>
 <pre class="bash"><code>nohup snakemake --snakefile Snakefile_step1 --cores $cores &gt;&gt; log/SEAsnake_step1.log 2&gt;&amp;1 &amp;</code></pre>
 <p>Your directory structure will look like</p>
-<div class="grViz html-widget html-fill-item" id="htmlwidget-736e6129efc82c362d16" style="width:672px;height:576px;"></div>
-<script type="application/json" data-for="htmlwidget-736e6129efc82c362d16">{"x":{"diagram":"digraph flowchart {\n  graph[rankdir = LR]\n  node [fontname = arial, shape = retangle, fontsize=10, height=0.1]\n  dir1 [label = \"SEAsnake/\"]\n  dir2 [label = \"data/\"]\n  dir3 [label = \"1_trim/\"]\n  dir4 [label = \"2_bam/\"]\n  dir5 [label = \"3_bam_filter/\"]\n  dir6 [label = \"4_count/\"]\n  dir7 [label = \"5_combined/\"]\n  dir8 [label = \"qc/\"]\n  dir9 [label = \"1_fastqc_raw/\"]\n  dir10 [label = \"2_fastqc_trim/\"]\n  dir11 [label = \"3_flagstat/\"]\n  dir12 [label = \"4_picard/\"]\n  dir13 [label = \"log/\"]\n  dir14 [label = \"ref/\"]\n  dir15 [label = \"result/\"]\n  dir16 [label = \"PICARDref/\"]\n  dir17 [label = \"release###/\"]\n  dir18 [label = \"STARindex/\"]\n  dir19 [label = \"STARref/\"]\n\n  dir1 -> dir15,dir2,dir13,dir14;\n  dir15 -> dir3,dir4,dir5,dir6,dir7,dir8;\n  dir8 -> dir9,dir10,dir11,dir12;\n  dir14 -> dir16,dir17;\n  dir17 -> dir18,dir19;\n  }","config":{"engine":"dot","options":null}},"evals":[],"jsHooks":[]}</script>
+<div class="grViz html-widget html-fill-item" id="htmlwidget-13b8826d522447be3bd5" style="width:672px;height:576px;"></div>
+<script type="application/json" data-for="htmlwidget-13b8826d522447be3bd5">{"x":{"diagram":"digraph flowchart {\n  graph[rankdir = LR]\n  node [fontname = arial, shape = rectangle, fontsize=10, height=0.1]\n  dir1 [label = \"SEAsnake/\"]\n  dir2 [label = \"data/\"]\n  dir3 [label = \"1_trim/\"]\n  dir4 [label = \"2_bam/\"]\n  dir5 [label = \"3_bam_filter/\"]\n  dir6 [label = \"4_count/\"]\n  dir7 [label = \"5_combined/\"]\n  dir8 [label = \"qc/\"]\n  dir9 [label = \"1_fastqc_raw/\"]\n  dir10 [label = \"2_fastqc_trim/\"]\n  dir11 [label = \"3_flagstat/\"]\n  dir12 [label = \"4_picard/\"]\n  dir13 [label = \"log/\"]\n  dir14 [label = \"ref/\"]\n  dir15 [label = \"result/\"]\n  dir16 [label = \"PICARDref/\"]\n  dir17 [label = \"release###/\"]\n  dir18 [label = \"STARindex/\"]\n  dir19 [label = \"STARref/\"]\n\n  dir1 -> dir15,dir2,dir13,dir14;\n  dir15 -> dir3,dir4,dir5,dir6,dir7,dir8;\n  dir8 -> dir9,dir10,dir11,dir12;\n  dir14 -> dir16,dir17;\n  dir17 -> dir18,dir19;\n  }","config":{"engine":"dot","options":null}},"evals":[],"jsHooks":[]}</script>
 <div id="breaking-down-the-snakemake-command" class="section level4">
 <h4>Breaking down the <code>snakemake</code> command</h4>
 <p>For those less familiar with bash scripting, here is a detailed break
@@ -3275,7 +3275,7 @@ <h3>Step 2</h3>
 STAR_index            1              3              3
 STAR_load             1              3              3
 STAR_remove           1              1              1
-adapterremoval        2              1              1
+cutadapt              2              1              1
 align_filter          2              1              1
 all                   1              1              1
 combine               1              1              1

From f3e5919afdf78308024b32affdc885d1f81670f1 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 5 Jun 2025 16:24:41 -0700
Subject: [PATCH 04/10] typo

---
 Snakefile_step2        | 34 +++++++++++++++++-----------------
 Snakefile_step2_single | 26 +++++++++++++-------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/Snakefile_step2 b/Snakefile_step2
index 033dba1..448d4c8 100644
--- a/Snakefile_step2
+++ b/Snakefile_step2
@@ -3,7 +3,7 @@ configfile: 'result/config.yaml'
 ## INPUTS / OUTPUTS
 SUFFIX = ["R1_trim", "R2_trim"]
 SAMPLES = config["SampleList"]
-    
+
 TRIM1 = expand('result/1_trim/{sample}_R1_trim.fastq.gz', sample=SAMPLES)
 TRIM2 = expand('result/1_trim/{sample}_R2_trim.fastq.gz', sample=SAMPLES)
 QC_trim_html = expand("result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html", sample=SAMPLES, suf=SUFFIX)
@@ -24,8 +24,8 @@ rule all:
         *( ['result/5_combined/combined_picard.tsv'] if config["picard"] else [] ),
         'log/benchmark/STAR_index.benchmark.txt',
         'log/benchmark/STAR_load.benchmark.txt'
-        
-        
+
+
 ##----------------------------------------##
 ## 1. Adapter removal & quality filtering ##
 ##----------------------------------------##
@@ -45,11 +45,11 @@ rule adapterremoval:
     threads: config["threads"] * 0.1
     message: "Adapter removal & quality filtering"
 
-    run: 
+    run:
         if params.trimAdapt == True:
-            shell("cutadapt -g {params.adapter1} -a {params.adapter2} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores={threads} {input.R1} {input.R2}")
+            shell("cutadapt -g {params.adapter1} -a {params.adapter2} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores {threads} {input.R1} {input.R2}")
         if params.trimAdapt == False:
-            shell("cutadapt --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores={threads} {input.R1} {input.R2} ")
+            shell("cutadapt --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores {threads} {input.R1} {input.R2} ")
         if params.trimAdapt != True and params.trimAdapt != False:
             print("Error: Config trimAdapt parameter must be set to True or False.")
 
@@ -77,7 +77,7 @@ STAR_index_SA = 'ref/release' + config["release"] + '/STARindex/SA'
 if (not os.path.exists(STAR_index_SA)):
     rule STAR_index:
         input: expand('result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html', sample=SAMPLES, suf=SUFFIX)
-        output: 
+        output:
             IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
         params:
             release = config["release"],
@@ -89,14 +89,14 @@ if (not os.path.exists(STAR_index_SA)):
 else:
     rule skip_index:
         input: expand('result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html', sample=SAMPLES, suf=SUFFIX)
-        output: 
+        output:
             IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
         message: "Genome index already present. Skipping indexing rule."
 
 rule STAR_load:
     input:
         IDX_done = 'log/benchmark/STAR_index.benchmark.txt'
-    output: 
+    output:
         LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
     params:
         release = config["release"]
@@ -104,13 +104,13 @@ rule STAR_load:
     message: 'Loading genome index'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")
-        
+
 rule STAR_align:
     input:
         R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
         R2 = "result/1_trim/{sample}_R2_trim.fastq.gz",
         LOAD_done = 'log/benchmark/STAR_load.benchmark.txt'
-    output: 
+    output:
         BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam'
     params:
         OUT = 'result/2_bam/{sample}_',
@@ -119,12 +119,12 @@ rule STAR_align:
     message: 'Aligning reads'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} {input.R2} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
-  
+
 rule STAR_remove:
     input:
         LOAD_done = 'log/benchmark/STAR_load.benchmark.txt',
         BAM = expand('result/2_bam/{sample}_Aligned.sortedByCoord.out.bam', sample=SAMPLES)
-    output: 
+    output:
         REMOVE_done = touch('log/benchmark/STAR_remove.benchmark.txt')
     params:
         release = config["release"]
@@ -140,10 +140,10 @@ rule STAR_remove:
 ##----------------------------------------##
 
 rule align_filter:
-    input: 
+    input:
         BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam',
         REMOVE_done = 'log/benchmark/STAR_remove.benchmark.txt'
-    output: 
+    output:
         BAM2 = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam',
         BAI = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam.bai'
     threads: config["threads"] * 0.1
@@ -151,9 +151,9 @@ rule align_filter:
     shell:
         """
         samtools view {input.BAM} -h -b -f 3 -F 1284 -q 30 -@ {threads} > {output.BAM2}
-        samtools index -@ {threads} {output.BAM2} > {output.BAI} 
+        samtools index -@ {threads} {output.BAM2} > {output.BAI}
         """
- 
+
 ##----------------------------------------##
 ## 5. ALIGNMENT QC                        ##
 ##----------------------------------------##
diff --git a/Snakefile_step2_single b/Snakefile_step2_single
index a77e7df..3682716 100644
--- a/Snakefile_step2_single
+++ b/Snakefile_step2_single
@@ -2,7 +2,7 @@ configfile: 'result/config.yaml'
 
 ## INPUTS / OUTPUTS
 SAMPLES = config["SampleList"]
-    
+
 TRIM1 = expand('result/1_trim/{sample}_R1_trim.fastq.gz', sample=SAMPLES)
 QC_trim_html = expand("result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html", sample=SAMPLES)
 QC_trim_zip = expand("result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.zip", sample=SAMPLES)
@@ -21,7 +21,7 @@ rule all:
         *( ['result/5_combined/combined_picard.tsv'] if config["picard"] else [] ),
         'log/benchmark/STAR_index.benchmark.txt',
         'log/benchmark/STAR_load.benchmark.txt'
-        
+
 ##----------------------------------------##
 ## 1. Adapter removal & quality filtering ##
 ##----------------------------------------##
@@ -38,11 +38,11 @@ rule adapterremoval:
     threads: config["threads"] * 0.1
     message: "Adapter removal & quality filtering"
 
-    run: 
+    run:
         if params.trimAdapt == True:
-            shell("cutadapt -g {params.adapter1} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores={threads} {input.R1}")
+            shell("cutadapt -g {params.adapter1} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores {threads} {input.R1}")
         if params.trimAdapt == False:
-            shell("cutadapt --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores={threads} {input.R1}")
+            shell("cutadapt --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores {threads} {input.R1}")
         if params.trimAdapt != True and params.trimAdapt != False:
             print("Error: Config trimAdapt parameter must be set to True or False.")
 
@@ -70,7 +70,7 @@ STAR_index_SA = 'ref/release' + config["release"] + '/STARindex/SA'
 if (not os.path.exists(STAR_index_SA)):
     rule STAR_index:
         input: expand('result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html', sample=SAMPLES)
-        output: 
+        output:
             IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
         params:
             release = config["release"],
@@ -82,14 +82,14 @@ if (not os.path.exists(STAR_index_SA)):
 else:
     rule skip_index:
         input: expand('result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html', sample=SAMPLES)
-        output: 
+        output:
             IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
         message: "Genome index already present. Skipping indexing rule."
 
 rule STAR_load:
     input:
         IDX_done = 'log/benchmark/STAR_index.benchmark.txt'
-    output: 
+    output:
         LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
     params:
         release = config["release"]
@@ -97,12 +97,12 @@ rule STAR_load:
     message: 'Loading genome index'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")
-        
+
 rule STAR_align:
     input:
         R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
         LOAD_done = 'log/benchmark/STAR_load.benchmark.txt'
-    output: 
+    output:
         BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam'
     params:
         OUT = 'result/2_bam/{sample}_',
@@ -111,12 +111,12 @@ rule STAR_align:
     message: 'Aligning reads'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
-  
+
 rule STAR_remove:
     input:
         LOAD_done = 'log/benchmark/STAR_load.benchmark.txt',
         BAM = expand('result/2_bam/{sample}_Aligned.sortedByCoord.out.bam', sample=SAMPLES)
-    output: 
+    output:
         REMOVE_done = touch('log/benchmark/STAR_remove.benchmark.txt')
     params:
         release = config["release"]
@@ -132,7 +132,7 @@ rule STAR_remove:
 ##----------------------------------------##
 
 rule align_filter:
-    input: 
+    input:
         BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam',
         REMOVE_done = 'log/benchmark/STAR_remove.benchmark.txt'
     output: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'

From fdc2983a3180b55f871b99d44b070f65797c877b Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 11 Sep 2025 16:45:27 -0700
Subject: [PATCH 05/10] Add python dependencies

---
 environment/Hissss_env.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/environment/Hissss_env.yaml b/environment/Hissss_env.yaml
index 0ea9690..3909331 100644
--- a/environment/Hissss_env.yaml
+++ b/environment/Hissss_env.yaml
@@ -17,3 +17,5 @@ dependencies:
   - picard >=2.27.1
   - star >=2.7.9a
   - subread >=2.0.1
+  - python >=3.11
+  - pandas >=1.6

From c5782f6624fa55bb3748fc37ddf7353c74bcb404 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 11 Sep 2025 16:45:40 -0700
Subject: [PATCH 06/10] rm sudo usage

---
 scripts/create_config.sh | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/scripts/create_config.sh b/scripts/create_config.sh
index 5ff0431..efe713c 100755
--- a/scripts/create_config.sh
+++ b/scripts/create_config.sh
@@ -6,7 +6,7 @@
 
 SampleList="data/*_R1*"
 
-sudo echo "SampleList:" > result/config.yaml
+echo "SampleList:" > result/config.yaml
 
 spacer1=": "
 
@@ -25,11 +25,11 @@ sample2=`echo "${sample/R1/R2}"`
 sample_name=`echo "$(basename $sample)" | grep -o '^.*_L[0-9][0-9][0-9]' | sed 's/_L[0-9][0-9][0-9]$//'`
 
 # Add sample name to config
-sudo echo "  " "$sample_name$spacer1" >> result/config.yaml
-sudo echo "    sample: '"$sample_name"'" >> result/config.yaml
+echo "  " "$sample_name$spacer1" >> result/config.yaml
+echo "    sample: '"$sample_name"'" >> result/config.yaml
 # Add fastq files to config
-sudo echo "    R1: '"$sample"'" >> result/config.yaml
-sudo echo "    R2: '"$sample2"'" >> result/config.yaml
+echo "    R1: '"$sample"'" >> result/config.yaml
+echo "    R2: '"$sample2"'" >> result/config.yaml
 done
 
 #################################
@@ -44,7 +44,7 @@ cores2=1
 fi
 
 # Add default param to config
-sudo echo "
+echo "
 
 # Adapter removal
 ## Base pairs to trim from 5' end
@@ -58,8 +58,8 @@ adapter2: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
 
 ## Species the format 'Homo_sapiens.GRCh38' or 'Mus_musculus.GRCm39'
 genome: 'Homo_sapiens.GRCh38'
-## Genome release number. Current as of 2022.04.14
-release: '106'
+## Genome release number. Current as of 2025.06.05
+release: '114'
 
 # Alignment metrics
 ## Run Picard?
@@ -71,17 +71,17 @@ threads: $cores2
 
 # Setup directory structure
 
-sudo mkdir -p -m 777 'result/qc/1_fastqc_raw'
-sudo mkdir -p -m 777 'result/qc/2_fastqc_trim'
-sudo mkdir -p -m 777 'result/qc/3_flagstat'
-sudo mkdir -p -m 777 'result/qc/4_picard'
+mkdir -p -m 777 'result/qc/1_fastqc_raw'
+mkdir -p -m 777 'result/qc/2_fastqc_trim'
+mkdir -p -m 777 'result/qc/3_flagstat'
+mkdir -p -m 777 'result/qc/4_picard'
 
-sudo mkdir -p -m 777 'result/1_trim'
-sudo mkdir -p -m 777 'result/2_bam'
-sudo mkdir -p -m 777 'result/3_bam_filter'
-sudo mkdir -p -m 777 'result/4_count'
-sudo mkdir -p -m 777 'result/5_combined'
+mkdir -p -m 777 'result/1_trim'
+mkdir -p -m 777 'result/2_bam'
+mkdir -p -m 777 'result/3_bam_filter'
+mkdir -p -m 777 'result/4_count'
+mkdir -p -m 777 'result/5_combined'
 
-sudo mkdir -p -m 777 'ref'
-sudo mkdir -p -m 777 'log'
+mkdir -p -m 777 'ref'
+mkdir -p -m 777 'log'
 

From 5ce03661de7abe532be9829481e1e609431eb2c1 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 11 Sep 2025 16:46:08 -0700
Subject: [PATCH 07/10] Make robust to small # of threads input

---
 Snakefile_step2        | 16 ++++++++--------
 Snakefile_step2_single | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/Snakefile_step2 b/Snakefile_step2
index 448d4c8..dc3030e 100644
--- a/Snakefile_step2
+++ b/Snakefile_step2
@@ -42,7 +42,7 @@ rule adapterremoval:
         trimAdapt = config["trimAdapt"],
         adapter1 = config["adapter1"],
         adapter2 = config["adapter2"]
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Adapter removal & quality filtering"
 
     run:
@@ -62,7 +62,7 @@ rule fastqc_trim:
     output:
         HTML = 'result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html',
         ZIP = 'result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.zip'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Quality assessment, FastQC"
     shell:
         """
@@ -82,7 +82,7 @@ if (not os.path.exists(STAR_index_SA)):
         params:
             release = config["release"],
             genome = config["genome"]
-        threads: config["threads"] * 0.5
+        threads: max(1, int(config["threads"]*0.5))
         message: "Generating genome index"
         run:
             shell("scripts/STAR_index.sh {params.release} {params.genome} {threads}")
@@ -100,7 +100,7 @@ rule STAR_load:
         LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
     params:
         release = config["release"]
-    threads: config["threads"] * 0.5
+    threads: max(1, int(config["threads"]*0.5))
     message: 'Loading genome index'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")
@@ -115,7 +115,7 @@ rule STAR_align:
     params:
         OUT = 'result/2_bam/{sample}_',
         release = config["release"]
-    threads: config["threads"] * 0.25
+    threads: max(1, int(config["threads"]*0.25))
     message: 'Aligning reads'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} {input.R2} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
@@ -146,7 +146,7 @@ rule align_filter:
     output:
         BAM2 = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam',
         BAI = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam.bai'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Filtering alignments"
     shell:
         """
@@ -161,7 +161,7 @@ rule align_filter:
 rule flagstat:
     input: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
     output: 'result/qc/3_flagstat/{sample}_flagstat.tsv'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Quality control, flagstat"
     run:
         shell("samtools flagstat -@ {threads} {input} > {output}")
@@ -194,7 +194,7 @@ rule fcount:
     params:
         release = config["release"],
         genome = config["genome"]
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Counting features"
     run:
         shell("featureCounts -T {threads} -g gene_id -t exon -p -a 'ref/release{params.release}/STARref/{params.genome}.{params.release}.gtf' -o {output} {input.bam}")
diff --git a/Snakefile_step2_single b/Snakefile_step2_single
index 3682716..3149a2d 100644
--- a/Snakefile_step2_single
+++ b/Snakefile_step2_single
@@ -35,7 +35,7 @@ rule adapterremoval:
         trim5p = config["trim5p"],
         trimAdapt = config["trimAdapt"],
         adapter1 = config["adapter1"]
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Adapter removal & quality filtering"
 
     run:
@@ -55,7 +55,7 @@ rule fastqc_trim:
     output:
         HTML = 'result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html',
         ZIP = 'result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.zip'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Quality assessment, FastQC"
     shell:
         """
@@ -75,7 +75,7 @@ if (not os.path.exists(STAR_index_SA)):
         params:
             release = config["release"],
             genome = config["genome"]
-        threads: config["threads"] * 0.5
+        threads: max(1, int(config["threads"]*0.5))
         message: "Generating genome index"
         run:
             shell("scripts/STAR_index.sh {params.release} {params.genome} {threads}")
@@ -93,7 +93,7 @@ rule STAR_load:
         LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
     params:
         release = config["release"]
-    threads: config["threads"] * 0.5
+    threads: max(1, int(config["threads"]*0.5))
     message: 'Loading genome index'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")
@@ -107,7 +107,7 @@ rule STAR_align:
     params:
         OUT = 'result/2_bam/{sample}_',
         release = config["release"]
-    threads: config["threads"] * 0.25
+    threads: max(1, int(config["threads"]*0.25))
     message: 'Aligning reads'
     run:
         shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
@@ -136,7 +136,7 @@ rule align_filter:
         BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam',
         REMOVE_done = 'log/benchmark/STAR_remove.benchmark.txt'
     output: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Filtering alignments"
     run:
         shell("samtools view {input.BAM} -b -h -F 1284 -q 30 -@ {threads} > {output}")
@@ -149,7 +149,7 @@ rule align_filter:
 rule flagstat:
     input: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
     output: 'result/qc/3_flagstat/{sample}_flagstat.tsv'
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Quality control, flagstat"
     run:
         shell("samtools flagstat -@ {threads} {input} > {output}")
@@ -182,7 +182,7 @@ rule fcount:
     params:
         release = config["release"],
         genome = config["genome"]
-    threads: config["threads"] * 0.1
+    threads: max(1, int(config["threads"]*0.1))
     message: "Counting features"
     run:
         shell("featureCounts -T {threads} -g gene_id -t exon -p -a 'ref/release{params.release}/STARref/{params.genome}.{params.release}.gtf' -o {output} {input.bam}")

From 935968109ffa9eb4a682a2e7b117fae32f446fad Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Thu, 11 Sep 2025 16:46:22 -0700
Subject: [PATCH 08/10] clean star command

---
 Snakefile_step2        | 15 +++++++++++++--
 Snakefile_step2_single | 15 +++++++++++++--
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/Snakefile_step2 b/Snakefile_step2
index dc3030e..42d89d3 100644
--- a/Snakefile_step2
+++ b/Snakefile_step2
@@ -117,8 +117,19 @@ rule STAR_align:
         release = config["release"]
     threads: max(1, int(config["threads"]*0.25))
     message: 'Aligning reads'
-    run:
-        shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} {input.R2} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
+    shell:
+        """
+        STAR \
+        --genomeDir ref/release{params.release}/STARindex \
+        --readFilesCommand zcat \
+        --readFilesIn {input.R1} {input.R2} \
+        --outFileNamePrefix {params.OUT} \
+        --outSAMtype BAM SortedByCoordinate \
+        --runThreadN {threads} \
+        --runRNGseed 8756 \
+        --genomeLoad LoadAndKeep \
+        --limitBAMsortRAM 20000000000
+        """
 
 rule STAR_remove:
     input:
diff --git a/Snakefile_step2_single b/Snakefile_step2_single
index 3149a2d..2542196 100644
--- a/Snakefile_step2_single
+++ b/Snakefile_step2_single
@@ -109,8 +109,19 @@ rule STAR_align:
         release = config["release"]
     threads: max(1, int(config["threads"]*0.25))
     message: 'Aligning reads'
-    run:
-        shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")
+    shell:
+        """
+        STAR \
+        --genomeDir ref/release{params.release}/STARindex \
+        --readFilesCommand zcat \
+        --readFilesIn {input.R1} \
+        --outFileNamePrefix {params.OUT} \
+        --outSAMtype BAM SortedByCoordinate \
+        --runThreadN {threads} \
+        --runRNGseed 8756 \
+        --genomeLoad LoadAndKeep \
+        --limitBAMsortRAM 20000000000
+        """
 
 rule STAR_remove:
     input:

From d859e721df6066844999bf180c101c80b930a3f9 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Fri, 12 Sep 2025 16:17:33 -0700
Subject: [PATCH 09/10] fix cutadapt install issue on Mac

---
 environment/Hissss_env.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/environment/Hissss_env.yaml b/environment/Hissss_env.yaml
index 3909331..98a6232 100644
--- a/environment/Hissss_env.yaml
+++ b/environment/Hissss_env.yaml
@@ -1,6 +1,7 @@
 channels:
   - bioconda
   - conda-forge
+  - defaults
 dependencies:
   - snakemake-minimal >=5.24.1
   - jinja2 >=2.11
@@ -12,10 +13,12 @@ dependencies:
   - bwa >=0.7
   - pysam >=0.15
   - fastqc >=0.11.9
-  - cutadapt >=5.1
   - bedtools >=2.30.0
   - picard >=2.27.1
   - star >=2.7.9a
   - subread >=2.0.1
   - python >=3.11
   - pandas >=1.6
+  - pip
+  - pip:
+    - cutadapt>=5.1

From f821d539dd67925a5349cab2fae668e56132ba75 Mon Sep 17 00:00:00 2001
From: Kimberly Dill-McFarland <kdillmcfarland@gmail.com>
Date: Fri, 12 Sep 2025 16:30:31 -0700
Subject: [PATCH 10/10] typo

---
 Snakefile_step2 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Snakefile_step2 b/Snakefile_step2
index 42d89d3..0aaa604 100644
--- a/Snakefile_step2
+++ b/Snakefile_step2
@@ -44,7 +44,6 @@ rule adapterremoval:
         adapter2 = config["adapter2"]
     threads: max(1, int(config["threads"]*0.1))
     message: "Adapter removal & quality filtering"
-
     run:
         if params.trimAdapt == True:
             shell("cutadapt -g {params.adapter1} -a {params.adapter2} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores {threads} {input.R1} {input.R2}")