Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 39 additions & 32 deletions Snakefile_step2
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ configfile: 'result/config.yaml'
## INPUTS / OUTPUTS
SUFFIX = ["R1_trim", "R2_trim"]
SAMPLES = config["SampleList"]

TRIM1 = expand('result/1_trim/{sample}_R1_trim.fastq.gz', sample=SAMPLES)
TRIM2 = expand('result/1_trim/{sample}_R2_trim.fastq.gz', sample=SAMPLES)
QC_trim_html = expand("result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html", sample=SAMPLES, suf=SUFFIX)
Expand All @@ -24,8 +24,8 @@ rule all:
*( ['result/5_combined/combined_picard.tsv'] if config["picard"] else [] ),
'log/benchmark/STAR_index.benchmark.txt',
'log/benchmark/STAR_load.benchmark.txt'


##----------------------------------------##
## 1. Adapter removal & quality filtering ##
##----------------------------------------##
Expand All @@ -36,23 +36,19 @@ rule adapterremoval:
R2 = lambda wildcards: config["SampleList"][wildcards.sample]["R2"]
output:
R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
R2 = "result/1_trim/{sample}_R2_trim.fastq.gz",
single = "result/1_trim/{sample}_singleton.truncated.gz",
discard = "result/1_trim/{sample}.settings",
set = "result/1_trim/{sample}_discarded.gz"
R2 = "result/1_trim/{sample}_R2_trim.fastq.gz"
params:
trim5p = config["trim5p"],
trimAdapt = config["trimAdapt"],
adapter1 = config["adapter1"],
adapter2 = config["adapter2"]
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Adapter removal & quality filtering"

run:
run:
if params.trimAdapt == True:
shell("AdapterRemoval --file1 {input.R1} --file2 {input.R2} --output1 {output.R1} --output2 {output.R2} --singleton {output.single} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p} --adapter1 {params.adapter1} --adapter2 {params.adapter2}")
shell("cutadapt -g {params.adapter1} -a {params.adapter2} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores {threads} {input.R1} {input.R2}")
if params.trimAdapt == False:
shell("AdapterRemoval --file1 {input.R1} --file2 {input.R2} --output1 {output.R1} --output2 {output.R2} --singleton {output.single} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p}")
shell("cutadapt --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} -p {output.R2} --cores {threads} {input.R1} {input.R2} ")
if params.trimAdapt != True and params.trimAdapt != False:
print("Error: Config trimAdapt parameter must be set to True or False.")

Expand All @@ -65,7 +61,7 @@ rule fastqc_trim:
output:
HTML = 'result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html',
ZIP = 'result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.zip'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Quality assessment, FastQC"
shell:
"""
Expand All @@ -80,54 +76,65 @@ STAR_index_SA = 'ref/release' + config["release"] + '/STARindex/SA'
if (not os.path.exists(STAR_index_SA)):
rule STAR_index:
input: expand('result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html', sample=SAMPLES, suf=SUFFIX)
output:
output:
IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
params:
release = config["release"],
genome = config["genome"]
threads: config["threads"] * 0.5
threads: max(1, int(config["threads"]*0.5))
message: "Generating genome index"
run:
shell("scripts/STAR_index.sh {params.release} {params.genome} {threads}")
else:
rule skip_index:
input: expand('result/qc/2_fastqc_trim/{sample}_{suf}_fastqc.html', sample=SAMPLES, suf=SUFFIX)
output:
output:
IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
message: "Genome index already present. Skipping indexing rule."

rule STAR_load:
input:
IDX_done = 'log/benchmark/STAR_index.benchmark.txt'
output:
output:
LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
params:
release = config["release"]
threads: config["threads"] * 0.5
threads: max(1, int(config["threads"]*0.5))
message: 'Loading genome index'
run:
shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")

rule STAR_align:
input:
R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
R2 = "result/1_trim/{sample}_R2_trim.fastq.gz",
LOAD_done = 'log/benchmark/STAR_load.benchmark.txt'
output:
output:
BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam'
params:
OUT = 'result/2_bam/{sample}_',
release = config["release"]
threads: config["threads"] * 0.25
threads: max(1, int(config["threads"]*0.25))
message: 'Aligning reads'
run:
shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} {input.R2} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")

shell:
"""
STAR \
--genomeDir ref/release{params.release}/STARindex \
--readFilesCommand zcat \
--readFilesIn {input.R1} {input.R2} \
--outFileNamePrefix {params.OUT} \
--outSAMtype BAM SortedByCoordinate \
--runThreadN {threads} \
--runRNGseed 8756 \
--genomeLoad LoadAndKeep \
--limitBAMsortRAM 20000000000
"""

rule STAR_remove:
input:
LOAD_done = 'log/benchmark/STAR_load.benchmark.txt',
BAM = expand('result/2_bam/{sample}_Aligned.sortedByCoord.out.bam', sample=SAMPLES)
output:
output:
REMOVE_done = touch('log/benchmark/STAR_remove.benchmark.txt')
params:
release = config["release"]
Expand All @@ -143,28 +150,28 @@ rule STAR_remove:
##----------------------------------------##

rule align_filter:
input:
input:
BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam',
REMOVE_done = 'log/benchmark/STAR_remove.benchmark.txt'
output:
output:
BAM2 = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam',
BAI = 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam.bai'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Filtering alignments"
shell:
"""
samtools view {input.BAM} -h -b -f 3 -F 1284 -q 30 -@ {threads} > {output.BAM2}
samtools index -@ {threads} {output.BAM2} > {output.BAI}
samtools index -@ {threads} {output.BAM2} > {output.BAI}
"""

##----------------------------------------##
## 5. ALIGNMENT QC ##
##----------------------------------------##

rule flagstat:
input: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
output: 'result/qc/3_flagstat/{sample}_flagstat.tsv'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Quality control, flagstat"
run:
shell("samtools flagstat -@ {threads} {input} > {output}")
Expand Down Expand Up @@ -197,7 +204,7 @@ rule fcount:
params:
release = config["release"],
genome = config["genome"]
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Counting features"
run:
shell("featureCounts -T {threads} -g gene_id -t exon -p -a 'ref/release{params.release}/STARref/{params.genome}.{params.release}.gtf' -o {output} {input.bam}")
Expand Down
61 changes: 35 additions & 26 deletions Snakefile_step2_single
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ configfile: 'result/config.yaml'

## INPUTS / OUTPUTS
SAMPLES = config["SampleList"]

TRIM1 = expand('result/1_trim/{sample}_R1_trim.fastq.gz', sample=SAMPLES)
QC_trim_html = expand("result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html", sample=SAMPLES)
QC_trim_zip = expand("result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.zip", sample=SAMPLES)
Expand All @@ -21,7 +21,7 @@ rule all:
*( ['result/5_combined/combined_picard.tsv'] if config["picard"] else [] ),
'log/benchmark/STAR_index.benchmark.txt',
'log/benchmark/STAR_load.benchmark.txt'

##----------------------------------------##
## 1. Adapter removal & quality filtering ##
##----------------------------------------##
Expand All @@ -30,21 +30,19 @@ rule adapterremoval:
input:
R1 = lambda wildcards: config["SampleList"][wildcards.sample]["R1"]
output:
R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
discard = "result/1_trim/{sample}.settings",
set = "result/1_trim/{sample}_discarded.gz"
R1 = "result/1_trim/{sample}_R1_trim.fastq.gz"
params:
trim5p = config["trim5p"],
trimAdapt = config["trimAdapt"],
adapter1 = config["adapter1"]
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Adapter removal & quality filtering"

run:
run:
if params.trimAdapt == True:
shell("AdapterRemoval --file1 {input.R1} --output1 {output.R1} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p} --adapter1 {params.adapter1}")
shell("cutadapt -g {params.adapter1} --cut {params.trim5p} --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores {threads} {input.R1}")
if params.trimAdapt == False:
shell("AdapterRemoval --file1 {input.R1} --output1 {output.R1} --discarded {output.discard} --settings {output.set} --threads {threads} --gzip --maxns 1 --minlength 15 --trimqualities --minquality 30 --trim5p {params.trim5p}")
shell("cutadapt --quality-cutoff 30 --max-n 1 --minimum-length 15 -o {output.R1} --cores {threads} {input.R1}")
if params.trimAdapt != True and params.trimAdapt != False:
print("Error: Config trimAdapt parameter must be set to True or False.")

Expand All @@ -57,7 +55,7 @@ rule fastqc_trim:
output:
HTML = 'result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html',
ZIP = 'result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.zip'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Quality assessment, FastQC"
shell:
"""
Expand All @@ -72,53 +70,64 @@ STAR_index_SA = 'ref/release' + config["release"] + '/STARindex/SA'
if (not os.path.exists(STAR_index_SA)):
rule STAR_index:
input: expand('result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html', sample=SAMPLES)
output:
output:
IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
params:
release = config["release"],
genome = config["genome"]
threads: config["threads"] * 0.5
threads: max(1, int(config["threads"]*0.5))
message: "Generating genome index"
run:
shell("scripts/STAR_index.sh {params.release} {params.genome} {threads}")
else:
rule skip_index:
input: expand('result/qc/2_fastqc_trim/{sample}_R1_trim_fastqc.html', sample=SAMPLES)
output:
output:
IDX_done = touch('log/benchmark/STAR_index.benchmark.txt')
message: "Genome index already present. Skipping indexing rule."

rule STAR_load:
input:
IDX_done = 'log/benchmark/STAR_index.benchmark.txt'
output:
output:
LOAD_done = touch('log/benchmark/STAR_load.benchmark.txt')
params:
release = config["release"]
threads: config["threads"] * 0.5
threads: max(1, int(config["threads"]*0.5))
message: 'Loading genome index'
run:
shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndExit")

rule STAR_align:
input:
R1 = "result/1_trim/{sample}_R1_trim.fastq.gz",
LOAD_done = 'log/benchmark/STAR_load.benchmark.txt'
output:
output:
BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam'
params:
OUT = 'result/2_bam/{sample}_',
release = config["release"]
threads: config["threads"] * 0.25
threads: max(1, int(config["threads"]*0.25))
message: 'Aligning reads'
run:
shell("STAR --genomeDir 'ref/release{params.release}/STARindex' --readFilesIn {input.R1} --readFilesCommand zcat --outFileNamePrefix {params.OUT} --outSAMtype BAM SortedByCoordinate --runThreadN {threads} --runRNGseed 8756 --genomeLoad LoadAndKeep --limitBAMsortRAM 20000000000")

shell:
"""
STAR \
--genomeDir ref/release{params.release}/STARindex \
--readFilesCommand zcat \
--readFilesIn {input.R1} \
--outFileNamePrefix {params.OUT} \
--outSAMtype BAM SortedByCoordinate \
--runThreadN {threads} \
--runRNGseed 8756 \
--genomeLoad LoadAndKeep \
--limitBAMsortRAM 20000000000
"""

rule STAR_remove:
input:
LOAD_done = 'log/benchmark/STAR_load.benchmark.txt',
BAM = expand('result/2_bam/{sample}_Aligned.sortedByCoord.out.bam', sample=SAMPLES)
output:
output:
REMOVE_done = touch('log/benchmark/STAR_remove.benchmark.txt')
params:
release = config["release"]
Expand All @@ -134,11 +143,11 @@ rule STAR_remove:
##----------------------------------------##

rule align_filter:
input:
input:
BAM = 'result/2_bam/{sample}_Aligned.sortedByCoord.out.bam',
REMOVE_done = 'log/benchmark/STAR_remove.benchmark.txt'
output: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Filtering alignments"
run:
shell("samtools view {input.BAM} -b -h -F 1284 -q 30 -@ {threads} > {output}")
Expand All @@ -151,7 +160,7 @@ rule align_filter:
rule flagstat:
input: 'result/3_bam_filter/{sample}_Aligned.sortedByCoord.filter.bam'
output: 'result/qc/3_flagstat/{sample}_flagstat.tsv'
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Quality control, flagstat"
run:
shell("samtools flagstat -@ {threads} {input} > {output}")
Expand Down Expand Up @@ -184,7 +193,7 @@ rule fcount:
params:
release = config["release"],
genome = config["genome"]
threads: config["threads"] * 0.1
threads: max(1, int(config["threads"]*0.1))
message: "Counting features"
run:
shell("featureCounts -T {threads} -g gene_id -t exon -p -a 'ref/release{params.release}/STARref/{params.genome}.{params.release}.gtf' -o {output} {input.bam}")
Expand Down
7 changes: 6 additions & 1 deletion environment/Hissss_env.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- snakemake-minimal >=5.24.1
- jinja2 >=2.11
Expand All @@ -12,8 +13,12 @@ dependencies:
- bwa >=0.7
- pysam >=0.15
- fastqc >=0.11.9
- adapterremoval >=2.3.2
- bedtools >=2.30.0
- picard >=2.27.1
- star >=2.7.9a
- subread >=2.0.1
- python >=3.11
- pandas >=1.6
- pip
- pip:
- cutadapt>=5.1
Loading