Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 0 additions & 192 deletions bin/convert_VCF_info_fields.py

This file was deleted.

31 changes: 31 additions & 0 deletions bin/patch_legacy_bed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3

import sys
from Bio import SeqIO
import pandas as pd

reference_file = sys.argv[1]
bed_file = sys.argv[2]
patched_bed_file_name = f"{bed_file.replace('.bed', '.patched')}.bed"

print(f"Reading input files REF: '{reference_file}', BED: '{bed_file}' ...")

ref_seq = next(SeqIO.parse(reference_file, "fasta"))
bed_df = pd.read_csv(bed_file, sep="\t", names = ['chrom', 'chromStart', 'chromEnd', 'primer-name', 'pool', 'strand'])

if len(bed_df.columns) > 6:
print('BED file is not in deprecated format and does not have to be patched for artic, how did you end up in this script?')
quit()

print('Patching BED file primer sequences ...')

bed_df['primer-sequence'] = bed_df.apply(
lambda row: str(ref_seq.seq[row['chromStart']:row['chromEnd']]) if row['strand'] == "+" else str(ref_seq.seq[row['chromStart']:row['chromEnd']].reverse_complement()),
axis=1
)

print(f"Writing patched BED file '{patched_bed_file_name}' ...")

bed_df.to_csv(patched_bed_file_name, sep = '\t', header = False, index = False)

print(f"Done.")
4 changes: 2 additions & 2 deletions configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ process {
// DONT add GUPPY containers here !!! they are maintained via process
// pangolin container is maintained via params.defaultpangolin in nextflow.config
// nextclade container is maintained via params.defaultpangolin in nextflow.config
withLabel: artic { container = 'nanozoo/artic:1.3.0-dev--784493e' }
withLabel: artic { container = 'community.wave.seqera.io/library/artic:1.8.4--1f7aeb3df7a7d5e1' }
withLabel: bwa { container = 'nanozoo/bwa:0.7.17--d11c0a4' }
withLabel: covarplot { container = 'nanozoo/covarplot:0.0.2--2c6e300' }
withLabel: covarplot { container = 'nanozoo/covarplot:0.0.3--f7d5d8e' }
withLabel: demultiplex { container = 'nanozoo/guppy_cpu:5.0.7-1--47b84be' }
withLabel: fastcov { container = 'raverjay/fastcov:0.1.3--ba8c8cf6ae19' }
withLabel: freyja { container = 'staphb/freyja:1.5.0-03_27_2024-00-48-2024-03-27' }
Expand Down
1 change: 1 addition & 0 deletions data/external_primer_schemes/aliases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"artic-chikungunya-virus": "artic-chikv-ecsa", "artic-yellow-fever": "artic-yfv-west-east-africa", "hepatitis-a-virus": "hav", "hepatitis-b-virus": "hbv", "hepatitis-e-virus": "hev", "ncov-2019": "artic-sars-cov-2", "sars-cov-2": "artic-sars-cov-2"}
1 change: 1 addition & 0 deletions data/external_primer_schemes/index.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process add_alt_allele_ratio_vcf {
process count_mixed_sites {
label 'artic'
publishDir "${params.output}/${params.lineagedir}/${name}/", mode: 'copy'
input:
Expand All @@ -13,25 +13,14 @@ process add_alt_allele_ratio_vcf {
"""
# reheader failed VCF and change FILTER
echo '##FILTER=<ID=ARTICFAIL,Description="ARTIC filter failed">' > add-to-hdr.txt
# -c and --rename-annots add a header with a default/wrong description
bcftools annotate -h add-to-hdr.txt -c "FILTER/ARTICFAIL:=FILTER/PASS" ${failed_vcf} | sed '/##FILTER=<ID=ARTICFAIL,Description="All filters passed">/d' > tmp_failed_updated-filter.vcf

# concat failed and passed VCF
bcftools concat ${vcf} tmp_failed_updated-filter.vcf | bcftools sort -o tmp_merged.vcf

# call medaka tools annotate for each pool and add the alternate allele ratio
for pool in `cut -f5 ${primer_dir}/${primer_version_tag}/nCoV-2019.scheme.bed | sort | uniq`; do
bcftools view -i 'INFO/Pool="'\$pool'"' tmp_merged.vcf -o tmp_\$pool.vcf
medaka tools annotate --dpsp --pad 25 --RG \$pool tmp_\$pool.vcf ${primer_dir}/${primer_version_tag}/nCoV-2019.reference.fasta ${bam} tmp_annotated_\$pool.vcf
convert_VCF_info_fields.py tmp_annotated_\$pool.vcf tmp_aar_\$pool.vcf
done

# merge pool VCF and sort VCF
bcftools concat tmp_aar_*.vcf | bcftools sort -o ${name}_all-vars-with-aar.vcf
bcftools concat ${vcf} tmp_failed_updated-filter.vcf | bcftools sort -o ${name}_all-vars-with-aar.vcf

# count mixed sites
# thresholds for human geotyping: 0.35 <= x <= 0.65
NUM_MIXED_SITES=\$(bcftools view -H -i 'INFO/DP>${params.min_depth} & INFO/AF>=0.3 & INFO/AF<=0.8' ${name}_all-vars-with-aar.vcf | wc -l)
NUM_MIXED_SITES=\$(bcftools view -H -i 'FORMAT/DP>${params.min_depth} & FORMAT/AF>=0.3 & FORMAT/AF<=0.8' ${name}_all-vars-with-aar.vcf | wc -l)
echo sample,num_mixed_sites > mixed_sites_stats.csv
echo ${name},\$NUM_MIXED_SITES >> mixed_sites_stats.csv

Expand Down
13 changes: 11 additions & 2 deletions modules/plot_coverages.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ process plot_coverages {
path("coverages_*.png")
script:
"""
fastcov.py -l -o coverages_\$(echo ${alignment_files} | tr ' ' '_').png ${alignment_files}
fastcov.py -l -p NC_045512.2:21563-25385 -o coverages_spike_\$(echo ${alignment_files} | tr ' ' '_').png ${alignment_files}
output_name=coverages_\$(echo ${alignment_files} | tr ' ' '_').png
output_name_no_ext=\${output_name//.bam/}
output_name=\$output_name_no_ext

if [ \${#output_name} -gt 255 ]; then
echo "Output file name exceeds filename character limit"
output_name="\${output_name:0:240}.png"
fi

fastcov.py -l -o \$output_name ${alignment_files}
fastcov.py -l -p NC_045512.2:21563-25385 -o \$output_name ${alignment_files}
"""
stub:
"""
Expand Down
17 changes: 12 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ params {
krakendb = ''
lcs = false
localguppy = false
medaka_model = 'r941_min_hac_g507'
nanopolish = ''
clair3_model_name = 'r1041_e82_400bps_sup_v410'
// --model-dir defaults to CONDA_ROOT_PREFIX but somehow this is not set in seqera containers and instead points to MAMBA_ROOT...
clair3_model_dir = '/opt/conda/bin/models/'
one_end = false
single = false
update = false
Expand All @@ -57,7 +58,9 @@ params {
scorpio = false

// parameters
primerV = 'V3'
primerV = 'v1.0.0'
primerRef = ''
schemeLength = 400
minLength = false
maxLength = false
rapid = false
Expand All @@ -75,6 +78,10 @@ params {
seqrepair = "5.Genome-primer-repair"
jsondir = "6.json-summaries"
runinfodir = "X.Pipeline-runinfo"

// deprecated params
nanopolish = false
medaka_model = false
}

// runinfo
Expand All @@ -99,11 +106,11 @@ includeConfig ({
profiles {

test_fastq {
params.primerV = 'V1200'
params.primerV = 'v1.0.0'
}

stub {
params.primerV = 'V1200'
params.primerV = 'v1.0.0'
}

test_fasta {
Expand Down
Loading