replikation · fischer-hub · Jul 16, 2025 · Jul 21, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/bin/convert_VCF_info_fields.py b/bin/convert_VCF_info_fields.py
diff --git a/bin/patch_legacy_bed.py b/bin/patch_legacy_bed.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+import sys
+from Bio import SeqIO
+import pandas as pd
+
+reference_file = sys.argv[1]
+bed_file = sys.argv[2]
+patched_bed_file_name = f"{bed_file.replace('.bed', '.patched')}.bed"
+
+print(f"Reading input files REF: '{reference_file}', BED: '{bed_file}' ...")
+
+ref_seq = next(SeqIO.parse(reference_file, "fasta"))
+bed_df = pd.read_csv(bed_file, sep="\t", names = ['chrom', 'chromStart', 'chromEnd', 'primer-name', 'pool', 'strand'])
+
+if len(bed_df.columns) > 6:
+    print('BED file is not in deprecated format and does not have to be patched for artic, how did you end up in this script?')
+    quit()
+
+print('Patching BED file primer sequences ...')
+
+bed_df['primer-sequence'] = bed_df.apply(
+    lambda row: str(ref_seq.seq[row['chromStart']:row['chromEnd']]) if row['strand'] == "+" else str(ref_seq.seq[row['chromStart']:row['chromEnd']].reverse_complement()),
+    axis=1
+)
+
+print(f"Writing patched BED file '{patched_bed_file_name}' ...")
+
+bed_df.to_csv(patched_bed_file_name, sep = '\t', header = False, index = False)
+
+print(f"Done.")
diff --git a/configs/container.config b/configs/container.config
@@ -2,9 +2,9 @@ process {
     // DONT add GUPPY containers here !!! they are maintained via process
     // pangolin container is maintained via params.defaultpangolin in nextflow.config
     // nextclade container is maintained via params.defaultpangolin in nextflow.config
-    withLabel:  artic       { container = 'nanozoo/artic:1.3.0-dev--784493e' }
+    withLabel:  artic       { container = 'community.wave.seqera.io/library/artic:1.8.4--1f7aeb3df7a7d5e1' }
     withLabel:  bwa         { container = 'nanozoo/bwa:0.7.17--d11c0a4' }
-    withLabel:  covarplot   { container = 'nanozoo/covarplot:0.0.2--2c6e300' }
+    withLabel:  covarplot   { container = 'nanozoo/covarplot:0.0.3--f7d5d8e' }
     withLabel:  demultiplex { container = 'nanozoo/guppy_cpu:5.0.7-1--47b84be' }
     withLabel:  fastcov     { container = 'raverjay/fastcov:0.1.3--ba8c8cf6ae19' }
     withLabel:  freyja      { container = 'staphb/freyja:1.5.0-03_27_2024-00-48-2024-03-27' }

diff --git a/data/external_primer_schemes/aliases.json b/data/external_primer_schemes/aliases.json
@@ -0,0 +1 @@
+{"artic-chikungunya-virus": "artic-chikv-ecsa", "artic-yellow-fever": "artic-yfv-west-east-africa", "hepatitis-a-virus": "hav", "hepatitis-b-virus": "hbv", "hepatitis-e-virus": "hev", "ncov-2019": "artic-sars-cov-2", "sars-cov-2": "artic-sars-cov-2"}
diff --git a/data/external_primer_schemes/index.json b/data/external_primer_schemes/index.json
diff --git a/modules/add_alt_allele_ratio_vcf.nf → modules/count_mixed_sites.nf b/modules/add_alt_allele_ratio_vcf.nf → modules/count_mixed_sites.nf
@@ -1,4 +1,4 @@
-process add_alt_allele_ratio_vcf {
+process count_mixed_sites {
     label 'artic'
     publishDir "${params.output}/${params.lineagedir}/${name}/", mode: 'copy'
     input:
@@ -13,25 +13,14 @@ process add_alt_allele_ratio_vcf {
     """
     # reheader failed VCF and change FILTER
     echo '##FILTER=<ID=ARTICFAIL,Description="ARTIC filter failed">' > add-to-hdr.txt
-    # -c and --rename-annots add a header with a default/wrong description
     bcftools annotate -h add-to-hdr.txt -c "FILTER/ARTICFAIL:=FILTER/PASS" ${failed_vcf} | sed '/##FILTER=<ID=ARTICFAIL,Description="All filters passed">/d' > tmp_failed_updated-filter.vcf
 
     # concat failed and passed VCF
-    bcftools concat ${vcf} tmp_failed_updated-filter.vcf | bcftools sort -o tmp_merged.vcf
-
-    # call medaka tools annotate for each pool and add the alternate allele ratio
-    for pool in `cut -f5 ${primer_dir}/${primer_version_tag}/nCoV-2019.scheme.bed | sort | uniq`; do
-        bcftools view -i 'INFO/Pool="'\$pool'"' tmp_merged.vcf -o tmp_\$pool.vcf
-        medaka tools annotate --dpsp --pad 25 --RG \$pool tmp_\$pool.vcf ${primer_dir}/${primer_version_tag}/nCoV-2019.reference.fasta ${bam} tmp_annotated_\$pool.vcf
-        convert_VCF_info_fields.py tmp_annotated_\$pool.vcf tmp_aar_\$pool.vcf
-    done
-
-    # merge pool VCF and sort VCF
-    bcftools concat tmp_aar_*.vcf | bcftools sort -o ${name}_all-vars-with-aar.vcf
+    bcftools concat ${vcf} tmp_failed_updated-filter.vcf | bcftools sort -o ${name}_all-vars-with-aar.vcf
 
     # count mixed sites
     # thresholds for human geotyping: 0.35 <= x <= 0.65 
-    NUM_MIXED_SITES=\$(bcftools view -H -i 'INFO/DP>${params.min_depth} & INFO/AF>=0.3 & INFO/AF<=0.8' ${name}_all-vars-with-aar.vcf | wc -l)
+    NUM_MIXED_SITES=\$(bcftools view -H -i 'FORMAT/DP>${params.min_depth} & FORMAT/AF>=0.3 & FORMAT/AF<=0.8' ${name}_all-vars-with-aar.vcf | wc -l)
     echo sample,num_mixed_sites > mixed_sites_stats.csv
     echo ${name},\$NUM_MIXED_SITES >> mixed_sites_stats.csv
 

diff --git a/modules/plot_coverages.nf b/modules/plot_coverages.nf
@@ -9,8 +9,17 @@ process plot_coverages {
 	    path("coverages_*.png")
 	script:
 	"""
-    fastcov.py -l -o coverages_\$(echo ${alignment_files} | tr ' ' '_').png ${alignment_files}
-	fastcov.py -l -p NC_045512.2:21563-25385 -o coverages_spike_\$(echo ${alignment_files} | tr ' ' '_').png ${alignment_files}
+	output_name=coverages_\$(echo ${alignment_files} | tr ' ' '_').png
+	output_name_no_ext=\${output_name//.bam/}
+	output_name=\$output_name_no_ext
+
+	if [ \${#output_name} -gt 255 ]; then
+		echo "Output file name exceeds filename character limit"
+		output_name="\${output_name:0:240}.png"
+	fi
+
+    fastcov.py -l -o \$output_name ${alignment_files}
+	fastcov.py -l -p NC_045512.2:21563-25385 -o \$output_name ${alignment_files}
 	"""
 	stub:
 	"""

diff --git a/nextflow.config b/nextflow.config
@@ -40,8 +40,9 @@ params {
     krakendb = ''
     lcs = false
     localguppy = false
-    medaka_model = 'r941_min_hac_g507'
-    nanopolish = ''
+    clair3_model_name = 'r1041_e82_400bps_sup_v410'
+    // --model-dir defaults to CONDA_ROOT_PREFIX but somehow this is not set in seqera containers and instead points to MAMBA_ROOT... 
+    clair3_model_dir = '/opt/conda/bin/models/'
     one_end = false
     single = false
     update = false
@@ -57,7 +58,9 @@ params {
     scorpio = false
 
     // parameters
-    primerV = 'V3'
+    primerV = 'v1.0.0'
+    primerRef = ''
+    schemeLength = 400
     minLength = false
     maxLength = false
     rapid = false
@@ -75,6 +78,10 @@ params {
     seqrepair = "5.Genome-primer-repair"
     jsondir = "6.json-summaries"
     runinfodir = "X.Pipeline-runinfo"
+
+    // deprecated params
+    nanopolish = false
+    medaka_model = false
 }
 
 // runinfo
@@ -99,11 +106,11 @@ includeConfig ({
 profiles {
 
     test_fastq {
-        params.primerV = 'V1200'
+        params.primerV = 'v1.0.0'
     }
 
     stub {
-        params.primerV = 'V1200'
+        params.primerV = 'v1.0.0'
     }
 
     test_fasta {
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"artic-chikungunya-virus": "artic-chikv-ecsa", "artic-yellow-fever": "artic-yfv-west-east-africa", "hepatitis-a-virus": "hav", "hepatitis-b-virus": "hbv", "hepatitis-e-virus": "hev", "ncov-2019": "artic-sars-cov-2", "sars-cov-2": "artic-sars-cov-2"}