From d2870755918163bec370fbb344a667ade735abf3 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 12:14:47 -0700 Subject: [PATCH 1/6] Nextclade: Only use 'Clades' in Nextclade assignment The Mumps Nextclade dataset had been reporting both a "Clade" and a "MuV_genotype" column. This change only reports the "Clade" assignment as the second is redundant for the user. This change also allows us to drop the hard coded nextclade_extension specification in the auspice_config files since pathogen.json already has this information and having both would risk settings getting out of sync and confusion during debugging. --- nextclade/defaults/config.yaml | 4 +- nextclade/defaults/genome/auspice_config.json | 44 +------------------ nextclade/defaults/sh/auspice_config.json | 44 +------------------ 3 files changed, 6 insertions(+), 86 deletions(-) diff --git a/nextclade/defaults/config.yaml b/nextclade/defaults/config.yaml index c9ca633..84ad92b 100644 --- a/nextclade/defaults/config.yaml +++ b/nextclade/defaults/config.yaml @@ -40,8 +40,8 @@ ancestral: traits: sampling_bias_correction: 3 - sh: clade_membership MuV_genotype - genome: clade_membership MuV_genotype + sh: clade_membership + genome: clade_membership colors: # Note: Reusing files from the phylogenetic workflow diff --git a/nextclade/defaults/genome/auspice_config.json b/nextclade/defaults/genome/auspice_config.json index bfeb069..3a434bf 100644 --- a/nextclade/defaults/genome/auspice_config.json +++ b/nextclade/defaults/genome/auspice_config.json @@ -27,7 +27,7 @@ "type": "categorical" }, { - "key": "MuV_genotype", + "key": "clade_membership", "title": "MuV genotype", "type": "categorical" }, @@ -58,7 +58,6 @@ }, "filters": [ "clade_membership", - "MuV_genotype", "country", "region", "author" @@ -67,44 +66,5 @@ "strain", "isolate", "host" - ], - "extensions": { - "nextclade": { - "clade_node_attrs": [ - { - "name": "MuV_genotype", - "displayName": "MuV Genotype", - "description": "Mumps genotype" - } 
- ], - "pathogen": { - "schemaVersion":"3.0.0", - "attributes": { - "name": "Mumps genotype tree", - "reference name": "Jeryl-Lynn (genotype A)", - "reference accession": "D90232" - }, - "alignmentParams": { - "minLength": 80, - "penaltyGapExtend": 1, - "penaltyGapOpen": 4, - "penaltyGapOpenInFrame": 4, - "penaltyGapOpenOutOfFrame": 6, - "penaltyMismatch": 1, - "scoreMatch": 4, - "noTranslatePastStop": false, - "excessBandwidth": 9, - "terminalBandwidth": 80, - "allowedMismatches": 12, - "minMatchLength": 30, - "maxAlignmentAttempts": 5, - "includeReference": true, - "includeNearestNodeInfo": true, - "retryReverseComplement": true, - "minSeedCover": 0.01 - }, - "defaultCds": "SH" - } - } - } + ] } diff --git a/nextclade/defaults/sh/auspice_config.json b/nextclade/defaults/sh/auspice_config.json index bfeb069..3a434bf 100644 --- a/nextclade/defaults/sh/auspice_config.json +++ b/nextclade/defaults/sh/auspice_config.json @@ -27,7 +27,7 @@ "type": "categorical" }, { - "key": "MuV_genotype", + "key": "clade_membership", "title": "MuV genotype", "type": "categorical" }, @@ -58,7 +58,6 @@ }, "filters": [ "clade_membership", - "MuV_genotype", "country", "region", "author" @@ -67,44 +66,5 @@ "strain", "isolate", "host" - ], - "extensions": { - "nextclade": { - "clade_node_attrs": [ - { - "name": "MuV_genotype", - "displayName": "MuV Genotype", - "description": "Mumps genotype" - } - ], - "pathogen": { - "schemaVersion":"3.0.0", - "attributes": { - "name": "Mumps genotype tree", - "reference name": "Jeryl-Lynn (genotype A)", - "reference accession": "D90232" - }, - "alignmentParams": { - "minLength": 80, - "penaltyGapExtend": 1, - "penaltyGapOpen": 4, - "penaltyGapOpenInFrame": 4, - "penaltyGapOpenOutOfFrame": 6, - "penaltyMismatch": 1, - "scoreMatch": 4, - "noTranslatePastStop": false, - "excessBandwidth": 9, - "terminalBandwidth": 80, - "allowedMismatches": 12, - "minMatchLength": 30, - "maxAlignmentAttempts": 5, - "includeReference": true, - "includeNearestNodeInfo": 
true, - "retryReverseComplement": true, - "minSeedCover": 0.01 - }, - "defaultCds": "SH" - } - } - } + ] } From 0218182d566f91185f484e384cc52b6c83d17476 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 12:24:29 -0700 Subject: [PATCH 2/6] Nextclade: config fixups Add documentation on where clade_membership is merged in from, or inferred from a fallback column. Explicitly use clade_membership during the genome filtering step, even though grouping by MuV_genotype should behave equivalently. --- nextclade/defaults/config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nextclade/defaults/config.yaml b/nextclade/defaults/config.yaml index 84ad92b..ee14319 100644 --- a/nextclade/defaults/config.yaml +++ b/nextclade/defaults/config.yaml @@ -13,6 +13,8 @@ metadata_url: "https://data.nextstrain.org/files/workflows/mumps/metadata.tsv.zs strain_id_field: "accession" +# Merge in clade_membership column from {metadata} file +# If not defined, use {fallback} column clade_membership: metadata: "{build}/reference_strains.tsv" fallback: "MuV_genotype" @@ -21,7 +23,7 @@ filter: exclude: "{build}/exclude.txt" include: "{build}/include.txt" sh: '--exclude-all' - genome: --subsample-max-sequences 300 --min-date 1950 --group-by MuV_genotype --min-length 12000 --exclude-where clade_membership='' + genome: --subsample-max-sequences 300 --min-date 1950 --group-by clade_membership --min-length 12000 --exclude-where clade_membership='' align: sh: '--fill-gaps --remove-reference' From 603d383b5ec6cc784960708c0ac0bf0b8f158d8e Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 12:26:48 -0700 Subject: [PATCH 3/6] Nextclade: Update auspice titles --- nextclade/defaults/genome/auspice_config.json | 2 +- nextclade/defaults/sh/auspice_config.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nextclade/defaults/genome/auspice_config.json b/nextclade/defaults/genome/auspice_config.json index 3a434bf..7ecb122 100644 --- 
a/nextclade/defaults/genome/auspice_config.json +++ b/nextclade/defaults/genome/auspice_config.json @@ -1,5 +1,5 @@ { - "title": "Real-time tracking of mumps virus evolution", + "title": "Mumps MuV Genotypes (Full genome)", "maintainers": [ {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} ], diff --git a/nextclade/defaults/sh/auspice_config.json b/nextclade/defaults/sh/auspice_config.json index 3a434bf..fce3089 100644 --- a/nextclade/defaults/sh/auspice_config.json +++ b/nextclade/defaults/sh/auspice_config.json @@ -1,5 +1,5 @@ { - "title": "Real-time tracking of mumps virus evolution", + "title": "Mumps MuV Genotypes (SH gene 315nt region)", "maintainers": [ {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} ], From 56096c8ddd5b5c2c13bc5c505403b759c20223b6 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 13:35:56 -0700 Subject: [PATCH 4/6] Nextclade: Copy QC from genome to sh dataset --- nextclade/defaults/sh/pathogen.json | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/nextclade/defaults/sh/pathogen.json b/nextclade/defaults/sh/pathogen.json index 7a35513..c6cdd45 100644 --- a/nextclade/defaults/sh/pathogen.json +++ b/nextclade/defaults/sh/pathogen.json @@ -47,5 +47,26 @@ "version": { "tag": "unreleased" }, - "defaultCds": "SH" + "defaultCds": "SH", + "qc": { + "missingData": { + "enabled": true, + "missingDataThreshold": 2000, + "scoreBias": 500, + "scoreWeight": 50 + }, + "mixedSites": { + "enabled": true, + "mixedSitesThreshold": 15, + "scoreWeight": 50 + }, + "frameShifts": { + "enabled": true, + "scoreWeight": 20 + }, + "stopCodons": { + "enabled": true, + "scoreWeight": 50 + } + } } From f823220fd06f443047a7f9eaffa02edac61edca5 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 13:36:56 -0700 Subject: [PATCH 5/6] Nextclade: Adjust QC threshold for sh region --- nextclade/defaults/sh/pathogen.json | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/nextclade/defaults/sh/pathogen.json b/nextclade/defaults/sh/pathogen.json index c6cdd45..7ebfc3d 100644 --- a/nextclade/defaults/sh/pathogen.json +++ b/nextclade/defaults/sh/pathogen.json @@ -51,7 +51,7 @@ "qc": { "missingData": { "enabled": true, - "missingDataThreshold": 2000, + "missingDataThreshold": 300, "scoreBias": 500, "scoreWeight": 50 }, From c213f1beb098bae703732d6a91cb2d4badb90cbc Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Sep 2025 13:38:15 -0700 Subject: [PATCH 6/6] Nextclade: Update SH QC docs --- nextclade/defaults/sh/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextclade/defaults/sh/README.md b/nextclade/defaults/sh/README.md index b36b786..9f7d6eb 100644 --- a/nextclade/defaults/sh/README.md +++ b/nextclade/defaults/sh/README.md @@ -27,6 +27,8 @@ This dataset supports: - Assignment of genotype - Phylogenetic placement +- Translation of annotated reading frames +- Quality metrics based on coverage (missing data), mixed sites, unexpected frameshifts, and stop codons ## What are Nextclade datasets