From 26efb189383a035fcf8ccdecef7f91d06d6f75c6 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Tue, 9 May 2023 18:01:50 -0400
Subject: [PATCH 1/4] first commit
---
.../payload/germlinevariant/main.nf | 43 +++
.../payload/germlinevariant/meta.yml | 60 ++++
.../germlinevariant/resources/usr/bin/main.py | 314 ++++++++++++++++++
tests/config/nextflow.config | 14 +-
tests/config/test_data.config | 52 ++-
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 751 bytes
...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 105 bytes
.../data/qa/deepvariant/collated_versions.yml | 14 +
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 3315 bytes
...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 106 bytes
tests/data/qa/freebayes/collated_versions.yml | 24 ++
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...c24f393b92.haplotypecaller.filtered.vcf.gz | Bin 0 -> 5034 bytes
...393b92.haplotypecaller.filtered.vcf.gz.tbi | Bin 0 -> 105 bytes
.../qa/haplotypecaller/collated_versions.yml | 22 ++
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 1627 bytes
...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 77 bytes
tests/data/qa/manta/collated_versions.yml | 14 +
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 2162 bytes
...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 105 bytes
tests/data/qa/strelka/collated_versions.yml | 18 +
...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++
...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 1172 bytes
...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 72 bytes
tests/data/qa/tiddit/collated_versions.yml | 14 +
.../payload/germlinevariant/main.nf | 29 ++
.../payload/germlinevariant/nextflow.config | 6 +
.../payload/germlinevariant/test.yml | 55 +++
32 files changed, 1379 insertions(+), 2 deletions(-)
create mode 100644 modules/icgc-argo-workflows/payload/germlinevariant/main.nf
create mode 100644 modules/icgc-argo-workflows/payload/germlinevariant/meta.yml
create mode 100755 modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
create mode 100644 tests/data/qa/deepvariant/collated_versions.yml
create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
create mode 100644 tests/data/qa/freebayes/collated_versions.yml
create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz
create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi
create mode 100644 tests/data/qa/haplotypecaller/collated_versions.yml
create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
create mode 100644 tests/data/qa/manta/collated_versions.yml
create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
create mode 100644 tests/data/qa/strelka/collated_versions.yml
create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
create mode 100644 tests/data/qa/tiddit/collated_versions.yml
create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config
create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
new file mode 100644
index 0000000..6e127c8
--- /dev/null
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
@@ -0,0 +1,43 @@
+process PAYLOAD_GERMLINEVARIANT {
+ tag "$meta.id"
+ label 'process_single'
+ // Runs the module's main.py to write a *.payload.json and renamed VCF/index files under out/.
+ // NOTE(review): the MultiQC image appears to serve only as a Python runtime; confirm it provides PyYAML, which main.py imports.
+ conda "bioconda::multiqc=1.13"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
+ 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
+
+ input: // files and values forwarded to main.py as command-line options
+ tuple val(meta), path(files_to_upload), path(metadata_analysis)
+ val genome_annotation
+ val genome_build
+ path pipeline_yml
+ val tool
+
+
+ output: // payload JSON from the work dir plus the renamed files main.py places in out/
+ tuple val(meta), path("*.payload.json"), path("out/*{vcf.gz,vcf.gz.tbi}"), emit: payload_files
+ path "versions.yml", emit: versions
+
+ script:
+ // pass -p only when a real pipeline yml is supplied; a file named NO_FILE is treated as "not provided"
+ def arg_pipeline_yml = pipeline_yml.name != 'NO_FILE' ? "-p $pipeline_yml" : ''
+ """
+ main.py \
+ -f ${files_to_upload} \
+ -a ${metadata_analysis} \
+ -g "${genome_annotation}" \
+ -b "${genome_build}" \
+ -w "DNA Seq Germline Workflow" \
+ -s "${workflow.sessionId}" \
+ -v "${workflow.manifest.version}" \
+ -t "${tool}" \
+ $arg_pipeline_yml
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$(python --version | sed 's/Python //g')
+ END_VERSIONS
+ """
+ }
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml b/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml
new file mode 100644
index 0000000..105e61b
--- /dev/null
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml
@@ -0,0 +1,60 @@
+name: "payload_germlinevariant"
+## Payload generation module for the ARGO germline variant calling workflow
+description: Generate an ARGO Song payload JSON and normalized file names for germline variant call (VCF) files
+keywords:
+ - payload
+tools:
+ - "payload_germlinevariant":
+ description: "A simple wrapper written in `nextflow` for the payload generation tool to generate ARGO Song payloads containing germline variant call (VCF) files."
+ licence: ["MIT"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+
+ - files_to_upload:
+ type: file
+ description: Variant call files to upload (.vcf.gz and its .vcf.gz.tbi index)
+
+ - metadata_analysis:
+ type: file
+ description: Song metadata in JSON format
+
+ - genome_annotation:
+ type: string
+ description: genome annotation name
+
+ - genome_build:
+ type: string
+ description: genome build name
+
+ - pipeline_yml:
+ type: file
+ description: YAML file collected from CUSTOM_DUMPSOFTWAREVERSIONS
+ - tool:
+ type: string
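+ description: Name of the variant caller that produced the files (deepvariant, strelka, tiddit, haplotypecaller, manta or freebayes)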
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+
+ - payload_files:
+ type: file
+ description: Generated payload JSON and the VCF/index files with normalized names
+
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+
+
+authors:
+ - "@edsu7"
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
new file mode 100755
index 0000000..a33e5c4
--- /dev/null
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+ Copyright (C) 2021, Ontario Institute for Cancer Research
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Authors:
+ Edmund Su
+ Linda Xiang
+"""
+
+import os
+import sys
+import argparse
+import subprocess
+import json
+import re
+import hashlib
+import uuid
+import tarfile
+from datetime import date
+import copy
+from glob import glob
+import yaml
+import csv
+import io
+import shutil
+# Example of a dot-separated output file name: LUCA-KR.DO231106.SA602282.wxs.20210112.gatk-mutect2.somatic.snv.open-filter.vcf.gz
+workflow_process_map = {  # workflow name (-w) -> last field of the process indicator built in main()
+ 'DNA Seq Germline Workflow': 'snv'
+}
+
+tool_list = ['strelka']  # NOTE(review): only referenced by the commented-out prepare_tarball call in main()
+
+def calculate_size(file_path):
+    return os.path.getsize(file_path)  # size of the file in bytes
+
+
+def calculate_md5(file_path):  # -> hex MD5 digest of the file's content
+    digest = hashlib.md5()
+    with open(file_path, mode='rb') as stream:
+        for block in iter(lambda: stream.read(1 << 20), b''):  # 1 MiB reads
+            digest.update(block)
+    return digest.hexdigest()
+
+# Normalized output names are dot-separated:
+#   <studyId>.<donorId>.<sampleId>.<strategy>.<date>.<process_indicator>.<suffix>
+# e.g. TEST-PR.DO250183.SA610228.wxs.20230501.strelka.germline.snv.vcf.gz
+
+# Name recorded under `analysis_tools` for each supported variant caller.
+# `cnvkit` is accepted by the command line but has no entry here, so it is
+# rejected with an explicit "unsupported tool" error.
+_TOOL_DISPLAY_NAMES = {
+    'deepvariant': 'DeepVariant',
+    'strelka': 'Strelka',
+    'tiddit': 'Tiddit',
+    'haplotypecaller': 'haplotypecaller',
+    'manta': 'Manta',
+    'freebayes': 'Freebayes',
+}
+
+# Recognized inputs: (path ending, fileType, dataType, suffix of the new name).
+_VARIANT_FILE_KINDS = (
+    ('.vcf.gz', 'VCF', 'Raw SNV Calls', 'vcf.gz'),
+    ('.vcf.gz.tbi', 'TBI', 'VCF Index', 'vcf.gz.tbi'),
+)
+
+
+def _classify_variant_file(file_to_upload):
+    """Classify *file_to_upload* by its file-name ending.
+
+    Returns (fileType, dataType, new-name suffix), or None if unrecognized.
+    """
+    # Literal endings replace the former regexes, whose unescaped dots matched
+    # any character (so e.g. `sampleXvcf.gz` was accepted as a VCF).
+    for ending, file_type, data_type, suffix in _VARIANT_FILE_KINDS:
+        if file_to_upload.endswith(ending):
+            return file_type, data_type, suffix
+    return None
+
+
+def _build_file_name(analysis_dict, date_str, process_indicator, suffix):
+    """Return the normalized, dot-separated file name.
+
+    Fields: studyId, donorId, sampleId, strategy, date, process, suffix.
+    """
+    experiment = analysis_dict['experiment']
+    # Fall back to `library_strategy` when `experimental_strategy` is absent
+    # or empty. Both are lower-cased; previously only the former was, so the
+    # case of the name depended on which key the metadata happened to use.
+    strategy = (
+        experiment.get('experimental_strategy')
+        or experiment['library_strategy']
+    )
+    first_sample = analysis_dict['samples'][0]
+    return '.'.join([
+        analysis_dict['studyId'],
+        first_sample['donor']['donorId'],
+        first_sample['sampleId'],
+        strategy.lower(),
+        date_str,
+        process_indicator,
+        suffix,
+    ])
+
+
+def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir):
+    """Describe one output file for the payload and stage a renamed copy.
+
+    The file is classified as VCF or index by its name, described with
+    size/MD5/tool metadata, and copied into *new_dir* under a normalized name.
+
+    Args:
+        file_to_upload: path to a `.vcf.gz` file or its `.vcf.gz.tbi` index.
+        date_str: date stamp embedded in the new file name.
+        analysis_dict: parsed analysis JSON; `studyId`, `samples[0]` and
+            `experiment` are read to build the new file name.
+        process_indicator: process label embedded in the new file name.
+        tool: variant caller key; must be a key of `_TOOL_DISPLAY_NAMES`.
+        new_dir: directory receiving the renamed copy (created if missing).
+
+    Returns:
+        dict with the keys fileSize, fileMd5sum, fileAccess, info, dataType,
+        fileName and fileType, in that insertion order.
+
+    Raises:
+        SystemExit: if the tool is unsupported, or the file is neither a
+            `.vcf.gz` nor a `.vcf.gz.tbi`.
+    """
+    # Validate cheap things first so bad input fails before any hashing.
+    if tool not in _TOOL_DISPLAY_NAMES:
+        sys.exit('Error: unsupported variant calling tool: %s' % tool)
+
+    file_kind = _classify_variant_file(file_to_upload)
+    if file_kind is None:
+        sys.exit('Error: unknown variant file: %s' % file_to_upload)
+    file_type, data_type, suffix = file_kind
+
+    new_fname = _build_file_name(
+        analysis_dict, date_str, process_indicator, suffix
+    )
+
+    # Key order matches the previous implementation, which added dataType,
+    # fileName and fileType after the initial four keys.
+    file_info = {
+        'fileSize': calculate_size(file_to_upload),
+        'fileMd5sum': calculate_md5(file_to_upload),
+        'fileAccess': 'controlled',
+        'info': {
+            'data_category': "Simple Nucleotide Variation",
+            'analysis_tools': [_TOOL_DISPLAY_NAMES[tool]],
+        },
+        'dataType': data_type,
+        'fileName': new_fname,
+        'fileType': file_type,
+    }
+
+    # The earlier special case for paths starting with `cnvkit` is gone: it
+    # opened the input as a tar archive and appended to a `files_in_tgz` list
+    # that was never created (KeyError). Every accepted file is now copied.
+    os.makedirs(new_dir, exist_ok=True)
+    staged_path = os.path.join(new_dir, new_fname)
+    # realpath resolves symlinked inputs so the file content itself is copied.
+    shutil.copyfile(os.path.realpath(file_to_upload), staged_path)
+
+    return file_info
+
+def get_basename(metadata):
+    """Return `<studyId>.<donorId>.<sampleId>`; exit if any of them is empty."""
+    study_id, sample = metadata['studyId'], metadata['samples'][0]
+    ids = [study_id, sample['donor']['donorId'], sample['sampleId']]
+
+    if not all(ids):
+        sys.exit('Error: missing study/donor/sample ID in the provided metadata')
+
+    return ".".join(ids)
+
+def get_sample_info(sample_list):
+    """Return a deep copy of the samples without info/ID keys (input untouched)."""
+    drop_keys = ('info', 'sampleId', 'specimenId', 'donorId', 'studyId')
+    cleaned = copy.deepcopy(sample_list)
+    for entry in cleaned:
+        for section in (entry, entry['specimen'], entry['donor']):
+            for key in drop_keys:
+                section.pop(key, None)
+    return cleaned
+
+def prepare_tarball(sampleId, qc_files, tool_list):
+    """Write one `tarball/<sampleId>.<tool>.tgz` per tool that has files.
+
+    A file is assigned to a tool when the tool name occurs in its path;
+    archive members are stored under their base names, symlinks followed.
+    """
+    tgz_dir = 'tarball'
+    try:
+        os.mkdir(tgz_dir)
+    except FileExistsError:
+        pass  # path already exists
+
+    files_to_tar = {}
+    for tool in tool_list:
+        files_to_tar.setdefault(tool, []).extend(f for f in sorted(qc_files) if tool in f)
+
+    for tool in tool_list:
+        if files_to_tar[tool]:
+            with tarfile.open(f"{tgz_dir}/{sampleId}.{tool}.tgz", "w:gz", dereference=True) as tar:
+                for member in files_to_tar[tool]:
+                    tar.add(member, arcname=os.path.basename(member))
+
+def main():
+    """Generate a Song payload JSON for germline variant-calling outputs.
+
+    Parses the command line, loads the input analysis metadata, stages every
+    file to upload under a normalized name in `out/`, and writes
+    `<uuid4>.<workflow_name>.payload.json` into the working directory.
+
+    Exits via sys.exit when the workflow name is not in `workflow_process_map`.
+    """
+    parser = argparse.ArgumentParser(description='Tool: payload-gen-qc')
+    # `--metatada-analysis` is the original, misspelled flag; kept as an alias.
+    parser.add_argument("-a", "--metadata-analysis", "--metatada-analysis", dest="metadata_analysis",
+                        required=True, help="Input metadata analysis", type=str)
+    parser.add_argument("-f", "--files_to_upload", dest="files_to_upload", type=str, required=True,
+                        nargs="+", help="All files to upload")
+    parser.add_argument("-g", "--genome_annotation", dest="genome_annotation", default="", help="Genome annotation")
+    parser.add_argument("-b", "--genome_build", dest="genome_build", default="", help="Genome build")
+    parser.add_argument("-w", "--wf-name", dest="wf_name", required=True, help="Workflow name")
+    parser.add_argument("-s", "--wf-session", dest="wf_session", required=True, help="workflow session ID")
+    parser.add_argument("-v", "--wf-version", dest="wf_version", required=True, help="Workflow version")
+    parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml")
+    parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling",
+                        choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes'])
+
+    args = parser.parse_args()
+
+    # An unknown workflow name used to reach ".".join() as None and crash with
+    # a TypeError; report it explicitly instead.
+    process_type = workflow_process_map.get(args.wf_name)
+    if process_type is None:
+        sys.exit('Error: unsupported workflow name: %s' % args.wf_name)
+
+    with open(args.metadata_analysis, 'r') as f:
+        analysis_dict = json.load(f)
+
+    pipeline_info = {}
+    if args.pipeline_yml:
+        with open(args.pipeline_yml, 'r') as f:
+            pipeline_info = yaml.safe_load(f)
+
+    payload = {
+        'analysisType': {
+            'name': 'variant_processing'
+        },
+        'studyId': analysis_dict.get('studyId'),
+        'info': {},
+        'workflow': {
+            'workflow_name': args.wf_name,
+            'workflow_version': args.wf_version,
+            'session_id': args.wf_session,
+            'inputs': [
+                {
+                    'analysis_type': analysis_dict['analysisType']['name'],
+                    'input_analysis_id': analysis_dict.get('analysisId')
+                }
+            ],
+            'info': pipeline_info
+        },
+        'files': [],
+        'experiment': analysis_dict.get('experiment'),
+        'samples': get_sample_info(analysis_dict.get('samples'))
+    }
+    if args.genome_build:
+        payload['workflow']['genome_build'] = args.genome_build
+    if args.genome_annotation:
+        payload['workflow']['genome_annotation'] = args.genome_annotation
+
+    # Carry the `info` dict of the input analysis over; drop the key otherwise.
+    if 'info' in analysis_dict and isinstance(analysis_dict['info'], dict):
+        payload['info'] = analysis_dict['info']
+    else:
+        payload.pop('info')
+
+    # Normalize the key name; this dict is shared with analysis_dict['experiment'].
+    if 'library_strategy' in payload['experiment']:
+        payload['experiment']['experimental_strategy'] = payload['experiment'].pop('library_strategy')
+
+    new_dir = 'out'
+    os.makedirs(new_dir, exist_ok=True)
+    date_str = date.today().strftime("%Y%m%d")
+    process_indicator = ".".join([args.tool, "germline", process_type])
+    for f in sorted(args.files_to_upload):
+        payload['files'].append(get_files_info(f, date_str, analysis_dict, process_indicator, args.tool, new_dir))
+
+    with open("%s.%s.payload.json" % (str(uuid.uuid4()), args.wf_name.replace(" ","_")), 'w') as f:
+        f.write(json.dumps(payload, indent=2))
+
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+ main()
+
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index 4c14e4a..80f3bee 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -1,4 +1,5 @@
+
manifest {
homePage = 'https://github.com/icgc-argo-workflows/argo-modules'
description = 'ARGO Generic Modules to be shared across workflows for RDPC processing'
@@ -57,4 +58,15 @@ includeConfig 'modules.config'
includeConfig 'test_data.config'
// Enable locally defined binary scripts for modules
-nextflow.enable.moduleBinaries = true
\ No newline at end of file
+nextflow.enable.moduleBinaries = true
+
+process {
+ withName: 'PAYLOAD_GERMLINEVARIANT' { // publish settings applied only to the payload module
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/"},
+ pattern: "{*payload.json,out/*vcf.gz,out/*vcf.gz.tbi}", // payload JSON plus the renamed VCF and index under out/
+ saveAs: { "${meta.tool}/${meta.id}/${it}" } // NOTE(review): needs meta.tool and meta.id on the process input map; confirm the caller sets both
+ ]
+ }
+}
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 2bcd53d..e16a301 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -30,4 +30,54 @@ params {
analysis_id_stage = "c62cee87-04ae-4988-acee-8704aec988d4"
}
}
-}
+ profiles { // one fixture set per variant caller; each reads from its own <test_data_base>/<tool>/ directory
+ deepvariant_vcf {
+ metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/deepvariant/collated_versions.yml"
+ tool="deepvariant"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ freebayes_vcf {
+ metadata_analysis="${params.test_data_base}/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/freebayes/collated_versions.yml"
+ tool="freebayes"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ haplotypecaller_vcf {
+ metadata_analysis="${params.test_data_base}/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz","${params.test_data_base}/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/haplotypecaller/collated_versions.yml"
+ tool="haplotypecaller"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ manta_vcf {
+ metadata_analysis="${params.test_data_base}/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/manta/collated_versions.yml"
+ tool="manta"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ strelka_vcf {
+ metadata_analysis="${params.test_data_base}/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/strelka/collated_versions.yml"
+ tool="strelka"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ tiddit_vcf {
+ metadata_analysis="${params.test_data_base}/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
+ files_to_upload=["${params.test_data_base}/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"]
+ pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
+ tool="tiddit"
+ study_id = "TEST-PR"
+ analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ }
+}
\ No newline at end of file
diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
new file mode 100644
index 0000000..dc1d78a
--- /dev/null
+++ b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
@@ -0,0 +1,117 @@
+{
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "studyId" : "TEST-PR",
+ "analysisState" : "PUBLISHED",
+ "analysisType" : {
+ "name" : "sequencing_alignment",
+ "version" : 15
+ },
+ "samples" : [
+ {
+ "sampleId" : "SA610228",
+ "specimenId" : "SP210201",
+ "submitterSampleId" : "COLO-829-BL",
+ "sampleType" : "Total DNA",
+ "matchedNormalSubmitterSampleId" : null,
+ "specimen" : {
+ "specimenId" : "SP210201",
+ "donorId" : "DO250183",
+ "submitterSpecimenId" : "COLO-829-BL",
+ "tumourNormalDesignation" : "Normal",
+ "specimenTissueSource" : "Blood derived",
+ "specimenType" : "Normal"
+ },
+ "donor" : {
+ "donorId" : "DO250183",
+ "studyId" : "TEST-PR",
+ "submitterDonorId" : "COLO-829",
+ "gender" : "Female"
+ }
+ }
+ ],
+ "files" : [
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram",
+ "fileType" : "CRAM",
+ "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31",
+ "fileSize" : 76810,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads"
+ },
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "57a23de6-044c-555b-8070-bd278d449a63",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram.crai",
+ "fileType" : "CRAI",
+ "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652",
+ "fileSize" : 62,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads Index"
+ }
+ ],
+ "createdAt" : "2023-04-18T21:30:04.11549",
+ "updatedAt" : "2023-04-18T21:32:47.73069",
+ "firstPublishedAt" : "2023-04-18T21:32:47.728311",
+ "publishedAt" : "2023-04-18T21:32:47.728311",
+ "analysisStateHistory" : [
+ {
+ "initialState" : "UNPUBLISHED",
+ "updatedState" : "PUBLISHED",
+ "updatedAt" : "2023-04-18T21:32:47.728311"
+ }
+ ],
+ "experiment" : {
+ "experimental_strategy" : "WXS",
+ "platform" : "ILLUMINA",
+ "platform_model" : "HiSeq 2000",
+ "sequencing_center" : "EXT",
+ "sequencing_date" : "2014-12-12",
+ "submitter_sequencing_experiment_id" : "TEST_EXP"
+ },
+ "read_group_count" : 1,
+ "read_groups" : [
+ {
+ "file_r1" : "test.paired_end.sorted.cram",
+ "file_r2" : "test.paired_end.sorted.cram",
+ "insert_size" : 298,
+ "is_paired_end" : true,
+ "library_name" : "testN",
+ "platform_unit" : "1",
+ "read_length_r1" : 75,
+ "read_length_r2" : 75,
+ "sample_barcode" : null,
+ "submitter_read_group_id" : "1"
+ }
+ ],
+ "workflow" : {
+ "genome_build" : "GRCh38_hla_decoy_ebv",
+ "inputs" : [
+ {
+ "analysis_type" : "sequencing_experiment",
+ "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ ],
+ "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e",
+ "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a",
+ "workflow_name" : "DNA Seq Alignment",
+ "workflow_version" : "1.9.0"
+ }
+}
diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..65cc12947d62c84930154b7cc945a32d179fb942
GIT binary patch
literal 751
zcmV1WxHWb!e1AR2-r_K7wc9D;
zVJ)l_+JAdP>!i_r7RDiMwcFWvvKlV>e~#1s#Js;8g$h!1g_Q?i|LtL5^j4Or3k0db
zSJJ`T2?ZOJdIuJT0_k9b%Dh7%l&atQ%A%``D#M58OO=@!2z4d0lpdiH4oW=us!ze9
zJ1Sp;qy}vo>V&^w>KrKI;MVg^}~Uy
zy-5caw`l7*oPEp}(`41mElGpfalb*^Uj(a<1NO(-;|}emw*`vwln=JX0*aynp%f~-
z=B4>3kKM()K0kW7=lsCvQq~;{1-sJ#5dJv|CH=<@adkypdMouVxCnTj;!T|CF#UF4
zq$!qCXh?G~8;AA{OL&)c7vL*|(Rz@R+2PO_OY
z!e171zWV2FgRK=s-Mc$w7|*hKv&O^O_dWjE-VJpL2jiO6fiLwcA00M5f&18RjFQD!
z$$f1E3ZuQ;H4;T-qbN|QcfRV!lv4KGY7hQe%%>!uFUdG1i(y8R$%?%EolMB-t3hgq
zB%9awjd*l^)@nF}{n3j^ND?w4l$1M=w0hd6M#VlJR^(=p*G3`+KhwaT-K4=+HB=6Lq^JBr7r?9U8`znUKp*1w{)kCz$-
h001A02m}BC000301^_}s0stET0{{R300000005x1XV?G$
literal 0
HcmV?d00001
diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..5e4aeb4ddb80a14675b55474af89f0ee2af557ad
GIT binary patch
literal 105
zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF
n);z+nEPg}76Nd*z9?D%34DJgvii3b=$fFr1&A<#c8$K}-(a4cH7xeOg{Epd^kr
zfRiY4da!#~3}}nCg^?wVB&R2H*x$Y=>cg_ExIL5Z1vZCRiKHS~PrX$|`r=|0Mq-s@
zTV6OnU*EoHn0>goxV@V_PkiTZcQ=mb&gX+0kuS3_Ey5&resZG-t-`1fS&mYk=OXy&
z@9@r}fA2oacifx7!*06}nd1+hUsK`S#f4al?9fj62ufBcS>z&n;b0!Ek_-vH+#nuB
zA`(ZYA8XU@3DYjjF6M=(tQ?H6Om=b6wR_{$cIgSbzTg465oxjMzCWGS?@!D3Z=-}4
zhYOyHI4NFJfo9>aA__N25)4q3tSK4hOCE^;MPYp-2fq{>fdXD|v<_cHEdN}>fRz
zO*QUHlROU>k=Wl_i&$j55CK|c$yUCdiIvDiycCGX0pcAm<*jjYzD*;6(kKao*Q3Qg
z)r&pltY*1SJ069_v6bhJm3fhc@w(q<-Iwq09)3BG*?ky?+uargfDMH)TK{}~i;`WD
z?h3SslI5?sYuxL??2lhiO%(nif&ub2nF#tQ0-u^lj(Cm&u?k}mz(jxu%q367GF*jA
zDS+-=p4@sarviSy+d{znR(cvP!T26j9?v;P(Ub&yqe-Od?0D)^>M1XcV*!f2y
zR;wV4*B^5f#-fVrju);wPUuzXb5iiA=9{cqngB;f0_@jLZ-m+z=(aOZPoPaI4?$K1
zvuKrx-*zHizJh&lOUC21K%dBf{<~xTp*DX?9mng4&YXQCKKF$AYbc6Nd%w8m%v>KW_vQzZW77M1bT=d!@-iVa%!tsK+ebIzV5;
zVk7-2eqRdNk)UG33-r}H=(vCK(DbX7w|X;GRcDOAYKExQbvy_CUJl~h~KS<3*_yytbN#F?7m}ihVYLAH1V6%Jegfi{C6bg
ziRU$0fd2Wl$}NCYj_g$1x#<7O4b$hlju(2vs(b`8UR8cTT44dLyG*}ON5U$HWM!Nu
z@r%gTrFSy4;9+ze)U!!bR;qaG9lerBB;s|oscE{5`-%JD&YC8@veFZ^_kzc*3cHKL
z-*y7UNj%(Btj1}>l)180u@t7vI?E)>M4l!w6#rJqDc{dKi~Rn(fc@&9?$2ELwn~Rp
zlY*LtNI)~tSwLt0PxogiMf&&DC|x&qs=>p&Ge&PDl#jsdOD0ujh5cv8+?V@SSX2W4
zhRG8#A&X0ux+?{UyevXcWUGAJ0snY&(_K_gU_ze}C2N!fL5`%JV0jQ`3y~MK-XQ3<
zi!g@!F{lh4$i*^=gCRWE8QOF2{7F+V-d2T|C25jLnA9N^eZ+D1g4tq`IAF52B;mpR_W>pbWv3XGO?6U1?cl
z<>3Zjf|q#|l}8MXguSD^^Enf!Wt{J}5Sb|y99fe3rqLbn3Frbj
z$BKE`_yuStR)yUvr=a`1P7P3Jii4A#>S!aV6IUcw1^T3a{9LEH{iBd$s}t2mBmOyU
zDa@~&-u8djm-=4WnEKwnGxfdMrSH9`H}%izP2XOy%mjPvEh*(cusQuvzGz%=W^DT5
zdh?#(%;0_WNj*C{zns;?`nwpF9jq-wem_4rbrZ{75x#&}Q^AY;t&vfEPSe1J{$ShD
z%iQfA2KA{sJ-fVS`}jB_}(js_^nUaCsDF~#bFffwqeXWqxT@c
z&B}fjHM<76F*(r!luwo4DV5zq`K>}Vq@2?-*=~6pL}4tPpH`U=3;rr{G%QaYwgRts
zUT`!V7Cc*v0xdTgW4|y$2pg(|5P}yXK*QlSjE7A^8P>fwBA8Y;t9|OQZo(+yd}Rfy
zW(=21TMf05x)?63KpkoZ{6ih7ixI=ijBkIE^NfpdEx+H9Qy7$a-O&gk6c(}yBXPqE
z;V{Bfq7&=G#YMW|aIovdNi05GTr~0FwAc3G;v!FW*-|*L{42Tq7m8JEe7Ly&hyQr5
zc#m`C?ndz^w~9M^R-V4NGo}0{T!DT`DZk-X3XvBNALQ&5I;vlTpiQE)ziIW5cu%n^I`7L@q^r~J9G>?W`|;v=_(zU
z`$ea|@a5H`L&n|{jOso4@Y{#~Jh=|lDIGugj%iXd_8%RlT4UdvIb`g6o&yT6w+^MF@!a3v@qPI(P^y#1>9|Kj4ppYl
z_Kjz|H@5$1Kl!%%Xg|63bf%C?YBS5QnQ9F*3qEX^wXIn;tim4Z1XF8321A3Gc%*6-
zM@E^dVC5P()(|zxPsN(1>7!nL>;RuG;lb}-bS74Hm1*NXb_}37-(p888yOWcNenK&
z!A{qJoUTc3k{Sb2PCXk*ZT|;w-$SEaW>g|05?KAx-1i+eeg`kzW&>t3Ti428Llv8v
zWl^A1URID)s!4?@Q$@9~O5ys-Arcb-Knw&%8+je#VG<8}3lHf9b114_O_^9VO|yp!
zImtx@E>ce9Ei7WcVrUFY@tkI%AHskPcjYknXyX`})F?AuKaWG9^zu+gm>Ep}lRQja?*O4ADHLjA
z!y4&uEpL0iW3-8|S|f7mkl3i&Qpxv#VgMDr#G>1#uG>Sy!jz7z!O+Ay(@lL3MkBg{
zm8tTqkRv7p^g-Ux2UR5!`O!ww)E>&P0adM!OQ{px9G^;H44W#0Y}EyWM`T1s^psvy
z3-CZ?^dB7x1*Wb)bsmy{=m0H5f96Rm;-%uu2t86@sZ*@xo00$h1avOiXRuSA4VsK~bxuXSM0+
zm|D~{OMDjAM;aNOz*pBFeS~$BYQ5sZjOm2+GGvx+o@7WV1Bhx_ji}awNGUc(25$h!nEl^VC(QQ&&+{OpKbOYLSs%_JKOqN4nbIN9@}`b}t)2Dq%8;4Z|?o
zL@C3jMMou318bIM7`;SQK-qK@`Uv;pG_gS`r8Q21EwfIX{^`DgTBRU0Q?djp&K!`X
zN=u?-1gp?30!mQgtpoTxr(W2*Q8H>%GO%n)Y^IsEWmC(thm>NS
xP>GTn*kXiz_%H4lFVP4q001A02m}BC000301^_}s0stET0{{R3000000038@MQ;EA
literal 0
HcmV?d00001
diff --git a/tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..89a3b5ee6aa18ea21c7c7a7074ab5bd55e575b1d
GIT binary patch
literal 106
zcmb2|=3rp}f&Xj_PR>jWz6{)jpHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF
o);z+nZ0!@rB87sv>@G^(j0{^^FU<%78X}KonluA5*l-X50HX~YX#fBK
literal 0
HcmV?d00001
diff --git a/tests/data/qa/freebayes/collated_versions.yml b/tests/data/qa/freebayes/collated_versions.yml
new file mode 100644
index 0000000..c3e25f0
--- /dev/null
+++ b/tests/data/qa/freebayes/collated_versions.yml
@@ -0,0 +1,24 @@
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT":
+ bcftools: 1.16
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES":
+ freebayes: 1.3.6
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:MERGE_FREEBAYES":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn":
+ score-client: 5.8.1
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet":
+ song-client: 5.0.2
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT":
+ bcftools: 1.16
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT":
+ bcftools: 1.16
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES":
+ freebayes: 1.3.6
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX":
+ samtools: 1.16.1
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES":
+ freebayes: 1.3.6
diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
new file mode 100644
index 0000000..dc1d78a
--- /dev/null
+++ b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
@@ -0,0 +1,117 @@
+{
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "studyId" : "TEST-PR",
+ "analysisState" : "PUBLISHED",
+ "analysisType" : {
+ "name" : "sequencing_alignment",
+ "version" : 15
+ },
+ "samples" : [
+ {
+ "sampleId" : "SA610228",
+ "specimenId" : "SP210201",
+ "submitterSampleId" : "COLO-829-BL",
+ "sampleType" : "Total DNA",
+ "matchedNormalSubmitterSampleId" : null,
+ "specimen" : {
+ "specimenId" : "SP210201",
+ "donorId" : "DO250183",
+ "submitterSpecimenId" : "COLO-829-BL",
+ "tumourNormalDesignation" : "Normal",
+ "specimenTissueSource" : "Blood derived",
+ "specimenType" : "Normal"
+ },
+ "donor" : {
+ "donorId" : "DO250183",
+ "studyId" : "TEST-PR",
+ "submitterDonorId" : "COLO-829",
+ "gender" : "Female"
+ }
+ }
+ ],
+ "files" : [
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram",
+ "fileType" : "CRAM",
+ "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31",
+ "fileSize" : 76810,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads"
+ },
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "57a23de6-044c-555b-8070-bd278d449a63",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram.crai",
+ "fileType" : "CRAI",
+ "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652",
+ "fileSize" : 62,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads Index"
+ }
+ ],
+ "createdAt" : "2023-04-18T21:30:04.11549",
+ "updatedAt" : "2023-04-18T21:32:47.73069",
+ "firstPublishedAt" : "2023-04-18T21:32:47.728311",
+ "publishedAt" : "2023-04-18T21:32:47.728311",
+ "analysisStateHistory" : [
+ {
+ "initialState" : "UNPUBLISHED",
+ "updatedState" : "PUBLISHED",
+ "updatedAt" : "2023-04-18T21:32:47.728311"
+ }
+ ],
+ "experiment" : {
+ "experimental_strategy" : "WXS",
+ "platform" : "ILLUMINA",
+ "platform_model" : "HiSeq 2000",
+ "sequencing_center" : "EXT",
+ "sequencing_date" : "2014-12-12",
+ "submitter_sequencing_experiment_id" : "TEST_EXP"
+ },
+ "read_group_count" : 1,
+ "read_groups" : [
+ {
+ "file_r1" : "test.paired_end.sorted.cram",
+ "file_r2" : "test.paired_end.sorted.cram",
+ "insert_size" : 298,
+ "is_paired_end" : true,
+ "library_name" : "testN",
+ "platform_unit" : "1",
+ "read_length_r1" : 75,
+ "read_length_r2" : 75,
+ "sample_barcode" : null,
+ "submitter_read_group_id" : "1"
+ }
+ ],
+ "workflow" : {
+ "genome_build" : "GRCh38_hla_decoy_ebv",
+ "inputs" : [
+ {
+ "analysis_type" : "sequencing_experiment",
+ "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ ],
+ "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e",
+ "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a",
+ "workflow_name" : "DNA Seq Alignment",
+ "workflow_version" : "1.9.0"
+ }
+}
diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2721fbff9a8989d962bf7b950135cc92b5211082
GIT binary patch
literal 5034
zcmZvg=Q|q?qlQ&u?^U&F&DxFG8bwR(+Db%YQ#+{@Gl(Ka&88?nDKTQNM2ad^ilQh=
z?5$SKn(ukfbw2&B^L%)&`+5F^TOgSlK=$8UQ<7=xkdvjbZQC0{msBz|Dx)Zh&uW1z
z$bnp8L$F_5PtBj|xxk6;-(POL(WmiYesLq+d%R}uY4ivFtV$4x4ZdlZO)s48D9VaX
z1e|WwsB)%DI>^EQtQ0+s%kz2Q^
zPGAX5F5!-B?Va~q*y~@(u7U85M3>3wWS2nrwH9yJtmz`&x`v5Pv{|IXx@LI`LE~$~
ze?nRsJnh@?4-fJuikB9d@-w}+7Rd+c(*cL4*--Ihofa8do@+fDUEgCodim5ur65nq
zk^cI%h8y$>EHk#fL80$S6KY+%`%;Egvmu>Q_npx0CRfM~hR~HD6JLIlVsM)H_H}+e
zN{}FAbhD38TO@RTE_2?WkXG9PDF0mnKy1!n@cr0==c3k7)#;!1>U}%&=UZWWE5Qo>
zm9S30HRaR|LgCBaGJSM3iu#HiF_iu7!fxQ65>ig*elH;o2YxM!7qD+%;
zS@bX-^{hgOis3#5qC%o559rMG&)6-ILA&
z3WMOBv1XWUE_=>lArfMM9w>XBkxz3o&is%gOTQd9suB^CEu_q}dz4S=m;e&s;Wb-M
zf51asQ*d=(N=6#^8rOEx+T#&%MSe-^@gs
zpkw($F@9s-EhF~N+F0>&aX(NzN*s@-{-l&bpE`24Az*wRmGL=73WKs_B2CY-6xPV!
zxluXy0Tf@9gQ*2ONMI5vOsXC0^1NpSN4d_kAk9G+yVcx6BvW4sTy^JlbeuS@b3wRA
zwxvKR1V=g0Acw*^4Hg524+d^XYb|7s-Jrs49ZuKPaO>MJ7G3GKm>N3lDGIA1F)G>W
zHbRZ|dU30w>M0gi1ajNc#es`Nnu*D{lkf9hHj#dacP`KjMB6yeuVGdNMQAAyYkngm
zwvj=izn;4s5^r-uvJ7dtfkS@OYBYkr3ln=xk^3z3gSPa2yT#Hcqj@racop>1OLhbA
zG*Wxu|C!UZQgGeiO)1j##8c>Gvr8tCsm+%LGZuSa*&=jxJg=0XM`vhvE?izt5%$}Q
zuz*+a06p=D)9uZR@(u9qq7HXig;e
zw)Od+8Yk=QuneOY8QG)51LQHnUNIP4dCoX}~;C^c}Pz-@w!ppFfZGd>RhXvuEsB`AZa4+dP2
zT(16q)hzD7m2&z}8@OajS)*W$hLQRbeSE`ZP)iFRzH_&;GDsO&C-&LH}BB
zXM3odW9?%hMv(drHDm^!J6)q=57RDL8RzKMxOt&i7*q*5kS`ftLJoU#}
zP50Sz1-9spK+xKv@s{xcF&iyqYPq(9ok%S3w{9CWDK&~1%bnMm0dP=mWGro*&nPum
zRfE!~ymuM^B@Vb+-@cZ&N8QY=b{6fC^M5*~%fLFMHyLa2MIo`&OGqA@$Q%cGA`zXV
zz3=^GuRE?@$jL>
z5+}NNx&or6ed1}uPTaF>CZj4BlCLcObf|Vjc3e07?4FNAE@h)LMq%&~+0`s%tk|s>
z6iLcbPW`6wcD}wJkA1h3S=M`hI4eU=zy`|vEV4$#(UDXttqe=$5L4S;m1;7`lq-7z
zqtJL)ZGOWtI)6%4e^X<_%ieMYCS)<33JeOF6AIb*&1GK(`aqtw!X+5`y{YbDY%kkC
zV|@EquNWTK%3GX`ce4@n!MOp}$PGEq?cMUYDhEHF(P$S|l@nR%I_#zD)kJSEZIbZN
z*k=sF6ExYw*Bq1Jvkc)0o9y9l?o9AGG~8b6WIyD>izPr9d
z^7XkTwj&!8IH7%M0=Q+&+oNCGK&&KM;{#lkBpu6{?D*UcaR46pWN%IsLV>w4$0bA(sRRm#5`%ev>sc%`!8j|*4&LHQ(
z`KuUS;69&_Qf7)Bt&v%9uaXI^hgNTq#DeN^$-R9*>{L$2s1}%m5~1N?)0buFt2)c%
zk-}m9F&CHF?#0V-&pr>B*c!lmlp5A4$IY1e>FQKpSmp;^>Y;XUlS
zdl*O^B^1Ex@~VK|kIo+g$dyeWpAXtt{}OVvV64aYP=9s1+aPLhyW{&&HLE^
z25ejZ0ImM=?MMWNd3XTd%D825-TY#S!|n}6dMq*JXH4|JssABUOKrzcs=7qZQ;PF=
z?cYZG(MFpOQoQ*S_uM(1>+GumV3GDb@_EHzB2CjXU`71^bC?Je^l@A!dk)QDpYJ_|CI&uMMiR%jzeo<)wBv))%jJee1u80kfjB8FneTi)1UsQH;NW9Z_PYYnj-!
z{oMWWqf>#2wWoR3UxA#N0M#~PLB3`rm!?BR0Evfl7A88DMbiS}&p2tNPQv3#nh%dU
z>Ed%k@GKY8ImtM23v1ts{Md*2>^R&+5!a1KM;FOH+V^41L|Xp}bd`>c`tf{#Sgr@mfXEEV;IVh*@PtIL=#g1z+ps&Wfo{Qo3A$_
zh_em7+$pI_tAr&tE|cF9@i|A480{o}-mDDUe@rBR)~SyPvMqKYR%LYM3i28c2Zxs2kYAyIAq#zGe@0
zrgo*ibW?sg;AgG(D)bO_2Y>rHEOFpT*2
zH(-|xT!w{5L_{86goZ~&oE+AiL&Hr*@Mf4mLyOLSR)5`wH^whMjo4QAD(3x@fO(2)
ze3-`JKV&??=(gfecT!XOiFQqLcI%c=#x;Kw7iL0Q1>CnN@((U4?SsL%JdL=^6fU+u
z7+PXcgm0tz3Ig&3n2ZpM*itLG+oJ@h^}MW;W`9B=^y<1VwRcSyCUw@Bg_JSwx9E7`8_ul
zFtB2dzu{P1=EQ@2i`kl$0v^()TrC2^o3;k)4%r3j?z}#eA81f#O<3OuLN2jlB<*7N
z(%Tx(*@^Dm{HmJweSc=iT`_k|@lOp8gXk;6Ho7fL;p^3yAZ>-sBEtqEiRv+d$enrl92TN
zKEbUhsmcU-BA>O*UFPBjR-^pAC$?;UHdcK_=&{1r72Smk{XB>8+fPVuNK`la8%0d&
z?^W$%>@w{JlbwKF3*$}vmsbkD9#oby2
zT5JYnXY6&*)mI8b)9=y40-`s>uX%o2#`)7ZD}?eqqo2&7!R-EMlv
zV$)!*iY7)0F_=>qBKwyzpa
zfBEzLn$q;Ljj=h8fAO$T-o@g=X@xTV+dwD**jad*zx4LDRrGm#S307qH(xr&XkU`I
z(ALT^O`$U6DhDB7*txSe(-3#U!y#;LURb~CQS2wxG{}m`d*j_fKjjuC|2KB#*3Q*D
zB2RVwS}qS&b*KIJBl=qg!22uSU)`#gEn_QL3-9$F22Elb5)`-s
zj#16#yR3IY910ZxP;SR4u^MPPV9ryHfeQlcDXIJZO#@g^wXPY{FR{+G-=v+p=(E&8
z(71lfNrd>V_bI1)n;2XLK_jk$*X+|D?gW?-)O4;yvqf0_DbTA7Shye@sG{k1
zzzTzF`Kz_7C#EW6uJwnF&)S?d`)L?5cU}xX)F5gmV%G6Aa$+HG84yd!iPx`Ckz`^$
z*INWXev`9F_>6aQ(&@r);orI=`^^pok4YZ4`Am2CO%cA=WfO#O?vx>2_Y+Ki!pP+-
zF%^0}GhTjSJ&cDOFGB)Gg^L$J)n_J$Yh;Nxo7o65DNt(d(prmDLx(?FghV|v0kAr@{g-;T?>}g>0U-+r$aaMgTi7V%Inw6
z-jm6ElgkMnm~WH$-6>V-`2~;ll~GaDN?cF+JeX6$x=vp(SFwl*y&JYzs~;7(czDdl
zKs4JeQh($
literal 0
HcmV?d00001
diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..b782b83eedf503c70308758f374bbc2b72f4fd46
GIT binary patch
literal 105
zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF
n);z+nZ01^rB83a;JW3OfGMs1mx*-T?hCG^K(hSUCvq1y^i6I>Aren%4gKm0c7GbS6sA>>If3bB@pbuwAXjTu`iZinjC68*JP4
z=2^Ac9j`jSj>1mZA76xIBF!_)mF#>9Ns#;X>9^Oj?&rU(y3?>fIi9>i$tZ;|&jgR~
zhh?Mb@_7D88q$YpgVA`^Uw9d}DqT=~Jb9y1^?jG9+<>N-G|7k{LLuNh=E9N&%re1=
z0a`GaW`aP_mc(bT{T2EIzlD_9%`v_o0&XDDgbvPbP(te)*bPBSjH85TqgASmN(q>s
zE(tX#NuV^WmEFJ=HAiV-tqGqR*GM_KJ
zkXU2n90L`DCS(S<1h=Nxv4Ll9H!$hW?*c%ra>;+Mo=6_^
z2&EM}5U7(fl+L~kxhCeFgIHsdlC)s>ce((nUVk-U3M%h9y8M!8-7i6cyy9!_Q$nbL>FhGK%a=5x&a*rB^*z9SfYQ{dOQjW5~FzdUeHUO}JWuBIpLr(fW@u
zC&6GaTTHsE#$dItHUHOF@?do_&C@mMPW@uF%c!%on#<&vdbr>J5yvUg@UBF;mQ+O{``j6vROAClY+4
z6f+Pj4KHCw1B_xWiMh;jzCQE@U^0q)D!&Br#n}0x)KY&(%Q8bLT_zQ&UGs@wX13Gd
zxHQFpk^u_qf=+@=DMM*S8iY$QcIw9Rg9-$Y{{-QMUr;Q4-jF7kpBMW6>jppn
z7SwNUZoI6~bTISz_Rnef820C@+fv)*JMJu;8$}~W3qnOGz-pAn{dA@un&vS{Z`7ZL
zB|-BYf}WBwXa1z((vseyD7$ih8lHjvNy}aCxGRcK
z$qvx&**#yTxU|JZ;E<=cv&pi5=xBS&x5B-oKXRWy>Th$7Ci6wVH(K^f+Mu8v2s~-X
zqcqbLaYJCN&Lf*^+}}!E@Q!!#uls3lHJVKy-C15Dzn2H2BJ|4#_&t}A0|ag$Lv5X}
zD53Ad4CkEcfRN_G@*1+=F(Mi53Ki1&>kv0koIjaXTh7Q`P6(V1*rLeE*M=KFw#qJWpe2jQ`r}gA0;blO@kjtn25|>BE
zyZP}ck6
zd#jtc4=#C5%UW6w7^O^bNu5S+bi*BjqOs<(5xHY5nV_^-&vsC&tuY0}_?oraSGB0w
zj%)3k*1ER7X02NL$_=z`TI-u;J<^y$t@iR@b9bJU>MitAP6qq9%3#iL`$7!&};=?g6p$t
z^}ye8Uw>&d8ux~@X^_bqA(sH+NR88kOsaIzy7A(bDpvhmvuImJD3huj;{B
zwRgLiO)B%*vN8%Qi~gX}9j_{`fmMq8dd0P-N^!leSTc5c_CK(G%}Bxz001A02m}BC
Z000301^_}s0stET0{{R300000003V7Fed;2
literal 0
HcmV?d00001
diff --git a/tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..8ab61e09d27e124409b2dbad7c44816a0c593ffb
GIT binary patch
literal 77
zcmb2|=3rp}f&Xj_PR>jW1`OPVpHfm1fFLO$fx*zhghQj5LxY8(@ONR23Q(Oqng(eG
JX0Uz`0RVK44YU9N
literal 0
HcmV?d00001
diff --git a/tests/data/qa/manta/collated_versions.yml b/tests/data/qa/manta/collated_versions.yml
new file mode 100644
index 0000000..7199083
--- /dev/null
+++ b/tests/data/qa/manta/collated_versions.yml
@@ -0,0 +1,14 @@
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_MANTA:MERGE_MANTA_DIPLOID":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn":
+ score-client: 5.8.1
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_MANTA:MANTA_GERMLINE":
+ manta: 1.6.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet":
+ song-client: 5.0.2
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX":
+ samtools: 1.16.1
diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
new file mode 100644
index 0000000..dc1d78a
--- /dev/null
+++ b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
@@ -0,0 +1,117 @@
+{
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "studyId" : "TEST-PR",
+ "analysisState" : "PUBLISHED",
+ "analysisType" : {
+ "name" : "sequencing_alignment",
+ "version" : 15
+ },
+ "samples" : [
+ {
+ "sampleId" : "SA610228",
+ "specimenId" : "SP210201",
+ "submitterSampleId" : "COLO-829-BL",
+ "sampleType" : "Total DNA",
+ "matchedNormalSubmitterSampleId" : null,
+ "specimen" : {
+ "specimenId" : "SP210201",
+ "donorId" : "DO250183",
+ "submitterSpecimenId" : "COLO-829-BL",
+ "tumourNormalDesignation" : "Normal",
+ "specimenTissueSource" : "Blood derived",
+ "specimenType" : "Normal"
+ },
+ "donor" : {
+ "donorId" : "DO250183",
+ "studyId" : "TEST-PR",
+ "submitterDonorId" : "COLO-829",
+ "gender" : "Female"
+ }
+ }
+ ],
+ "files" : [
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram",
+ "fileType" : "CRAM",
+ "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31",
+ "fileSize" : 76810,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads"
+ },
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "57a23de6-044c-555b-8070-bd278d449a63",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram.crai",
+ "fileType" : "CRAI",
+ "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652",
+ "fileSize" : 62,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads Index"
+ }
+ ],
+ "createdAt" : "2023-04-18T21:30:04.11549",
+ "updatedAt" : "2023-04-18T21:32:47.73069",
+ "firstPublishedAt" : "2023-04-18T21:32:47.728311",
+ "publishedAt" : "2023-04-18T21:32:47.728311",
+ "analysisStateHistory" : [
+ {
+ "initialState" : "UNPUBLISHED",
+ "updatedState" : "PUBLISHED",
+ "updatedAt" : "2023-04-18T21:32:47.728311"
+ }
+ ],
+ "experiment" : {
+ "experimental_strategy" : "WXS",
+ "platform" : "ILLUMINA",
+ "platform_model" : "HiSeq 2000",
+ "sequencing_center" : "EXT",
+ "sequencing_date" : "2014-12-12",
+ "submitter_sequencing_experiment_id" : "TEST_EXP"
+ },
+ "read_group_count" : 1,
+ "read_groups" : [
+ {
+ "file_r1" : "test.paired_end.sorted.cram",
+ "file_r2" : "test.paired_end.sorted.cram",
+ "insert_size" : 298,
+ "is_paired_end" : true,
+ "library_name" : "testN",
+ "platform_unit" : "1",
+ "read_length_r1" : 75,
+ "read_length_r2" : 75,
+ "sample_barcode" : null,
+ "submitter_read_group_id" : "1"
+ }
+ ],
+ "workflow" : {
+ "genome_build" : "GRCh38_hla_decoy_ebv",
+ "inputs" : [
+ {
+ "analysis_type" : "sequencing_experiment",
+ "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ ],
+ "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e",
+ "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a",
+ "workflow_name" : "DNA Seq Alignment",
+ "workflow_version" : "1.9.0"
+ }
+}
diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c062997744dcca308be8488b4981a71683cde8be
GIT binary patch
literal 2162
zcmV-&2#xn2iwFb&00000{{{d;LjnL*2*p@kZ{kW4eRh6D>E>l-*BF06g6Fal34w_=
zAq1H0NUPPTu?uL$?&fwk`M6j2zptuoGl68X%xa~5Knd>ds#B*R^QT<3d3#~pL)K3bbLIxyqaB1y1!obyPx@T)gKQg%y8-UQKyQp
z4VR+(%WQ?PP%NcyAQv!a8dH|%8Wub^s4xS$F~AHk(A*$!4NHa0paQdE0(>w23(-8RkCND*X`MDH}oTp~jtX+a73Td9#s9~XG0p?MHR8UBT
zkjs=qTAc+hh^ri#HXsFpwy5^~UCBqAvRzku1kEv*j}X0ac6j^st&_vsuS9Hl&a4#b
zmCQ2&Dx`t6LXE=gn;Xe7(;BlorZ^L3IMchPN3x1RQ($JrHATG9ux45V%kyfv(u)RS
ze#IUT7(A3Vx9q+4ZHCoSw(GT|B9+20E;KBpN--PGfQvP$G0!na4Ua3Ht{|10JhLIr
zaqBAE_chKX&z|4TaSZ}U`Cv0#L#HQL5|~P%xi%;aJaV&osnfMJzVC9%4ZiJ=Rwz{^
zXP7~@K`;`S0QrC_XX`Z=OW;;}1fkkldbpWf_hz=PdwqAbDdwoU6LFNX<1E{Oj~RU7dBZe|IR2Ec1fxX7336c_S)y5cIqAORF;MpQav)2RTcW$^yI%c
zt?&Vr#vhpOk88`pJD>-aWz4p1u;dIj8Z*nAZGn}>xLA5{DS&~HqEQQ`*yM&c?7n#}
z)BCby$}Xg^Dnf0^U=OI+5~=M{is_7|210B0thY~4f4t@Xy)e!y^e+TvMO{|UT5(c<
zv<~Zn`M(5nc}Or5WLcF4rnO8tGnkbXXzRlT!|qX(WlPlAu+|Dw%&3Uyk=?jY=3rL4
zuXH>$ii_oL?bNNIk>dmYs0@dQC-p4~-{Qxr;#hNQl>3ybt#plWr
zvaGHDR$2$0((Oc|t))rCXcweep?c&Rq2PiSn}P)R#}-|Eo@u=)h`{>zT53*Xg8SpG
zFAJgG{^zSWx4c(uy~q3h;p@xMx5H}rx(cpL+2J*9rN#eKS?}W-U)7j~Wl(Q+;*nGZ
z%lYq^xiDTS%o;jnIcAXad(8Pt%B<|qbv89H8krVcU{<$=yYm6)HKu&QQyS>RD=gC*
zOYjbs@rqHmL<2lSVfcch+P5@4+p{#tB{RDfQq5f?uB!Li!ONPu91U*lMRj&{bN=_<
z-SAt%MdYs|R@?PUgoB*z72fEDl=ge72QOD`tJJ_0zF;cDH42I=f3}1oOIOvT;z93a
zx7C|;1*~ki)(GA6Mlnmdkub-XLU7C3hMJ0I1y&6KY=3v{b!yj3Pvv6`e^6=n$8LW-
z_>;1zm{=nG^T~bwmkZDEYZnUk^x^5Rk^AB4FM$_*{OIk{cYZnSZLh{&N!eRfWA_2e
z`BD@p%=gF{p%jd!4r+yT-(WSCZgX@o+D;Si>E=RYP}eqf4x&QglHMOOo4|Q3Rjxr{
zDpj`Y=dwS!*j8}w9Z%&J>w_1lVe$p6$u+%xSRX$Q=6b&+zoFd8^%SnFQR`(--ZC<|
z82rzmDqNsKk?xT``LbvJt+Y-H%9l1(?1We2ZyT}(8^I0WlOeTdH0#sQOfPgLL#Dp{
zs+5DmoEc6MmR0I1TpmomlvPB-uEMoc*2V&IDVFu?Ug+CCs&}K9j_y8>Z?-M)J^4&W
zcNQW(nGMTfB@4Nh`Ex<@CW@t5S<7jWI5xzVbb%arBKITy`TiUnV86i$u8{GJU_
z6}0akQr$0d`RJ{mq0yML0vKDIW>MU3q+z^h#GPo~n4e}*BW}}!s1waQp_eLFK%=ou
z&46jc>_k+cx3FIt4Z3M3R^OUN{xJL&`Mw`EV&C@zZ;qL_{GE(!M4v0rB`^wsf)u7(
zJ%MF~6*OgY+r7xVuD0`~ZI;%HTe8u$Zitcv})8c0)gm{FZ-ubbP!;-lf%~*=&B%
zsB{zcrrAe*Z{)f;M@e3a_G8J;x#6U3AuTVSi@w}VdBo@b0<
zJH{4k#m$$D933BPxlt+Bfo&grqxqXl(G9(h7tk*d+Aw8iyuj|Y6mZR+0Zt(dlUAI>
zAy6Dg$LF6XH`mViX6jt_oyo<(>0Qm7+b_KQ7hlad`Xn)9bfd=lZtW_CHlh?VdQpSu(s$hPPjn{y6E62T8A=^!oH?
zl1$H%!7LeHIes%pqQp;v#7}%Ts6Kw;dwx>6s$C~h=ti!;Mb{1;&pCHyPH-T)R%?qc
zJRDs}@WLcW0=NA5?%VLfQ`dLHlL}uLgptKJbVA=5_omZ5f})>*&`N?Zi3o)oT8aoq
zqvdrHGMuzpMgD^by>(e^s8GS`mW_BL}-
z*lt^0ojZ{q6Tsmif{G8v)$Sx=62^qh4P$~v{{>;vh&tQEB~cu>C*gNkQHNmloO8!_
zt_}qC6G;qO2{9HVv0Hw2b(91ruJ5+nHJaEbG*o;+`z`sL9_~5rByke7NIroZCmlCR
zLdsOjYgPL3UEd9&EfaA-(k0*RgPHgVJkcptQ=GJuj@wGwZYzmgKMB2{&P;6a_@_tz
o2C$e?u8tJ|03VA81ONa4009360763o02=@U00000000000113Nt^fc4
literal 0
HcmV?d00001
diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..0c846432a310f343296d3fbf6c942f45c16b3bfa
GIT binary patch
literal 105
zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF
n);z+nERxl+NI~Ejqte8q41!u~{saNdkVi92nt>T?Hi!TKK^_{u
literal 0
HcmV?d00001
diff --git a/tests/data/qa/strelka/collated_versions.yml b/tests/data/qa/strelka/collated_versions.yml
new file mode 100644
index 0000000..b6a800d
--- /dev/null
+++ b/tests/data/qa/strelka/collated_versions.yml
@@ -0,0 +1,18 @@
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:MERGE_STRELKA":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE":
+ strelka: 2.9.10
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn":
+ score-client: 5.8.1
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE":
+ strelka: 2.9.10
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE":
+ strelka: 2.9.10
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet":
+ song-client: 5.0.2
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR":
+ gatk4: 4.3.0.0
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX":
+ samtools: 1.16.1
diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
new file mode 100644
index 0000000..dc1d78a
--- /dev/null
+++ b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json
@@ -0,0 +1,117 @@
+{
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "studyId" : "TEST-PR",
+ "analysisState" : "PUBLISHED",
+ "analysisType" : {
+ "name" : "sequencing_alignment",
+ "version" : 15
+ },
+ "samples" : [
+ {
+ "sampleId" : "SA610228",
+ "specimenId" : "SP210201",
+ "submitterSampleId" : "COLO-829-BL",
+ "sampleType" : "Total DNA",
+ "matchedNormalSubmitterSampleId" : null,
+ "specimen" : {
+ "specimenId" : "SP210201",
+ "donorId" : "DO250183",
+ "submitterSpecimenId" : "COLO-829-BL",
+ "tumourNormalDesignation" : "Normal",
+ "specimenTissueSource" : "Blood derived",
+ "specimenType" : "Normal"
+ },
+ "donor" : {
+ "donorId" : "DO250183",
+ "studyId" : "TEST-PR",
+ "submitterDonorId" : "COLO-829",
+ "gender" : "Female"
+ }
+ }
+ ],
+ "files" : [
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram",
+ "fileType" : "CRAM",
+ "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31",
+ "fileSize" : 76810,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads"
+ },
+ {
+ "info" : {
+ "analysis_tools" : [
+ "BWA-MEM",
+ "biobambam2:bammarkduplicates2"
+ ],
+ "data_category" : "Sequencing Reads"
+ },
+ "objectId" : "57a23de6-044c-555b-8070-bd278d449a63",
+ "studyId" : "TEST-PR",
+ "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92",
+ "fileName" : "test.paired_end.sorted.cram.crai",
+ "fileType" : "CRAI",
+ "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652",
+ "fileSize" : 62,
+ "fileAccess" : "controlled",
+ "dataType" : "Aligned Reads Index"
+ }
+ ],
+ "createdAt" : "2023-04-18T21:30:04.11549",
+ "updatedAt" : "2023-04-18T21:32:47.73069",
+ "firstPublishedAt" : "2023-04-18T21:32:47.728311",
+ "publishedAt" : "2023-04-18T21:32:47.728311",
+ "analysisStateHistory" : [
+ {
+ "initialState" : "UNPUBLISHED",
+ "updatedState" : "PUBLISHED",
+ "updatedAt" : "2023-04-18T21:32:47.728311"
+ }
+ ],
+ "experiment" : {
+ "experimental_strategy" : "WXS",
+ "platform" : "ILLUMINA",
+ "platform_model" : "HiSeq 2000",
+ "sequencing_center" : "EXT",
+ "sequencing_date" : "2014-12-12",
+ "submitter_sequencing_experiment_id" : "TEST_EXP"
+ },
+ "read_group_count" : 1,
+ "read_groups" : [
+ {
+ "file_r1" : "test.paired_end.sorted.cram",
+ "file_r2" : "test.paired_end.sorted.cram",
+ "insert_size" : 298,
+ "is_paired_end" : true,
+ "library_name" : "testN",
+ "platform_unit" : "1",
+ "read_length_r1" : 75,
+ "read_length_r2" : 75,
+ "sample_barcode" : null,
+ "submitter_read_group_id" : "1"
+ }
+ ],
+ "workflow" : {
+ "genome_build" : "GRCh38_hla_decoy_ebv",
+ "inputs" : [
+ {
+ "analysis_type" : "sequencing_experiment",
+ "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ }
+ ],
+ "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e",
+ "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a",
+ "workflow_name" : "DNA Seq Alignment",
+ "workflow_version" : "1.9.0"
+ }
+}
diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f2820417168a15946418e7c9860f04ed225630dd
GIT binary patch
literal 1172
zcmV;F1Z(>riwFb&00000{{{d;LjnMI1g%zGbK*7>eI~y`le}a*F~K&FO{eY*U`UxE
zkYGZmPdmcCfF`!&l>{mM^}X^(77R_*}2Qd8DhWC5vGxWJ0w!?+v!ix_xnV
zrNl;t;LV1i?ZPJ*9
z^pxSxv(DJ>KOWl8D32f^(FfaxV@35E1?H=F=P1u~|?xQ;1(R>yG_Hv#tdC
zN`Ob1Stz(>tIRT7OUJp1fv@!1Yc?7U+ZYVTgGsg=%x8Z*^>4`l}p-CP%S%7{6;AGk#AZJ7{kA%iCc9A-y84WrmiSq%;6li3n
zlu4DrDXh6BSb|6yQVA=xBAqI9_kV-#R-kA7AB0w$G!@uqd`0vc2yCHRfZpukj5Z12
z>n6)R7B}z0LzHR%VK^C|mE%BDsWV3*ZO_Hg;Sk8o=8L_!kZZcf2F$30X=SnkB;)
zF%=3K;dt!xz0*}0zG3wQ$_-t?aVTN2V&F~f-x!aO75UzW5-<>quw$ZGv(*|S*K3^J
z=1;WPTY^*_2oj)`GW?$7=N=a?2#@
z2olOeH9t1V^}^gZQh~=e>D=V%^XVwg@ATs2S@hqidz1GZyE*qD7F1VTq>!&`YM{C~
zSRvs@kfY~HG$#-Tds=O@yA-D^vJ7y<7Lv+cps7{|_gzwY<2klnZ)Cqa+itmyW{VJW
zc8~H~9Db`i?Yp*p=iIfOdwe2~KzV*sG$Q6XcXhknZna(4y>Gc_LV}sUw{F|_^_GKo
zLAT{JKif{zMSOD|g)ajk->8hd0gV-)ySay95_y-m8zpaJ5z_c}!T7CaQG{J-S!!$k
zURnBuDh0^`MaHrgG$E8OKSyq}ZG}#AX*KWNg|)yBR2
zk}6G!CC=NQyK#AO)%$Had8|E6g4)op;YF{8m%Q5ZOJ`KeMXYA-47I$TH4RFSFa8E3
m^xyrB3;+NhiwFb&00000{{{d;LjnLB00RI3000000002%m?ZE3
literal 0
HcmV?d00001
diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..b9e0990fab2fca9dab7a72ca96023b80994bf088
GIT binary patch
literal 72
zcmb2|=3rp}f&Xj_PR>jW+6>%file(it)}
+ pipeline_ch=Channel.of(file(params.pipeline_yml))
+
+
+ ch_payload=analysis_ch.combine(files_ch.collect().toList())
+ .map {analysis_json,files ->
+ [
+ [ id : params.analysis_id,
+ study_id : params.study_id,
+ tool : params.tool
+ ],
+ files, analysis_json]
+ }
+ PAYLOAD_GERMLINEVARIANT(
+ ch_payload,
+ "",
+ "",
+ pipeline_ch,
+ params.tool
+ )
+}
diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config b/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config
new file mode 100644
index 0000000..e60f059
--- /dev/null
+++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config
@@ -0,0 +1,6 @@
+manifest {
+ name = 'Germline variant calls'
+ mainScript = 'main.nf'
+ nextflowVersion = '!>=22.10.1'
+ version = '1.0dev'
+}
\ No newline at end of file
diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml b/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml
new file mode 100644
index 0000000..6f0ac2a
--- /dev/null
+++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml
@@ -0,0 +1,55 @@
+## TODO nf-core: Please run the following command to build this file:
+# nf-core modules create-test-yml payload/germlinevariant
+- name: "payload germlinevariant haplotypecaller"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,haplotypecaller_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ - path: "output/variant_calling/haplotypecaller/*.json"
+ - path: "output/variant_calling/haplotypecaller/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.haplotypecaller.germline.snv.vcf.gz"
+ - path: "output/variant_calling/haplotypecaller/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.haplotypecaller.germline.snv.vcf.gz.tbi"
+- name: "payload germlinevariant freebayes"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,freebayes_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ files:
+ - path: "output/variant_calling/freebayes/*.json"
+ - path: "output/variant_calling/freebayes/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.freebayes.germline.snv.vcf.gz"
+ - path: "output/variant_calling/freebayes/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.freebayes.germline.snv.vcf.gz.tbi"
+- name: "payload germlinevariant strelka"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,strelka_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ files:
+ - path: "output/variant_calling/strelka/*.json"
+ - path: "output/variant_calling/strelka/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.strelka.germline.snv.vcf.gz"
+ - path: "output/variant_calling/strelka/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.strelka.germline.snv.vcf.gz.tbi"
+- name: "payload germlinevariant manta"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,manta_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ files:
+ - path: "output/variant_calling/manta/*.json"
+ - path: "output/variant_calling/manta/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.manta.germline.snv.vcf.gz"
+ - path: "output/variant_calling/manta/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.manta.germline.snv.vcf.gz.tbi"
+- name: "payload germlinevariant deepvariant"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,deepvariant_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ files:
+ - path: "output/variant_calling/deepvariant/*.json"
+ - path: "output/variant_calling/deepvariant/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.deepvariant.germline.snv.vcf.gz"
+ - path: "output/variant_calling/deepvariant/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.deepvariant.germline.snv.vcf.gz.tbi"
+- name: "payload germlinevariant tiddit"
+ command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,tiddit_vcf
+ tags:
+ - "payload"
+ - "payload/germlinevariant"
+ files:
+ - path: "output/variant_calling/tiddit/*.json"
+ - path: "output/variant_calling/tiddit/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.tiddit.germline.snv.vcf.gz"
+ - path: "output/variant_calling/tiddit/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.tiddit.germline.snv.vcf.gz.tbi"
\ No newline at end of file
From c767733e11c830ad7f1931ce2b0cce1c6efa5602 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Thu, 25 May 2023 19:03:02 -0400
Subject: [PATCH 2/4] update payload formatting
---
.../payload/germlinevariant/main.nf | 15 +-
.../germlinevariant/resources/usr/bin/main.py | 257 ++++++++++--------
tests/config/test_data.config | 43 ++-
.../payload/germlinevariant/main.nf | 18 +-
4 files changed, 194 insertions(+), 139 deletions(-)
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
index 6e127c8..d08b238 100644
--- a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
@@ -10,14 +10,12 @@ process PAYLOAD_GERMLINEVARIANT {
input: // input, make update as needed
tuple val(meta), path(files_to_upload), path(metadata_analysis)
- val genome_annotation
- val genome_build
path pipeline_yml
- val tool
+ val tarball
output: // output, make update as needed
- tuple val(meta), path("*.payload.json"), path("out/*{vcf.gz,vcf.gz.tbi}"), emit: payload_files
+ tuple val(meta), path("*.payload.json"), path("out/*"), emit: payload_files
path "versions.yml", emit: versions
script:
@@ -27,12 +25,13 @@ process PAYLOAD_GERMLINEVARIANT {
main.py \
-f ${files_to_upload} \
-a ${metadata_analysis} \
- -g "${genome_annotation}" \
- -b "${genome_build}" \
- -w "DNA Seq Germline Workflow" \
+ -b "${meta.genomeBuild}" \
+ -w "DNA Seq Germline Variant Workflow" \
+ -r ${workflow.runName} \
-s "${workflow.sessionId}" \
-v "${workflow.manifest.version}" \
- -t "${tool}" \
+ -t "${meta.tool}" \
+ -l "${tarball}" \
$arg_pipeline_yml
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
index a33e5c4..dcd4ca3 100755
--- a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
@@ -38,12 +38,11 @@
import csv
import io
import shutil
-#LUCA-KR.DO231106.SA602282.wxs.20210112.gatk-mutect2.somatic.snv.open-filter.vcf.gz
+
workflow_process_map = {
- 'DNA Seq Germline Workflow': 'snv'
+ 'DNA Seq Germline Variant Workflow': 'snv'
}
-tool_list = ['strelka']
def calculate_size(file_path):
return os.stat(file_path).st_size
@@ -56,7 +55,7 @@ def calculate_md5(file_path):
md5.update(chunk)
return md5.hexdigest()
-def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir):
+def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir,pipeline_info,tarball):
file_info = {
'fileSize': calculate_size(file_to_upload),
'fileMd5sum': calculate_md5(file_to_upload),
@@ -65,82 +64,106 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
'data_category': "Simple Nucleotide Variation",
}
}
- ### deepvariant
- if tool=="deepvariant":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['DeepVariant']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['DeepVariant']})
- else:
- sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- elif tool=="strelka":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['Strelka']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['Strelka']})
- else:
- sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- elif tool=="tiddit":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['Tiddit']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['Tiddit']})
+
+ if tarball=="false":
+ if tool=="deepvariant":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="strelka":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="tiddit":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="haplotypecaller" :
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="manta":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="freebayes":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="cnvkit":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
else:
sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- elif tool=="haplotypecaller":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['haplotypecaller']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['haplotypecaller']})
- else:
- sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- elif tool=="manta":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['Manta']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['Manta']})
- else:
- sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- elif tool=="freebayes":
- if re.match(r'.*.vcf.gz$', file_to_upload):
- file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
- file_info['info'].update({'analysis_tools': ['Freebayes']})
- elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
- file_type = 'TBI'
- file_info.update({'dataType': 'VCF Index'})
- file_info['info'].update({'analysis_tools': ['Freebayes']})
- else:
- sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
- #elif tool=="cnvkit":
+ elif tarball=="true":
+ if tool=="cnvkit":
+ file_type = 'TGZ'
+ file_info.update({'dataType': "CNV Supplement"})
+
+ file_info['info']['files_in_tgz']=[]
+ with tarfile.open(file_to_upload, 'r') as tar:
+ for member in tar.getmembers():
+ file_info['info']['files_in_tgz'].append(member.name)
+
else:
sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
-
+
#LUCA-KR.DO231106.SA602282.wxs.20210112.gatk-mutect2.somatic.snv.open-filter.vcf.gz.tbi"
#"TEST-PR.DO250183.SA610228.wxs.20230501.snv-strelka.gvcf.gz",
suffix={
"VCF":"vcf.gz",
- "TBI": "vcf.gz.tbi",
+ "TBI":"vcf.gz.tbi",
+ "TGZ":"tgz"
}
# file naming patterns:
# pattern: .......
@@ -154,27 +177,19 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
date_str,
process_indicator,
suffix[file_type]
- ])
-
- file_info['fileName'] = new_fname
- file_info['fileType'] = file_type
+ ])
- if re.match(r'cnvkit', file_to_upload):
- with tarfile.open(file_to_upload, 'r') as tar:
- for member in tar.getmembers():
- file_info['info']['files_in_tgz'].append(member.name)
+ new_dir = 'out'
+ try:
+ os.mkdir(new_dir)
+ except FileExistsError:
+ pass
- new_dir = 'out'
- try:
- os.mkdir(new_dir)
- except FileExistsError:
- pass
+ dst = os.path.join(os.getcwd(), new_dir, new_fname)
+ os.symlink(os.path.abspath(file_to_upload), dst)
- dst = os.path.join(os.getcwd(), new_dir, new_fname)
- os.symlink(os.path.abspath(file_to_upload), dst)
- else:
- shutil.copyfile(os.path.realpath(file_to_upload),"/".join([new_dir,new_fname]))
- ##os.symlink(os.path.realpath(file_to_upload),"/".join([new_dir,new_fname]))
+ file_info['fileName'] = new_fname
+ file_info['fileType'] = file_type
return file_info
@@ -198,7 +213,7 @@ def get_sample_info(sample_list):
return samples
-def prepare_tarball(sampleId, qc_files, tool_list):
+def prepare_tarball(sampleId, qc_files, tool):
tgz_dir = 'tarball'
try:
@@ -206,20 +221,16 @@ def prepare_tarball(sampleId, qc_files, tool_list):
except FileExistsError:
pass
- files_to_tar = {}
- for tool in tool_list:
- if not tool in files_to_tar: files_to_tar[tool] = []
- for f in sorted(qc_files):
- if tool in f:
- files_to_tar[tool].append(f)
-
- for tool in tool_list:
- if not files_to_tar[tool]: continue
- tarfile_name = f"{tgz_dir}/{sampleId}.{tool}.tgz"
- with tarfile.open(tarfile_name, "w:gz", dereference=True) as tar:
- for f in files_to_tar[tool]:
- tar.add(f, arcname=os.path.basename(f))
+ files_to_tar=[]
+ for f in sorted(qc_files):
+ files_to_tar.append(f)
+ tarfile_name = f"{tgz_dir}/{sampleId}.{tool}.tgz"
+ with tarfile.open(tarfile_name, "w:gz", dereference=True) as tar:
+ for f in files_to_tar:
+ tar.add(f, arcname=os.path.basename(f))
+
+ return(tarfile_name)
def main():
"""
Python implementation of tool: payload-gen-qc
@@ -234,8 +245,10 @@ def main():
parser.add_argument("-b", "--genome_build", dest="genome_build", default="", help="Genome build")
parser.add_argument("-w", "--wf-name", dest="wf_name", required=True, help="Workflow name")
parser.add_argument("-s", "--wf-session", dest="wf_session", required=True, help="workflow session ID")
+ parser.add_argument("-r", "--wf-run", dest="wf_run", required=True, help="workflow run ID")
parser.add_argument("-v", "--wf-version", dest="wf_version", required=True, help="Workflow version")
parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml")
+ parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files")
parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling",
choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes']
)
@@ -254,33 +267,38 @@ def main():
'analysisType': {
'name': 'variant_processing'
},
+ "variant_class":"Germline",
'studyId': analysis_dict.get('studyId'),
- 'info': {},
'workflow': {
- 'workflow_name': args.wf_name,
+ 'workflow_name': "%s-%s" % (args.wf_name,args.tool),
'workflow_version': args.wf_version,
'session_id': args.wf_session,
+ 'genome_build': args.genome_build,
+ 'run_id': args.wf_run,
+ "workflow_short_name": "%s-%s" % (args.wf_name.replace("DNA Seq","").replace("Workflow","").replace(" ",""),args.tool),
'inputs': [
{
'analysis_type': analysis_dict['analysisType']['name'],
- 'input_analysis_id': analysis_dict.get('analysisId')
+ 'normal_analysis_id': analysis_dict.get('analysisId')
}
],
- 'info': pipeline_info
},
'files': [],
'experiment': analysis_dict.get('experiment'),
'samples': get_sample_info(analysis_dict.get('samples'))
}
+
+ for key in ['platform_model',"sequencing_center","sequencing_date","submitter_sequencing_experiment_id"]:
+ if payload['experiment'].get(key):
+ payload['experiment'].pop(key)
+
if args.genome_build:
payload['workflow']['genome_build'] = args.genome_build
if args.genome_annotation:
payload['workflow']['genome_annotation'] = args.genome_annotation
# pass `info` dict from seq_experiment payload to new payload
- if 'info' in analysis_dict and isinstance(analysis_dict['info'], dict):
- payload['info'] = analysis_dict['info']
- else:
+ if 'info' in analysis_dict.keys():
payload.pop('info')
if 'library_strategy' in payload['experiment']:
@@ -296,13 +314,18 @@ def main():
# generate date string
date_str = date.today().strftime("%Y%m%d")
- # prepare tarball to include all QC files generated by one tool
- ##prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, tool_list)
- process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)])
- for f in sorted(args.files_to_upload):
- file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir)
- payload['files'].append(file_info)
+ # prepare tarball to include all QC files generated by one tool
+ if args.tarball=="true":
+ process_indicator = ".".join([args.tool,"germline",args.tool+"-"+"supplement"])
+ tarball_file=prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, args.tool)
+ file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball)
+ payload['files'].append(file_info)
+ elif args.tarball=="false":
+ process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)])
+ for f in sorted(args.files_to_upload):
+ file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball)
+ payload['files'].append(file_info)
with open("%s.%s.payload.json" % (str(uuid.uuid4()), args.wf_name.replace(" ","_")), 'w') as f:
f.write(json.dumps(payload, indent=2))
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index e16a301..ee6b710 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -1,6 +1,5 @@
// Base directory for test data in RDPC QA
// Should be ="../data"
-
params {
test_data {
'rdpc_qa' {
@@ -37,7 +36,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="deepvariant"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
freebayes_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -45,7 +49,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="freebayes"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
haplotypecaller_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -53,7 +62,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="haplotypecaller"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
manta_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -61,7 +75,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="manta"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
strelka_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -69,7 +88,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="strelka"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
tiddit_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -77,7 +101,12 @@ params {
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml"
tool="tiddit"
study_id = "TEST-PR"
- analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
+ gender = "Male"
+ experimentalStrategy : "WGS"
+ genomeBuild : "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation : "Normal"
+ sampleType : "Total DNA"
}
}
}
\ No newline at end of file
diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
index 62cc5b9..3ad0a6a 100644
--- a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
+++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
@@ -13,17 +13,21 @@ workflow test_payload_germlinevariant {
ch_payload=analysis_ch.combine(files_ch.collect().toList())
.map {analysis_json,files ->
[
- [ id : params.analysis_id,
- study_id : params.study_id,
- tool : params.tool
- ],
+ [
+ id : params.id,
+ experimentalStrategy : params.experimentalStrategy,
+ genomeBuild : params.genomeBuild,
+ tumourNormalDesignation : params.tumourNormalDesignation,
+ sampleType : params.sampleType,
+ gender : params.gender,
+ study_id : params.study_id,
+ tool : params.tool
+ ],
files, analysis_json]
}
PAYLOAD_GERMLINEVARIANT(
ch_payload,
- "",
- "",
pipeline_ch,
- params.tool
+ false
)
}
From e86f34c62dea82268ceaefa554d7d8c3ab13727f Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Tue, 6 Jun 2023 17:37:02 -0400
Subject: [PATCH 3/4] update payloads
---
.../payload/germlinevariant/main.nf | 1 +
.../germlinevariant/resources/usr/bin/main.py | 31 +++++-----
tests/config/test_data.config | 56 ++++++++++---------
.../payload/germlinevariant/main.nf | 3 +-
4 files changed, 47 insertions(+), 44 deletions(-)
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
index d08b238..7752022 100644
--- a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
@@ -32,6 +32,7 @@ process PAYLOAD_GERMLINEVARIANT {
-v "${workflow.manifest.version}" \
-t "${meta.tool}" \
-l "${tarball}" \
+ -d "${meta.dataType}" \
$arg_pipeline_yml
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
index dcd4ca3..dea2199 100755
--- a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
@@ -39,11 +39,6 @@
import io
import shutil
-workflow_process_map = {
- 'DNA Seq Germline Variant Workflow': 'snv'
-}
-
-
def calculate_size(file_path):
return os.stat(file_path).st_size
@@ -55,7 +50,7 @@ def calculate_md5(file_path):
md5.update(chunk)
return md5.hexdigest()
-def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir,pipeline_info,tarball):
+def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir,pipeline_info,tarball,data_type):
file_info = {
'fileSize': calculate_size(file_to_upload),
'fileMd5sum': calculate_md5(file_to_upload),
@@ -69,7 +64,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
if tool=="deepvariant":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -80,7 +75,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="strelka":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -91,7 +86,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="tiddit":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -102,7 +97,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="haplotypecaller" :
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -113,7 +108,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="manta":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -124,7 +119,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="freebayes":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -135,7 +130,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
elif tool=="cnvkit":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
- file_info.update({'dataType': 'Raw SNV Calls'})
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
file_type = 'TBI'
@@ -250,8 +245,8 @@ def main():
parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml")
parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files")
parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling",
- choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes']
- )
+ choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes'])
+ parser.add_argument("-d", "--data-type", dest="data_type", required=True,type=str, help="Data type for upload",choices=['InDel',"SNV","CNV"])
args = parser.parse_args()
@@ -319,12 +314,12 @@ def main():
if args.tarball=="true":
process_indicator = ".".join([args.tool,"germline",args.tool+"-"+"supplement"])
tarball_file=prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, args.tool)
- file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball)
+ file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball,args.data_type)
payload['files'].append(file_info)
elif args.tarball=="false":
- process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)])
+ process_indicator = ".".join([args.tool,"germline",args.data_type.lower()])
for f in sorted(args.files_to_upload):
- file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball)
+ file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball,args.data_type)
payload['files'].append(file_info)
with open("%s.%s.payload.json" % (str(uuid.uuid4()), args.wf_name.replace(" ","_")), 'w') as f:
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index ee6b710..bc9aff4 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -38,10 +38,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
freebayes_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -51,10 +52,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
haplotypecaller_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -64,10 +66,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
manta_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -77,10 +80,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
strelka_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -90,10 +94,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
tiddit_vcf {
metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json"
@@ -103,10 +108,11 @@ params {
study_id = "TEST-PR"
id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b"
gender = "Male"
- experimentalStrategy : "WGS"
- genomeBuild : "GRCh38_hla_decoy_ebv"
- tumourNormalDesignation : "Normal"
- sampleType : "Total DNA"
+ experimentalStrategy = "WGS"
+ genomeBuild = "GRCh38_hla_decoy_ebv"
+ tumourNormalDesignation = "Normal"
+ sampleType = "Total DNA"
+ dataType = "SNV"
}
}
-}
\ No newline at end of file
+}
diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
index 3ad0a6a..fcce522 100644
--- a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
+++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf
@@ -21,7 +21,8 @@ workflow test_payload_germlinevariant {
sampleType : params.sampleType,
gender : params.gender,
study_id : params.study_id,
- tool : params.tool
+ tool : params.tool,
+ dataType : params.dataType
],
files, analysis_json]
}
From 78c534810d5236110062b8d59b1f121bf961cb03 Mon Sep 17 00:00:00 2001
From: edsu7 <22638361+edsu7@users.noreply.github.com>
Date: Fri, 30 Jun 2023 13:31:10 -0400
Subject: [PATCH 4/4] add mpileup
---
.../germlinevariant/resources/usr/bin/main.py | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
index dea2199..94b1c24 100755
--- a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
+++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py
@@ -127,6 +127,17 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to
file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
else:
sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
+ elif tool=="mpileup":
+ if re.match(r'.*.vcf.gz$', file_to_upload):
+ file_type = 'VCF'
+ file_info.update({'dataType': 'Raw %s Calls' % data_type})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ elif re.match(r'.*.vcf.gz.tbi$', file_to_upload):
+ file_type = 'TBI'
+ file_info.update({'dataType': 'VCF Index'})
+ file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]})
+ else:
+ sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)
elif tool=="cnvkit":
if re.match(r'.*.vcf.gz$', file_to_upload):
file_type = 'VCF'
@@ -245,7 +256,7 @@ def main():
parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml")
parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files")
parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling",
- choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes'])
+ choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes','mpileup'])
parser.add_argument("-d", "--data-type", dest="data_type", required=True,type=str, help="Data type for upload",choices=['InDel',"SNV","CNV"])
args = parser.parse_args()