From 26efb189383a035fcf8ccdecef7f91d06d6f75c6 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Tue, 9 May 2023 18:01:50 -0400 Subject: [PATCH 1/4] first commit --- .../payload/germlinevariant/main.nf | 43 +++ .../payload/germlinevariant/meta.yml | 60 ++++ .../germlinevariant/resources/usr/bin/main.py | 314 ++++++++++++++++++ tests/config/nextflow.config | 14 +- tests/config/test_data.config | 52 ++- ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 751 bytes ...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 105 bytes .../data/qa/deepvariant/collated_versions.yml | 14 + ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 3315 bytes ...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 106 bytes tests/data/qa/freebayes/collated_versions.yml | 24 ++ ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...c24f393b92.haplotypecaller.filtered.vcf.gz | Bin 0 -> 5034 bytes ...393b92.haplotypecaller.filtered.vcf.gz.tbi | Bin 0 -> 105 bytes .../qa/haplotypecaller/collated_versions.yml | 22 ++ ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 1627 bytes ...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 77 bytes tests/data/qa/manta/collated_versions.yml | 14 + ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 2162 bytes ...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 105 bytes tests/data/qa/strelka/collated_versions.yml | 18 + ...-c24f-493b-b8d3-46c24f393b92.analysis.json | 117 +++++++ ...af8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz | Bin 0 -> 1172 bytes ...346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi | Bin 0 -> 72 bytes tests/data/qa/tiddit/collated_versions.yml | 14 + .../payload/germlinevariant/main.nf | 29 ++ 
.../payload/germlinevariant/nextflow.config | 6 + .../payload/germlinevariant/test.yml | 55 +++ 32 files changed, 1379 insertions(+), 2 deletions(-) create mode 100644 modules/icgc-argo-workflows/payload/germlinevariant/main.nf create mode 100644 modules/icgc-argo-workflows/payload/germlinevariant/meta.yml create mode 100755 modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz create mode 100644 tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi create mode 100644 tests/data/qa/deepvariant/collated_versions.yml create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz create mode 100644 tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi create mode 100644 tests/data/qa/freebayes/collated_versions.yml create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz create mode 100644 tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi create mode 100644 tests/data/qa/haplotypecaller/collated_versions.yml create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz create mode 100644 tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi create mode 100644 tests/data/qa/manta/collated_versions.yml create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz 
create mode 100644 tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi create mode 100644 tests/data/qa/strelka/collated_versions.yml create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz create mode 100644 tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi create mode 100644 tests/data/qa/tiddit/collated_versions.yml create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config create mode 100644 tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf new file mode 100644 index 0000000..6e127c8 --- /dev/null +++ b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf @@ -0,0 +1,43 @@ +process PAYLOAD_GERMLINEVARIANT { + tag "$meta.id" + label 'process_single' + + + conda "bioconda::multiqc=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: // input, make update as needed + tuple val(meta), path(files_to_upload), path(metadata_analysis) + val genome_annotation + val genome_build + path pipeline_yml + val tool + + + output: // output, make update as needed + tuple val(meta), path("*.payload.json"), path("out/*{vcf.gz,vcf.gz.tbi}"), emit: payload_files + path "versions.yml", emit: versions + + script: + // add and initialize variables here as needed + def arg_pipeline_yml = pipeline_yml.name != 'NO_FILE' ? 
"-p $pipeline_yml" : '' + """ + main.py \ + -f ${files_to_upload} \ + -a ${metadata_analysis} \ + -g "${genome_annotation}" \ + -b "${genome_build}" \ + -w "DNA Seq Germline Workflow" \ + -s "${workflow.sessionId}" \ + -v "${workflow.manifest.version}" \ + -t "${tool}" \ + $arg_pipeline_yml + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + } diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml b/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml new file mode 100644 index 0000000..105e61b --- /dev/null +++ b/modules/icgc-argo-workflows/payload/germlinevariant/meta.yml @@ -0,0 +1,60 @@ +name: "payload_germlinevariant" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "payload_germlinevariant": + description: "A simple wrapper written in `nextflow` for the payload generation tool to generate ARGO Song payloads containing QC metrics files." + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + + - files_to_upload: + type: file + description: QC metrics files + + - metadata_analysis: + type: file + description: Song metadata in JSON format + + - genome_annotation: + type: string + description: genome annotation name + + - genome_build: + type: string + description: genome build name + + - path pipeline_yml: + type: file + description: yml file collect from CUSTOM_DUMPSOFTWAREVERSIONS + - tool: + type: string + description: name of tool + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + + - payload_files: + type: file + description: Generated payload and QC files with normalized names + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + + + +authors: + - "@edsu7" diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py new file mode 100755 index 0000000..a33e5c4 --- /dev/null +++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + Copyright (C) 2021, Ontario Institute for Cancer Research + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . 
def calculate_size(file_path):
    """Return the size of *file_path* in bytes."""
    return os.path.getsize(file_path)


def calculate_md5(file_path):
    """Return the hexadecimal MD5 digest of *file_path*.

    The file is read in 1 MiB chunks so large VCFs are never loaded into
    memory at once.
    """
    md5 = hashlib.md5()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            md5.update(chunk)
    return md5.hexdigest()


# Tool name as passed on the command line -> label written to the
# payload's ``info.analysis_tools``.
_ANALYSIS_TOOL_LABELS = {
    'deepvariant': 'DeepVariant',
    'strelka': 'Strelka',
    'tiddit': 'Tiddit',
    'haplotypecaller': 'haplotypecaller',
    'manta': 'Manta',
    'freebayes': 'Freebayes',
}

# (input filename suffix, payload fileType, payload dataType).
# NOTE(review): every tool, including the manta/tiddit callers, is labelled
# 'Raw SNV Calls' / 'Simple Nucleotide Variation' exactly as before --
# confirm this is the intended data type for those callers.
_FILE_KINDS = (
    ('.vcf.gz.tbi', 'TBI', 'VCF Index'),
    ('.vcf.gz', 'VCF', 'Raw SNV Calls'),
)

# Identifier keys stripped from each sample and its nested records.
_REDUNDANT_SAMPLE_KEYS = ('info', 'sampleId', 'specimenId', 'donorId', 'studyId')


def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator, tool, new_dir):
    """Describe one output file for the payload and stage a renamed copy.

    The file is copied into *new_dir* under the normalized name
    ``<study>.<donor>.<sample>.<strategy>.<date>.<process_indicator>.<suffix>``.

    Args:
        file_to_upload: Path to a ``.vcf.gz`` or ``.vcf.gz.tbi`` file.
        date_str: Date component of the new file name (e.g. ``20230509``).
        analysis_dict: Input analysis metadata; ``studyId``, ``samples[0]``
            and ``experiment`` are read from it.
        process_indicator: Process component of the new file name.
        tool: Variant caller name; must be a key of ``_ANALYSIS_TOOL_LABELS``.
        new_dir: Directory receiving the renamed copy (created if missing).

    Returns:
        dict with ``fileSize``, ``fileMd5sum``, ``fileAccess``, ``info``,
        ``dataType``, ``fileName`` and ``fileType``.

    Exits (``SystemExit``) when the tool or the file suffix is unsupported.
    """
    if tool not in _ANALYSIS_TOOL_LABELS:
        sys.exit('Error: unsupported variant calling tool: %s' % tool)

    # Literal suffix comparison: the previous regexes used unescaped dots
    # and therefore also accepted names such as ``x_vcf_gz``.
    for suffix, file_type, data_type in _FILE_KINDS:
        if file_to_upload.endswith(suffix):
            break
    else:
        sys.exit('Error: unknown QC metrics file: %s' % file_to_upload)

    experiment = analysis_dict['experiment']
    # Lower-case the strategy whichever key supplies it so generated names
    # are consistent (the fallback used to be left in its original case).
    strategy = experiment.get('experimental_strategy') or experiment['library_strategy']

    # file naming pattern:
    #   <study>.<donor>.<sample>.<strategy>.<date>.<process_indicator>.<suffix>
    new_fname = '.'.join([
        analysis_dict['studyId'],
        analysis_dict['samples'][0]['donor']['donorId'],
        analysis_dict['samples'][0]['sampleId'],
        strategy.lower(),
        date_str,
        process_indicator,
        suffix[1:],
    ])

    os.makedirs(new_dir, exist_ok=True)
    # Copy rather than symlink, resolving any staged symlink first.
    shutil.copyfile(os.path.realpath(file_to_upload), os.path.join(new_dir, new_fname))

    return {
        'fileSize': calculate_size(file_to_upload),
        'fileMd5sum': calculate_md5(file_to_upload),
        'fileAccess': 'controlled',
        'info': {
            'data_category': "Simple Nucleotide Variation",
            'analysis_tools': [_ANALYSIS_TOOL_LABELS[tool]],
        },
        'dataType': data_type,
        'fileName': new_fname,
        'fileType': file_type,
    }


def get_basename(metadata):
    """Return ``<studyId>.<donorId>.<sampleId>`` for the first sample.

    Exits (``SystemExit``) when any of the three identifiers is missing or
    empty, including when the enclosing keys are absent altogether.
    """
    try:
        first_sample = metadata['samples'][0]
        parts = [
            metadata['studyId'],
            first_sample['donor']['donorId'],
            first_sample['sampleId'],
        ]
    except (KeyError, IndexError, TypeError):
        parts = None

    if not parts or not all(parts):
        sys.exit('Error: missing study/donor/sample ID in the provided metadata')

    return ".".join(parts)


def get_sample_info(sample_list):
    """Return a deep copy of *sample_list* without system-assigned IDs.

    The keys in ``_REDUNDANT_SAMPLE_KEYS`` are removed from every sample and
    from its nested ``specimen`` and ``donor`` records. The input is left
    untouched; ``None`` yields an empty list.
    """
    samples = copy.deepcopy(sample_list) or []
    for sample in samples:
        for record in (sample, sample.get('specimen'), sample.get('donor')):
            if not isinstance(record, dict):
                continue  # tolerate samples lacking a specimen/donor section
            for key in _REDUNDANT_SAMPLE_KEYS:
                record.pop(key, None)

    return samples
def prepare_tarball(sampleId, qc_files, tool_list):
    """Bundle the files of each tool into ``tarball/<sampleId>.<tool>.tgz``.

    A file belongs to a tool when the tool name occurs anywhere in its path.
    Tools with no matching file produce no archive. Members are stored under
    their base name, in sorted path order, with symlinks dereferenced.

    Args:
        sampleId: Sample identifier used as the archive name prefix.
        qc_files: Paths of the candidate files.
        tool_list: Tool names to build archives for.
    """
    tgz_dir = 'tarball'
    os.makedirs(tgz_dir, exist_ok=True)

    # De-duplicate while keeping order: a repeated tool name used to add
    # every matching file to the archive once per repetition.
    for tool in dict.fromkeys(tool_list):
        members = sorted(f for f in qc_files if tool in f)
        if not members:
            continue
        tarfile_name = f"{tgz_dir}/{sampleId}.{tool}.tgz"
        with tarfile.open(tarfile_name, "w:gz", dereference=True) as tar:
            for f in members:
                tar.add(f, arcname=os.path.basename(f))


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser(description='Tool: payload-gen for germline variant calls')
    # ``--metatada-analysis`` is the historical (misspelled) long option; it
    # is kept as an alias so existing invocations continue to work.
    parser.add_argument("-a", "--metadata-analysis", "--metatada-analysis", dest="metadata_analysis",
                        required=True, help="Input metadata analysis", type=str)
    parser.add_argument("-f", "--files_to_upload", dest="files_to_upload", type=str, required=True,
                        nargs="+", help="All files to upload")
    parser.add_argument("-g", "--genome_annotation", dest="genome_annotation", default="", help="Genome annotation")
    parser.add_argument("-b", "--genome_build", dest="genome_build", default="", help="Genome build")
    parser.add_argument("-w", "--wf-name", dest="wf_name", required=True, help="Workflow name")
    parser.add_argument("-s", "--wf-session", dest="wf_session", required=True, help="workflow session ID")
    parser.add_argument("-v", "--wf-version", dest="wf_version", required=True, help="Workflow version")
    parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml")
    # NOTE(review): 'cnvkit' is accepted here, but the file-info step in this
    # file has no cnvkit branch and exits with an error for it -- confirm
    # whether the choice should be dropped or support added.
    parser.add_argument("-t", "--tool", dest="tool", required=True, type=str, help="Tool used for variant calling",
                        choices=['strelka', 'cnvkit', 'deepvariant', 'tiddit', 'manta', 'haplotypecaller', 'freebayes']
                        )
    return parser.parse_args(argv)


def main(argv=None):
    """Generate a ``variant_processing`` payload for germline variant calls.

    Reads the input analysis metadata, copies every file to upload into
    ``out/`` under its normalized name and writes
    ``<uuid>.<workflow_name>.payload.json`` to the working directory.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv[1:]``.

    Exits (``SystemExit``) with a message when the workflow name is unknown
    or the metadata has no ``experiment`` / ``samples`` section.
    """
    args = _parse_args(argv)

    # Validate before any I/O: an unknown workflow name previously reached
    # ``".".join`` as None and surfaced as a TypeError.
    process_type = workflow_process_map.get(args.wf_name)
    if process_type is None:
        sys.exit('Error: unsupported workflow name: %s' % args.wf_name)

    with open(args.metadata_analysis, 'r') as f:
        analysis_dict = json.load(f)

    if not isinstance(analysis_dict.get('experiment'), dict):
        sys.exit('Error: missing experiment section in the provided metadata')
    if not analysis_dict.get('samples'):
        sys.exit('Error: missing samples in the provided metadata')

    pipeline_info = {}
    if args.pipeline_yml:
        with open(args.pipeline_yml, 'r') as f:
            # An empty YAML document loads as None; keep ``info`` a dict.
            pipeline_info = yaml.safe_load(f) or {}

    payload = {
        'analysisType': {
            'name': 'variant_processing'
        },
        'studyId': analysis_dict.get('studyId'),
        'info': {},
        'workflow': {
            'workflow_name': args.wf_name,
            'workflow_version': args.wf_version,
            'session_id': args.wf_session,
            'inputs': [
                {
                    'analysis_type': analysis_dict['analysisType']['name'],
                    'input_analysis_id': analysis_dict.get('analysisId')
                }
            ],
            'info': pipeline_info
        },
        'files': [],
        'experiment': analysis_dict.get('experiment'),
        'samples': get_sample_info(analysis_dict.get('samples'))
    }
    if args.genome_build:
        payload['workflow']['genome_build'] = args.genome_build
    if args.genome_annotation:
        payload['workflow']['genome_annotation'] = args.genome_annotation

    # pass `info` dict from the input analysis to the new payload
    if isinstance(analysis_dict.get('info'), dict):
        payload['info'] = analysis_dict['info']
    else:
        payload.pop('info')

    # ``payload['experiment']`` is the same object as
    # ``analysis_dict['experiment']``, so the rename below is also visible to
    # the per-file naming step.
    if 'library_strategy' in payload['experiment']:
        payload['experiment']['experimental_strategy'] = payload['experiment'].pop('library_strategy')

    new_dir = 'out'
    os.makedirs(new_dir, exist_ok=True)

    # generate date string
    date_str = date.today().strftime("%Y%m%d")

    process_indicator = ".".join([args.tool, "germline", process_type])
    for f in sorted(args.files_to_upload):
        payload['files'].append(
            get_files_info(f, date_str, analysis_dict, process_indicator, args.tool, new_dir)
        )

    payload_name = "%s.%s.payload.json" % (str(uuid.uuid4()), args.wf_name.replace(" ", "_"))
    with open(payload_name, 'w') as f:
        json.dump(payload, f, indent=2)


if __name__ == "__main__":
    main()
a/tests/config/nextflow.config b/tests/config/nextflow.config index 4c14e4a..80f3bee 100644 --- a/tests/config/nextflow.config +++ b/tests/config/nextflow.config @@ -1,4 +1,5 @@ + manifest { homePage = 'https://github.com/icgc-argo-workflows/argo-modules' description = 'ARGO Generic Modules to be shared across workflows for RDPC processing' @@ -57,4 +58,15 @@ includeConfig 'modules.config' includeConfig 'test_data.config' // Enable locally defined binary scripts for modules -nextflow.enable.moduleBinaries = true \ No newline at end of file +nextflow.enable.moduleBinaries = true + +process { + withName: 'PAYLOAD_GERMLINEVARIANT' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "{*payload.json,out/*vcf.gz,out/*vcf.gz.tbi}", + saveAs: { "${meta.tool}/${meta.id}/${it}" } + ] + } +} diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 2bcd53d..e16a301 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -30,4 +30,54 @@ params { analysis_id_stage = "c62cee87-04ae-4988-acee-8704aec988d4" } } -} + profiles { + deepvariant_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"] + pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="deepvariant" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + freebayes_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"] + 
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="freebayes" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + haplotypecaller_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz","${params.test_data_base}/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi"] + pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="haplotypecaller" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + manta_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"] + pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="manta" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + strelka_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"] + pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="strelka" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + tiddit_vcf { + metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" + files_to_upload=["${params.test_data_base}/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz","${params.test_data_base}/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi"] + 
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" + tool="tiddit" + study_id = "TEST-PR" + analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + } +} \ No newline at end of file diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json new file mode 100644 index 0000000..dc1d78a --- /dev/null +++ b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json @@ -0,0 +1,117 @@ +{ + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "studyId" : "TEST-PR", + "analysisState" : "PUBLISHED", + "analysisType" : { + "name" : "sequencing_alignment", + "version" : 15 + }, + "samples" : [ + { + "sampleId" : "SA610228", + "specimenId" : "SP210201", + "submitterSampleId" : "COLO-829-BL", + "sampleType" : "Total DNA", + "matchedNormalSubmitterSampleId" : null, + "specimen" : { + "specimenId" : "SP210201", + "donorId" : "DO250183", + "submitterSpecimenId" : "COLO-829-BL", + "tumourNormalDesignation" : "Normal", + "specimenTissueSource" : "Blood derived", + "specimenType" : "Normal" + }, + "donor" : { + "donorId" : "DO250183", + "studyId" : "TEST-PR", + "submitterDonorId" : "COLO-829", + "gender" : "Female" + } + } + ], + "files" : [ + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram", + "fileType" : "CRAM", + "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31", + "fileSize" : 76810, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads" + }, + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "57a23de6-044c-555b-8070-bd278d449a63", + "studyId" : "TEST-PR", + 
"analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram.crai", + "fileType" : "CRAI", + "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652", + "fileSize" : 62, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads Index" + } + ], + "createdAt" : "2023-04-18T21:30:04.11549", + "updatedAt" : "2023-04-18T21:32:47.73069", + "firstPublishedAt" : "2023-04-18T21:32:47.728311", + "publishedAt" : "2023-04-18T21:32:47.728311", + "analysisStateHistory" : [ + { + "initialState" : "UNPUBLISHED", + "updatedState" : "PUBLISHED", + "updatedAt" : "2023-04-18T21:32:47.728311" + } + ], + "experiment" : { + "experimental_strategy" : "WXS", + "platform" : "ILLUMINA", + "platform_model" : "HiSeq 2000", + "sequencing_center" : "EXT", + "sequencing_date" : "2014-12-12", + "submitter_sequencing_experiment_id" : "TEST_EXP" + }, + "read_group_count" : 1, + "read_groups" : [ + { + "file_r1" : "test.paired_end.sorted.cram", + "file_r2" : "test.paired_end.sorted.cram", + "insert_size" : 298, + "is_paired_end" : true, + "library_name" : "testN", + "platform_unit" : "1", + "read_length_r1" : 75, + "read_length_r2" : 75, + "sample_barcode" : null, + "submitter_read_group_id" : "1" + } + ], + "workflow" : { + "genome_build" : "GRCh38_hla_decoy_ebv", + "inputs" : [ + { + "analysis_type" : "sequencing_experiment", + "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + ], + "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e", + "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a", + "workflow_name" : "DNA Seq Alignment", + "workflow_version" : "1.9.0" + } +} diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..65cc12947d62c84930154b7cc945a32d179fb942 GIT binary patch literal 751 zcmV1WxHWb!e1AR2-r_K7wc9D; zVJ)l_+JAdP>!i_r7RDiMwcFWvvKlV>e~#1s#Js;8g$h!1g_Q?i|LtL5^j4Or3k0db 
zSJJ`T2?ZOJdIuJT0_k9b%Dh7%l&atQ%A%``D#M58OO=@!2z4d0lpdiH4oW=us!ze9 zJ1Sp;qy}vo>V&^w>KrKI;MVg^}~Uy zy-5caw`l7*oPEp}(`41mElGpfalb*^Uj(a<1NO(-;|}emw*`vwln=JX0*aynp%f~- z=B4>3kKM()K0kW7=lsCvQq~;{1-sJ#5dJv|CH=<@adkypdMouVxCnTj;!T|CF#UF4 zq$!qCXh?G~8;AA{OL&)c7vL*|(Rz@R+2PO_OY z!e171zWV2FgRK=s-Mc$w7|*hKv&O^O_dWjE-VJpL2jiO6fiLwcA00M5f&18RjFQD! z$$f1E3ZuQ;H4;T-qbN|QcfRV!lv4KGY7hQe%%>!uFUdG1i(y8R$%?%EolMB-t3hgq zB%9awjd*l^)@nF}{n3j^ND?w4l$1M=w0hd6M#VlJR^(=p*G3`+KhwaT-K4=+HB=6Lq^JBr7r?9U8`znUKp*1w{)kCz$- h001A02m}BC000301^_}s0stET0{{R300000005x1XV?G$ literal 0 HcmV?d00001 diff --git a/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..5e4aeb4ddb80a14675b55474af89f0ee2af557ad GIT binary patch literal 105 zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF n);z+nEPg}76Nd*z9?D%34DJgvii3b=$fFr1&A<#c8$K}-(a4cH7xeOg{Epd^kr zfRiY4da!#~3}}nCg^?wVB&R2H*x$Y=>cg_ExIL5Z1vZCRiKHS~PrX$|`r=|0Mq-s@ zTV6OnU*EoHn0>goxV@V_PkiTZcQ=mb&gX+0kuS3_Ey5&resZG-t-`1fS&mYk=OXy& z@9@r}fA2oacifx7!*06}nd1+hUsK`S#f4al?9fj62ufBcS>z&n;b0!Ek_-vH+#nuB zA`(ZYA8XU@3DYjjF6M=(tQ?H6Om=b6wR_{$cIgSbzTg465oxjMzCWGS?@!D3Z=-}4 zhYOyHI4NFJfo9>aA__N25)4q3tSK4hOCE^;MPYp-2fq{>fdXD|v<_cHEdN}>f&#Rz zO*QUHlROU>k=Wl_i&$j55CK|c$yUCdiIvDiycCGX0pcAm<*jjYzD*;6(kKao*Q3Qg z)r&pltY*1SJ069_v6bhJm3fhc@w(q<-Iwq09)3BG*?ky?+uargfDMH)TK{}~i;`WD z?h3SslI5?sYuxL??2lhiO%(nif&ub2nF#tQ0-u^lj(Cm&u?k}mz(jxu%q367GF*jA zDS+-=p4@sarviSy+d{znR(cvP!T26j9?v;P(Ub&yqe-Od?0D)^>M1XcV*!f2y zR;wV4*B^5f#-fVrju);wPUuzXb5iiA=9{cqngB;f0_@jLZ-m+z=(aOZPoPaI4?$K1 zvuKrx-*zHizJh&lOUC21K%dBf{<~xTp*DX?9mng4&YXQCKKF$AYbc6Nd%w8m%v>KW_vQzZW77M1bT=d!@-iVa%!tsK+ebIzV5; zVk7-2eqRdNk)UG33-r}H=(vCK(DbX7w|X;GRcDOAYKExQbvy_CUJl~h~KS<3*_yytbN#F?7m}ihVYLAH1V6%Jegfi{C6bg ziRU$0fd2Wl$}NCYj_g$1x#<7O4b$hlju(2vs(b`8UR8cTT44dLyG*}ON5U$HWM!Nu z@r%gTrFSy4;9+ze)U!!bR;qaG9lerBB;s|oscE{5`-%JD&YC8@veFZ^_kzc*3cHKL 
z-*y7UNj%(Btj1}>l)180u@t7vI?E)>M4l!w6#rJqDc{dKi~Rn(fc@&9?$2ELwn~Rp zlY*LtNI)~tSwLt0PxogiMf&&DC|x&qs=>p&Ge&PDl#jsdOD0ujh5cv8+?V@SSX2W4 zhRG8#A&X0ux+?{UyevXcWUGAJ0snY&(_K_gU_ze}C2N!fL5`%JV0jQ`3y~MK-XQ3< zi!g@!F{lh4$i*^=gCRWE8QOF2{7F+V-d2T|C25jLnA9N^eZ+D1g4tq`IAF52B;mpR_W>pbWv3XGO?6U1?cl z<>3Zjf|q#|l}8MXguSD^^Enf!Wt{J}5Sb|y99fe3rqLbn3Frbj z$BKE`_yuStR)yUvr=a`1P7P3Jii4A#>S!aV6IUcw1^T3a{9LEH{iBd$s}t2mBmOyU zDa@~&-u8djm-=4WnEKwnGxfdMrSH9`H}%izP2XOy%mjPvEh*(cusQuvzGz%=W^DT5 zdh?#(%;0_WNj*C{zns;?`nwpF9jq-wem_4rbrZ{75x#&}Q^AY;t&vfEPSe1J{$ShD z%iQfA2KA{sJ-fVS`}jB_}(js_^nUaCsDF~#bFffwqeXWqxT@c z&B}fjHM<76F*(r!luwo4DV5zq`K>}Vq@2?-*=~6pL}4tPpH`U=3;rr{G%QaYwgRts zUT`!V7Cc*v0xdTgW4|y$2pg(|5P}yXK*QlSjE7A^8P>fwBA8Y;t9|OQZo(+yd}Rfy zW(=21TMf05x)?63KpkoZ{6ih7ixI=ijBkIE^NfpdEx+H9Qy7$a-O&gk6c(}yBXPqE z;V{Bfq7&=G#YMW|aIovdNi05GTr~0FwAc3G;v!FW*-|*L{42Tq7m8JEe7Ly&hyQr5 zc#m`C?ndz^w~9M^R-V4NGo}0{T!DT`DZk-X3XvBNALQ&5I;vlTpiQE)ziIW5cu%n^I`7L@q^r~J9G>?W`|;v=_(zU z`$ea|@a5H`L&n|{jOso4@Y{#~Jh=|lDIGugj%iXd_8%RlT4UdvIb`g6o&yT6w+^MF@!a3v@qPI(P^y#1>9|Kj4ppYl z_Kjz|H@5$1Kl!%%Xg|63bf%C?YBS5QnQ9F*3qEX^wXIn;tim4Z1XF8321A3Gc%*6- zM@E^dVC5P()(|zxPsN(1>7!nL>;RuG;lb}-bS74Hm1*NXb_}37-(p888yOWcNenK& z!A{qJoUTc3k{Sb2PCXk*ZT|;w-$SEaW>g|05?KAx-1i+eeg`kzW&>t3Ti428Llv8v zWl^A1URID)s!4?@Q$@9~O5ys-Arcb-Knw&%8+je#VG<8}3lHf9b114_O_^9VO|yp! 
zImtx@E>ce9Ei7WcVrUFY@tkI%AHskPcjYknXyX`})F?AuKaWG9^zu+gm>Ep}lRQja?*O4ADHLjA z!y4&uEpL0iW3-8|S|f7mkl3i&Qpxv#VgMDr#G>1#uG>Sy!jz7z!O+Ay(@lL3MkBg{ zm8tTqkRv7p^g-Ux2UR5!`O!ww)E>&P0adM!OQ{px9G^;H44W#0Y}EyWM`T1s^psvy z3-CZ?^dB7x1*Wb)bsmy{=m0H5f96Rm;-%uu2t86@sZ*@xo00$h1avOiXRuSA4VsK~bxuXSM0+ zm|D~{OMDjAM;aNOz*pBFeS~$BYQ5sZjOm2+GGvx+o@7WV1Bhx_ji}awNGUc(25$h!nEl^VC(QQ&&+{OpKbOYLSs%_JKOqN4nbIN9@}`b}t)2Dq%8;4Z|?o zL@C3jMMou318bIM7`;SQK-qK@`Uv;pG_gS`r8Q21EwfIX{^`DgTBRU0Q?djp&K!`X zN=u?-1gp?30!mQgtpoTxr(W2*Q8H>%GO%n)Y^IsEWmC(thm>NS xP>GTn*kXiz_%H4lFVP4q001A02m}BC000301^_}s0stET0{{R3000000038@MQ;EA literal 0 HcmV?d00001 diff --git a/tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/freebayes/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..89a3b5ee6aa18ea21c7c7a7074ab5bd55e575b1d GIT binary patch literal 106 zcmb2|=3rp}f&Xj_PR>jWz6{)jpHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF o);z+nZ0!@rB87sv>@G^(j0{^^FU<%78X}KonluA5*l-X50HX~YX#fBK literal 0 HcmV?d00001 diff --git a/tests/data/qa/freebayes/collated_versions.yml b/tests/data/qa/freebayes/collated_versions.yml new file mode 100644 index 0000000..c3e25f0 --- /dev/null +++ b/tests/data/qa/freebayes/collated_versions.yml @@ -0,0 +1,24 @@ +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT": + bcftools: 1.16 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES": + freebayes: 1.3.6 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:MERGE_FREEBAYES": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn": + score-client: 5.8.1 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet": + 
song-client: 5.0.2 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT": + bcftools: 1.16 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:BCFTOOLS_SORT": + bcftools: 1.16 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES": + freebayes: 1.3.6 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX": + samtools: 1.16.1 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_FREEBAYES:FREEBAYES": + freebayes: 1.3.6 diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json new file mode 100644 index 0000000..dc1d78a --- /dev/null +++ b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json @@ -0,0 +1,117 @@ +{ + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "studyId" : "TEST-PR", + "analysisState" : "PUBLISHED", + "analysisType" : { + "name" : "sequencing_alignment", + "version" : 15 + }, + "samples" : [ + { + "sampleId" : "SA610228", + "specimenId" : "SP210201", + "submitterSampleId" : "COLO-829-BL", + "sampleType" : "Total DNA", + "matchedNormalSubmitterSampleId" : null, + "specimen" : { + "specimenId" : "SP210201", + "donorId" : "DO250183", + "submitterSpecimenId" : "COLO-829-BL", + "tumourNormalDesignation" : "Normal", + "specimenTissueSource" : "Blood derived", + "specimenType" : "Normal" + }, + "donor" : { + "donorId" : "DO250183", + "studyId" : "TEST-PR", + "submitterDonorId" : "COLO-829", + "gender" : "Female" + } + } + ], + "files" : [ + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + 
"objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram", + "fileType" : "CRAM", + "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31", + "fileSize" : 76810, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads" + }, + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "57a23de6-044c-555b-8070-bd278d449a63", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram.crai", + "fileType" : "CRAI", + "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652", + "fileSize" : 62, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads Index" + } + ], + "createdAt" : "2023-04-18T21:30:04.11549", + "updatedAt" : "2023-04-18T21:32:47.73069", + "firstPublishedAt" : "2023-04-18T21:32:47.728311", + "publishedAt" : "2023-04-18T21:32:47.728311", + "analysisStateHistory" : [ + { + "initialState" : "UNPUBLISHED", + "updatedState" : "PUBLISHED", + "updatedAt" : "2023-04-18T21:32:47.728311" + } + ], + "experiment" : { + "experimental_strategy" : "WXS", + "platform" : "ILLUMINA", + "platform_model" : "HiSeq 2000", + "sequencing_center" : "EXT", + "sequencing_date" : "2014-12-12", + "submitter_sequencing_experiment_id" : "TEST_EXP" + }, + "read_group_count" : 1, + "read_groups" : [ + { + "file_r1" : "test.paired_end.sorted.cram", + "file_r2" : "test.paired_end.sorted.cram", + "insert_size" : 298, + "is_paired_end" : true, + "library_name" : "testN", + "platform_unit" : "1", + "read_length_r1" : 75, + "read_length_r2" : 75, + "sample_barcode" : null, + "submitter_read_group_id" : "1" + } + ], + "workflow" : { + "genome_build" : "GRCh38_hla_decoy_ebv", + "inputs" : [ + { + "analysis_type" : "sequencing_experiment", + "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + ], + "run_id" : 
"wes-d4f902992b3d443595d05dbf1490ab9e", + "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a", + "workflow_name" : "DNA Seq Alignment", + "workflow_version" : "1.9.0" + } +} diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..2721fbff9a8989d962bf7b950135cc92b5211082 GIT binary patch literal 5034 zcmZvg=Q|q?qlQ&u?^U&F&DxFG8bwR(+Db%YQ#+{@Gl(Ka&88?nDKTQNM2ad^ilQh= z?5$SKn(ukfbw2&B^L%)&`+5F^TOgSlK=$8UQ<7=xkdvjbZQC0{msBz|Dx)Zh&uW1z z$bnp8L$F_5PtBj|xxk6;-(POL(WmiYesLq+d%R}uY4ivFtV$4x4ZdlZO)s48D9VaX z1e|WwsB)%DI>^EQtQ0+s%kz2Q^ zPGAX5F5!-B?Va~q*y~@(u7U85M3>3wWS2nrwH9yJtmz`&x`v5Pv{|IXx@LI`LE~$~ ze?nRsJnh@?4-fJuikB9d@-w}+7Rd+c(*cL4*--Ihofa8do@+fDUEgCodim5ur65nq zk^cI%h8y$>EHk#fL80$S6KY+%`%;Egvmu>Q_npx0CRfM~hR~HD6JLIlVsM)H_H}+e zN{}FAbhD38TO@RTE_2?WkXG9PDF0mnKy1!n@cr0==c3k7)#;!1>U}%&=UZWWE5Qo> zm9S30HRaR|LgCBaGJSM3iu#HiF_iu7!fxQ65>ig*elH;o2YxM!7qD+%; zS@bX-^{hgOis3#5qC%o559rMG&)6-ILA& z3WMOBv1XWUE_=>lArfMM9w>XBkxz3o&is%gOTQd9suB^CEu_q}dz4S=m;e&s;Wb-M zf51asQ*d=(N=6#^8rOEx+T#&%MSe-^@gs zpkw($F@9s-EhF~N+F0>&aX(NzN*s@-{-l&bpE`24Az*wRmGL=73WKs_B2CY-6xPV! 
zxluXy0Tf@9gQ*2ONMI5vOsXC0^1NpSN4d_kAk9G+yVcx6BvW4sTy^JlbeuS@b3wRA zwxvKR1V=g0Acw*^4Hg524+d^XYb|7s-Jrs49ZuKPaO>MJ7G3GKm>N3lDGIA1F)G>W zHbRZ|dU30w>M0gi1ajNc#es`Nnu*D{lkf9hHj#dacP`KjMB6yeuVGdNMQAAyYkngm zwvj=izn;4s5^r-uvJ7dtfkS@OYBYkr3ln=xk^3z3gSPa2yT#Hcqj@racop>1OLhbA zG*Wxu|C!UZQgGeiO)1j##8c>Gvr8tCsm+%LGZuSa*&=jxJg=0XM`vhvE?izt5%$}Q zuz*+a06p=D)9uZR@(u9qq7HXig;e zw)Od+8Yk=QuneOY8QG)51LQHnUNIP4dCoX}~;C^c}Pz-@w!ppFfZGd>RhXvuEsB`AZa4+dP2 zT(16q)hzD7m2&z}8@OajS)*W$hLQRbeSE`ZP)iFRzH_&;GDsO&C-&LH}BB zXM3odW9?%hMv(drHDm^!J6)q=57RDL8RzKMxOt&i7*q*5kS`ftLJoU#} zP50Sz1-9spK+xKv@s{xcF&iyqYPq(9ok%S3w{9CWDK&~1%bnMm0dP=mWGro*&nPum zRfE!~ymuM^B@Vb+-@cZ&N8QY=b{6fC^M5*~%fLFMHyLa2MIo`&OGqA@$Q%cGA`zXV zz3=^GuRE?@$jL> z5+}NNx&or6ed1}uPTaF>CZj4BlCLcObf|Vjc3e07?4FNAE@h)LMq%&~+0`s%tk|s> z6iLcbPW`6wcD}wJkA1h3S=M`hI4eU=zy`|vEV4$#(UDXttqe=$5L4S;m1;7`lq-7z zqtJL)ZGOWtI)6%4e^X<_%ieMYCS)<33JeOF6AIb*&1GK(`aqtw!X+5`y{YbDY%kkC zV|@EquNWTK%3GX`ce4@n!MOp}$PGEq?cMUYDhEHF(P$S|l@nR%I_#zD)kJSEZIbZN z*k=sF6ExYw*Bq1Jvkc)0o9y9l?o9AGG~8b6WIyD>izPr9d z^7XkTwj&!8IH7%M0=Q+&+oNCGK&&KM;{#lkBpu6{?D*UcaR46pWN%IsLV>w4$0bA(sRRm#5`%ev>sc%`!8j|*4&LHQ( z`KuUS;69&_Qf7)Bt&v%9uaXI^hgNTq#DeN^$-R9*>{L$2s1}%m5~1N?)0buFt2)c% zk-}m9F&CHF?#0V-&pr>B*c!lmlp5A4$IY1e>FQKpSmp;^>Y;XUlS zdl*O^B^1Ex@~VK|kIo+g$dyeWpAXtt{}OVvV64aYP=9s1+aPLhyW{&&HLE^ z25ejZ0ImM=?MMWNd3XTd%D825-TY#S!|n}6dMq*JXH4|JssABUOKrzcs=7qZQ;PF= z?cYZG(MFpOQoQ*S_uM(1>+GumV3GDb@_EHzB2CjXU`71^bC?Je^l@A!dk)QDpYJ_|CI&uMMiR%jzeo<)wBv))%jJee1u80kfjB8FneTi)1UsQH;NW9Z_PYYnj-! 
z{oMWWqf>#2wWoR3UxA#N0M#~PLB3`rm!?BR0Evfl7A88DMbiS}&p2tNPQv3#nh%dU z>Ed%k@GKY8ImtM23v1ts{Md*2>^R&+5!a1KM;FOH+V^41L|Xp}bd`>c`tf{#Sgr@mfXEEV;IVh*@PtIL=#g1z+ps&Wfo{Qo3A$_ zh_em7+$pI_tAr&tE|cF9@i|A480{o}-mDDUe@rBR)~SyPvMqKYR%LYM3i28c2Zxs2kYAyIAq#zGe@0 zrgo*ibW?sg;AgG(D)bO_2Y>rHEOFpT*2 zH(-|xT!w{5L_{86goZ~&oE+AiL&Hr*@Mf4mLyOLSR)5`wH^whMjo4QAD(3x@fO(2) ze3-`JKV&??=(gfecT!XOiFQqLcI%c=#x;Kw7iL0Q1>CnN@((U4?SsL%JdL=^6fU+u z7+PXcgm0tz3Ig&3n2ZpM*itLG+oJ@h^}MW;W`9B=^y<1VwRcSyCUw@Bg_JSwx9E7`8_ul zFtB2dzu{P1=EQ@2i`kl$0v^()TrC2^o3;k)4%r3j?z}#eA81f#O<3OuLN2jlB<*7N z(%Tx(*@^Dm{HmJweSc=iT`_k|@lOp8gXk;6Ho7fL;p^3yAZ>-sBEtqEiRv+d$enrl92TN zKEbUhsmcU-BA>O*UFPBjR-^pAC$?;UHdcK_=&{1r72Smk{XB>8+fPVuNK`la8%0d& z?^W$%>@w{JlbwKF3*$}vmsbkD9#oby2 zT5JYnXY6&*)mI8b)9=y40-`s>uX%o2#`)7ZD}?eqqo2&7!R-EMlv zV$)!*iY7)0F_=>qBKwyzpa zfBEzLn$q;Ljj=h8fAO$T-o@g=X@xTV+dwD**jad*zx4LDRrGm#S307qH(xr&XkU`I z(ALT^O`$U6DhDB7*txSe(-3#U!y#;LURb~CQS2wxG{}m`d*j_fKjjuC|2KB#*3Q*D zB2RVwS}qS&b*KIJBl=qg!22uSU)`#gEn_QL3-9$F22Elb5)`-s zj#16#yR3IY910ZxP;SR4u^MPPV9ryHfeQlcDXIJZO#@g^wXPY{FR{+G-=v+p=(E&8 z(71lfNrd>V_bI1)n;2XLK_jk$*X+|D?gW?-)O4;yvqf0_DbTA7Shye@sG{k1 zzzTzF`Kz_7C#EW6uJwnF&)S?d`)L?5cU}xX)F5gmV%G6Aa$+HG84yd!iPx`Ckz`^$ z*INWXev`9F_>6aQ(&@r);orI=`^^pok4YZ4`Am2CO%cA=WfO#O?vx>2_Y+Ki!pP+- zF%^0}GhTjSJ&cDOFGB)Gg^L$J)n_J$Yh;Nxo7o65DNt(d(prmDLx(?FghV|v0kAr@{g-;T?>}g>0U-+r$aaMgTi7V%Inw6 z-jm6ElgkMnm~WH$-6>V-`2~;ll~GaDN?cF+JeX6$x=vp(SFwl*y&JYzs~;7(czDdl zKs4JeQh($ literal 0 HcmV?d00001 diff --git a/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi b/tests/data/qa/haplotypecaller/aaf8d346-c24f-493b-b8d3-46c24f393b92.haplotypecaller.filtered.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..b782b83eedf503c70308758f374bbc2b72f4fd46 GIT binary patch literal 105 zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF n);z+nZ01^rB83a;JW3OfGMs1mx*-T?hCG^K(hSUCvq1y^i6I>Aren%4gKm0c7GbS6sA>>If3bB@pbuwAXjTu`iZinjC68*JP4 
z=2^Ac9j`jSj>1mZA76xIBF!_)mF#>9Ns#;X>9^Oj?&rU(y3?>fIi9>i$tZ;|&jgR~ zhh?Mb@_7D88q$YpgVA`^Uw9d}DqT=~Jb9y1^?jG9+<>N-G|7k{LLuNh=E9N&%re1= z0a`GaW`aP_mc(bT{T2EIzlD_9%`v_o0&XDDgbvPbP(te)*bPBSjH85TqgASmN(q>s zE(tX#NuV^WmEFJ=HAiV-tqGqR*GM_KJ zkXU2n90L`DCS(S<1h=Nxv4Ll9H!$hW?*c%ra>;+Mo=6_^ z2&EM}5U7(fl+L~kxhCeFgIHsdlC)s>ce((nUVk-U3M%h9y8M!8-7i6cyy9!_Q$nbL>FhGK%a=5x&a*rB^*z9SfYQ{dOQjW5~FzdUeHUO}JWuBIpLr(fW@u zC&6GaTTHsE#$dItHUHOF@?do_&C@mMPW@uF%c!%on#<&vdbr>J5yvUg@UBF;mQ+O{``j6vROAClY+4 z6f+Pj4KHCw1B_xWiMh;jzCQE@U^0q)D!&Br#n}0x)KY&(%Q8bLT_zQ&UGs@wX13Gd zxHQFpk^u_qf=+@=DMM*S8iY$QcIw9Rg9-$Y{{-QMUr;Q4-jF7kpBMW6>jppn z7SwNUZoI6~bTISz_Rnef820C@+fv)*JMJu;8$}~W3qnOGz-pAn{dA@un&vS{Z`7ZL zB|-BYf}WBwXa1z((vseyD7$ih8lHjvNy}aCxGRcK z$qvx&**#yTxU|JZ;E<=cv&pi5=xBS&x5B-oKXRWy>Th$7Ci6wVH(K^f+Mu8v2s~-X zqcqbLaYJCN&Lf*^+}}!E@Q!!#uls3lHJVKy-C15Dzn2H2BJ|4#_&t}A0|ag$Lv5X} zD53Ad4CkEcfRN_G@*1+=F(Mi53Ki1&>kv0koIjaXTh7Q`P6(V1*rLeE*M=KFw#qJWpe2jQ`r}gA0;blO@kjtn25|>BE zyZP}ck6 zd#jtc4=#C5%UW6w7^O^bNu5S+bi*BjqOs<(5xHY5nV_^-&vsC&tuY0}_?oraSGB0w zj%)3k*1ER7X02NL$_=z`TI-u;J<^y$t@iR@b9bJU>MitAP6qq9%3#iL`$7!&};=?g6p$t z^}ye8Uw>&d8ux~@X^_bqA(sH+NR88kOsaIzy7A(bDpvhmvuImJD3huj;{B zwRgLiO)B%*vN8%Qi~gX}9j_{`fmMq8dd0P-N^!leSTc5c_CK(G%}Bxz001A02m}BC Z000301^_}s0stET0{{R300000003V7Fed;2 literal 0 HcmV?d00001 diff --git a/tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/manta/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..8ab61e09d27e124409b2dbad7c44816a0c593ffb GIT binary patch literal 77 zcmb2|=3rp}f&Xj_PR>jW1`OPVpHfm1fFLO$fx*zhghQj5LxY8(@ONR23Q(Oqng(eG JX0Uz`0RVK44YU9N literal 0 HcmV?d00001 diff --git a/tests/data/qa/manta/collated_versions.yml b/tests/data/qa/manta/collated_versions.yml new file mode 100644 index 0000000..7199083 --- /dev/null +++ b/tests/data/qa/manta/collated_versions.yml @@ -0,0 +1,14 @@ 
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_MANTA:MERGE_MANTA_DIPLOID": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn": + score-client: 5.8.1 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_MANTA:MANTA_GERMLINE": + manta: 1.6.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet": + song-client: 5.0.2 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX": + samtools: 1.16.1 diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json new file mode 100644 index 0000000..dc1d78a --- /dev/null +++ b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json @@ -0,0 +1,117 @@ +{ + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "studyId" : "TEST-PR", + "analysisState" : "PUBLISHED", + "analysisType" : { + "name" : "sequencing_alignment", + "version" : 15 + }, + "samples" : [ + { + "sampleId" : "SA610228", + "specimenId" : "SP210201", + "submitterSampleId" : "COLO-829-BL", + "sampleType" : "Total DNA", + "matchedNormalSubmitterSampleId" : null, + "specimen" : { + "specimenId" : "SP210201", + "donorId" : "DO250183", + "submitterSpecimenId" : "COLO-829-BL", + "tumourNormalDesignation" : "Normal", + "specimenTissueSource" : "Blood derived", + "specimenType" : "Normal" + }, + "donor" : { + "donorId" : "DO250183", + "studyId" : "TEST-PR", + "submitterDonorId" : "COLO-829", + "gender" : "Female" + } + } + ], + "files" : [ + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + 
"biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram", + "fileType" : "CRAM", + "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31", + "fileSize" : 76810, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads" + }, + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "57a23de6-044c-555b-8070-bd278d449a63", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram.crai", + "fileType" : "CRAI", + "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652", + "fileSize" : 62, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads Index" + } + ], + "createdAt" : "2023-04-18T21:30:04.11549", + "updatedAt" : "2023-04-18T21:32:47.73069", + "firstPublishedAt" : "2023-04-18T21:32:47.728311", + "publishedAt" : "2023-04-18T21:32:47.728311", + "analysisStateHistory" : [ + { + "initialState" : "UNPUBLISHED", + "updatedState" : "PUBLISHED", + "updatedAt" : "2023-04-18T21:32:47.728311" + } + ], + "experiment" : { + "experimental_strategy" : "WXS", + "platform" : "ILLUMINA", + "platform_model" : "HiSeq 2000", + "sequencing_center" : "EXT", + "sequencing_date" : "2014-12-12", + "submitter_sequencing_experiment_id" : "TEST_EXP" + }, + "read_group_count" : 1, + "read_groups" : [ + { + "file_r1" : "test.paired_end.sorted.cram", + "file_r2" : "test.paired_end.sorted.cram", + "insert_size" : 298, + "is_paired_end" : true, + "library_name" : "testN", + "platform_unit" : "1", + "read_length_r1" : 75, + "read_length_r2" : 75, + "sample_barcode" : null, + "submitter_read_group_id" : "1" + } + ], + "workflow" : { + "genome_build" : "GRCh38_hla_decoy_ebv", + "inputs" : [ + { + "analysis_type" : "sequencing_experiment", + 
"input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + ], + "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e", + "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a", + "workflow_name" : "DNA Seq Alignment", + "workflow_version" : "1.9.0" + } +} diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..c062997744dcca308be8488b4981a71683cde8be GIT binary patch literal 2162 zcmV-&2#xn2iwFb&00000{{{d;LjnL*2*p@kZ{kW4eRh6D>E>l-*BF06g6Fal34w_= zAq1H0NUPPTu?uL$?&fwk`M6j2zptuoGl68X%xa~5Knd>ds#B*R^QT<3d3#~pL)K3bbLIxyqaB1y1!obyPx@T)gKQg%y8-UQKyQp z4VR+(%WQ?PP%NcyAQv!a8dH|%8Wub^s4xS$F~AHk(A*$!4NHa0paQdE0(>w23(-8RkCND*X`MDH}oTp~jtX+a73Td9#s9~XG0p?MHR8UBT zkjs=qTAc+hh^ri#HXsFpwy5^~UCBqAvRzku1kEv*j}X0ac6j^st&_vsuS9Hl&a4#b zmCQ2&Dx`t6LXE=gn;Xe7(;BlorZ^L3IMchPN3x1RQ($JrHATG9ux45V%kyfv(u)RS ze#IUT7(A3Vx9q+4ZHCoSw(GT|B9+20E;KBpN--PGfQvP$G0!na4Ua3Ht{|10JhLIr zaqBAE_chKX&z|4TaSZ}U`Cv0#L#HQL5|~P%xi%;aJaV&osnfMJzVC9%4ZiJ=Rwz{^ zXP7~@K`;`S0QrC_XX`Z=OW;;}1fkkldbpWf_hz=PdwqAbDdwoU6LFNX<1E{Oj~RU7dBZe|IR2Ec1fxX7336c_S)y5cIqAORF;MpQav)2RTcW$^yI%c zt?&Vr#vhpOk88`pJD>-aWz4p1u;dIj8Z*nAZGn}>xLA5{DS&~HqEQQ`*yM&c?7n#} z)BCby$}Xg^Dnf0^U=OI+5~=M{is_7|210B0thY~4f4t@Xy)e!y^e+TvMO{|UT5(c< zv<~Zn`M(5nc}Or5WLcF4rnO8tGnkbXXzRlT!|qX(WlPlAu+|Dw%&3Uyk=?jY=3rL4 zuXH>$ii_oL?bNNIk>dmYs0@dQC-p4~-{Qxr;#hNQl>3ybt#plWr zvaGHDR$2$0((Oc|t))rCXcweep?c&Rq2PiSn}P)R#}-|Eo@u=)h`{>zT53*Xg8SpG zFAJgG{^zSWx4c(uy~q3h;p@xMx5H}rx(cpL+2J*9rN#eKS?}W-U)7j~Wl(Q+;*nGZ z%lYq^xiDTS%o;jnIcAXad(8Pt%B<|qbv89H8krVcU{<$=yYm6)HKu&QQyS>RD=gC* zOYjbs@rqHmL<2lSVfcch+P5@4+p{#tB{RDfQq5f?uB!Li!ONPu91U*lMRj&{bN=_< z-SAt%MdYs|R@?PUgoB*z72fEDl=ge72QOD`tJJ_0zF;cDH42I=f3}1oOIOvT;z93a zx7C|;1*~ki)(GA6Mlnmdkub-XLU7C3hMJ0I1y&6KY=3v{b!yj3Pvv6`e^6=n$8LW- z_>;1zm{=nG^T~bwmkZDEYZnUk^x^5Rk^AB4FM$_*{OIk{cYZnSZLh{&N!eRfWA_2e z`BD@p%=gF{p%jd!4r+yT-(WSCZgX@o+D;Si>E=RYP}eqf4x&QglHMOOo4|Q3Rjxr{ 
zDpj`Y=dwS!*j8}w9Z%&J>w_1lVe$p6$u+%xSRX$Q=6b&+zoFd8^%SnFQR`(--ZC<| z82rzmDqNsKk?xT``LbvJt+Y-H%9l1(?1We2ZyT}(8^I0WlOeTdH0#sQOfPgLL#Dp{ zs+5DmoEc6MmR0I1TpmomlvPB-uEMoc*2V&IDVFu?Ug+CCs&}K9j_y8>Z?-M)J^4&W zcNQW(nGMTfB@4Nh`Ex<@CW@t5S<7jWI5xzVbb%arBKITy`TiUnV86i$u8{GJU_ z6}0akQr$0d`RJ{mq0yML0vKDIW>MU3q+z^h#GPo~n4e}*BW}}!s1waQp_eLFK%=ou z&46jc>_k+cx3FIt4Z3M3R^OUN{xJL&`Mw`EV&C@zZ;qL_{GE(!M4v0rB`^wsf)u7( zJ%MF~6*OgY+r7xVuD0`~ZI;%HTe8u$Zitcv&#})8c0)gm{FZ-ubbP!;-lf%~*=&B% zsB{zcrrAe*Z{)f;M@e3a_G8J;x#6U3AuTVSi@w}VdBo@b0< zJH{4k#m$$D933BPxlt+Bfo&grqxqXl(G9(h7tk*d+Aw8iyuj|Y6mZR+0Zt(dlUAI> zAy6Dg$LF6XH`mViX6jt_oyo<(>0Qm7+b_KQ7hlad`Xn)9bfd=lZtW_CHlh?VdQpSu(s$hPPjn{y6E62T8A=^!oH? zl1$H%!7LeHIes%pqQp;v#7}%Ts6Kw;dwx>6s$C~h=ti!;Mb{1;&pCHyPH-T)R%?qc zJRDs}@WLcW0=NA5?%VLfQ`dLHlL}uLgptKJbVA=5_omZ5f})>*&`N?Zi3o)oT8aoq zqvdrHGMuzpMgD^by>(e^s8GS`mW_BL}- z*lt^0ojZ{q6Tsmif{G8v)$Sx=62^qh4P$~v{{>;vh&tQEB~cu>C*gNkQHNmloO8!_ zt_}qC6G;qO2{9HVv0Hw2b(91ruJ5+nHJaEbG*o;+`z`sL9_~5rByke7NIroZCmlCR zLdsOjYgPL3UEd9&EfaA-(k0*RgPHgVJkcptQ=GJuj@wGwZYzmgKMB2{&P;6a_@_tz o2C$e?u8tJ|03VA81ONa4009360763o02=@U00000000000113Nt^fc4 literal 0 HcmV?d00001 diff --git a/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/strelka/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..0c846432a310f343296d3fbf6c942f45c16b3bfa GIT binary patch literal 105 zcmb2|=3rp}f&Xj_PR>jWJ`CK2pHfm%5)u-U5)v9N@&Lgpg{H>F#@@z8Ma#t}PH^bF n);z+nERxl+NI~Ejqte8q41!u~{saNdkVi92nt>T?Hi!TKK^_{u literal 0 HcmV?d00001 diff --git a/tests/data/qa/strelka/collated_versions.yml b/tests/data/qa/strelka/collated_versions.yml new file mode 100644 index 0000000..b6a800d --- /dev/null +++ b/tests/data/qa/strelka/collated_versions.yml @@ -0,0 +1,18 @@ +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:MERGE_STRELKA": + gatk4: 4.3.0.0 
+"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE": + strelka: 2.9.10 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_APPLYBQSR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:scoreDn": + score-client: 5.8.1 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE": + strelka: 2.9.10 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:GERMLINE_VARIANT_STRELKA:STRELKA_GERMLINE": + strelka: 2.9.10 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_SONG_SCORE_DOWNLOAD:songGet": + song-client: 5.0.2 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:GATK4_BASERECALIBRATOR": + gatk4: 4.3.0.0 +"NFCORE_ARGOGERMLINE:ARGOGERMLINE:MATCHED_GERMLINE_VARIANTS:NORMAL_GATK4_RECALIBRATE:SAMTOOLS_INDEX": + samtools: 1.16.1 diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json new file mode 100644 index 0000000..dc1d78a --- /dev/null +++ b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json @@ -0,0 +1,117 @@ +{ + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "studyId" : "TEST-PR", + "analysisState" : "PUBLISHED", + "analysisType" : { + "name" : "sequencing_alignment", + "version" : 15 + }, + "samples" : [ + { + "sampleId" : "SA610228", + "specimenId" : "SP210201", + "submitterSampleId" : "COLO-829-BL", + "sampleType" : "Total DNA", + "matchedNormalSubmitterSampleId" : null, + "specimen" : { + "specimenId" : "SP210201", + "donorId" : "DO250183", + "submitterSpecimenId" : "COLO-829-BL", + "tumourNormalDesignation" : "Normal", + "specimenTissueSource" : "Blood derived", + "specimenType" : "Normal" + }, + "donor" : { + "donorId" : "DO250183", + "studyId" : "TEST-PR", + "submitterDonorId" : 
"COLO-829", + "gender" : "Female" + } + } + ], + "files" : [ + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "581f8e5b-0601-5dde-8ace-78eef3a6db47", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram", + "fileType" : "CRAM", + "fileMd5sum" : "e0146aa3a5a196e4cb5593f545e95c31", + "fileSize" : 76810, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads" + }, + { + "info" : { + "analysis_tools" : [ + "BWA-MEM", + "biobambam2:bammarkduplicates2" + ], + "data_category" : "Sequencing Reads" + }, + "objectId" : "57a23de6-044c-555b-8070-bd278d449a63", + "studyId" : "TEST-PR", + "analysisId" : "aaf8d346-c24f-493b-b8d3-46c24f393b92", + "fileName" : "test.paired_end.sorted.cram.crai", + "fileType" : "CRAI", + "fileMd5sum" : "31ffc1b11f53210e4ac0d55092de0652", + "fileSize" : 62, + "fileAccess" : "controlled", + "dataType" : "Aligned Reads Index" + } + ], + "createdAt" : "2023-04-18T21:30:04.11549", + "updatedAt" : "2023-04-18T21:32:47.73069", + "firstPublishedAt" : "2023-04-18T21:32:47.728311", + "publishedAt" : "2023-04-18T21:32:47.728311", + "analysisStateHistory" : [ + { + "initialState" : "UNPUBLISHED", + "updatedState" : "PUBLISHED", + "updatedAt" : "2023-04-18T21:32:47.728311" + } + ], + "experiment" : { + "experimental_strategy" : "WXS", + "platform" : "ILLUMINA", + "platform_model" : "HiSeq 2000", + "sequencing_center" : "EXT", + "sequencing_date" : "2014-12-12", + "submitter_sequencing_experiment_id" : "TEST_EXP" + }, + "read_group_count" : 1, + "read_groups" : [ + { + "file_r1" : "test.paired_end.sorted.cram", + "file_r2" : "test.paired_end.sorted.cram", + "insert_size" : 298, + "is_paired_end" : true, + "library_name" : "testN", + "platform_unit" : "1", + "read_length_r1" : 75, + "read_length_r2" : 75, + "sample_barcode" : null, + "submitter_read_group_id" : "1" + } + ], + "workflow" 
: { + "genome_build" : "GRCh38_hla_decoy_ebv", + "inputs" : [ + { + "analysis_type" : "sequencing_experiment", + "input_analysis_id" : "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + } + ], + "run_id" : "wes-d4f902992b3d443595d05dbf1490ab9e", + "session_id" : "9da55181-2fb8-443e-a82f-b8b8e33d9f2a", + "workflow_name" : "DNA Seq Alignment", + "workflow_version" : "1.9.0" + } +} diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..f2820417168a15946418e7c9860f04ed225630dd GIT binary patch literal 1172 zcmV;F1Z(>riwFb&00000{{{d;LjnMI1g%zGbK*7>eI~y`le}a*F~K&FO{eY*U`UxE zkYGZmPdmcCfF`!&l>{mM^}X^(77R_*}2Qd8DhWC5vGxWJ0w!?+v!ix_xnV zrNl;t;LV1i?ZPJ*9 z^pxSxv(DJ>KOWl8D32f^(FfaxV@35E1?H=F=P1u~|?xQ;1(R>yG_Hv#tdC zN`Ob1Stz(>tIRT7OUJp1fv@!1Yc?7U+ZYVTgGsg=%x8Z*^>4`l}p-CP%S%7{6;AGk#AZJ7{kA%iCc9A-y84WrmiSq%;6li3n zlu4DrDXh6BSb|6yQVA=xBAqI9_kV-#R-kA7AB0w$G!@uqd`0vc2yCHRfZpukj5Z12 z>n6)R7B}z0LzHR%VK^C|mE%BDsWV3*ZO_Hg;Sk8o=8L_!kZZcf2F$30X=SnkB;) zF%=3K;dt!xz0*}0zG3wQ$_-t?aVTN2V&F~f-x!aO75UzW5-<>quw$ZGv(*|S*K3^J z=1;WPTY^*_2oj)`GW?$7=N=a?2#@ z2olOeH9t1V^}^gZQh~=e>D=V%^XVwg@ATs2S@hqidz1GZyE*qD7F1VTq>!&`YM{C~ zSRvs@kfY~HG$#-Tds=O@yA-D^vJ7y<7Lv+cps7{|_gzwY<2klnZ)Cqa+itmyW{VJW zc8~H~9Db`i?Yp*p=iIfOdwe2~KzV*sG$Q6XcXhknZna(4y>Gc_LV}sUw{F|_^_GKo zLAT{JKif{zMSOD|g)ajk->8hd0gV-)ySay95_y-m8zpaJ5z_c}!T7CaQG{J-S!!$k zURnBuDh0^`MaHrgG$E8OKSyq}ZG}#AX*KWNg|)yBR2 zk}6G!CC=NQyK#AO)%$Had8|E6g4)op;YF{8m%Q5ZOJ`KeMXYA-47I$TH4RFSFa8E3 m^xyrB3;+NhiwFb&00000{{{d;LjnLB00RI3000000002%m?ZE3 literal 0 HcmV?d00001 diff --git a/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi b/tests/data/qa/tiddit/aaf8d346-c24f-493b-b8d3-46c24f393b92.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..b9e0990fab2fca9dab7a72ca96023b80994bf088 GIT binary patch literal 72 zcmb2|=3rp}f&Xj_PR>jW+6>%file(it)} + 
pipeline_ch=Channel.of(file(params.pipeline_yml)) + + + ch_payload=analysis_ch.combine(files_ch.collect().toList()) + .map {analysis_json,files -> + [ + [ id : params.analysis_id, + study_id : params.study_id, + tool : params.tool + ], + files, analysis_json] + } + PAYLOAD_GERMLINEVARIANT( + ch_payload, + "", + "", + pipeline_ch, + params.tool + ) +} diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config b/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config new file mode 100644 index 0000000..e60f059 --- /dev/null +++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config @@ -0,0 +1,6 @@ +manifest { + name = 'Germline variant calls' + mainScript = 'main.nf' + nextflowVersion = '!>=22.10.1' + version = '1.0dev' +} \ No newline at end of file diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml b/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml new file mode 100644 index 0000000..6f0ac2a --- /dev/null +++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/test.yml @@ -0,0 +1,55 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml payload/germlinevariant +- name: "payload germlinevariant haplotypecaller" + command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,haplotypecaller_vcf + tags: + - "payload" + - "payload/germlinevariant" + - path: "output/variant_calling/haplotypecaller/*.json" + - path: "output/variant_calling/haplotypecaller/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.haplotypecaller.germline.snv.vcf.gz" + - path: "output/variant_calling/haplotypecaller/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.haplotypecaller.germline.snv.vcf.gz.tbi" +- name: "payload germlinevariant freebayes" + 
command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,freebayes_vcf + tags: + - "payload" + - "payload/germlinevariant" + files: + - path: "output/variant_calling/freebayes/*.json" + - path: "output/variant_calling/freebayes/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.freebayes.germline.snv.vcf.gz" + - path: "output/variant_calling/freebayes/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.freebayes.germline.snv.vcf.gz.tbi" +- name: "payload germlinevariant strelka" + command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,strelka_vcf + tags: + - "payload" + - "payload/germlinevariant" + files: + - path: "output/variant_calling/strelka/*.json" + - path: "output/variant_calling/strelka/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.strelka.germline.snv.vcf.gz" + - path: "output/variant_calling/strelka/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.strelka.germline.snv.vcf.gz.tbi" +- name: "payload germlinevariant manta" + command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,manta_vcf + tags: + - "payload" + - "payload/germlinevariant" + files: + - path: "output/variant_calling/manta/*.json" + - path: "output/variant_calling/manta/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.manta.germline.snv.vcf.gz" + - path: "output/variant_calling/manta/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.manta.germline.snv.vcf.gz.tbi" +- name: "payload germlinevariant deepvariant" + command: nextflow run 
./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,deepvariant_vcf + tags: + - "payload" + - "payload/germlinevariant" + files: + - path: "output/variant_calling/deepvariant/*.json" + - path: "output/variant_calling/deepvariant/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.deepvariant.germline.snv.vcf.gz" + - path: "output/variant_calling/deepvariant/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.deepvariant.germline.snv.vcf.gz.tbi" +- name: "payload germlinevariant tiddit" + command: nextflow run ./tests/modules/icgc-argo-workflows/payload/germlinevariant -entry test_payload_germlinevariant -c ./tests/config/nextflow.config -c ./tests/modules/icgc-argo-workflows/payload/germlinevariant/nextflow.config -profile docker,tiddit_vcf + tags: + - "payload" + - "payload/germlinevariant" + files: + - path: "output/variant_calling/tiddit/*.json" + - path: "output/variant_calling/tiddit/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.tiddit.germline.snv.vcf.gz" + - path: "output/variant_calling/tiddit/08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b/out/*.tiddit.germline.snv.vcf.gz.tbi" \ No newline at end of file From c767733e11c830ad7f1931ce2b0cce1c6efa5602 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Thu, 25 May 2023 19:03:02 -0400 Subject: [PATCH 2/4] update payload formating --- .../payload/germlinevariant/main.nf | 15 +- .../germlinevariant/resources/usr/bin/main.py | 257 ++++++++++-------- tests/config/test_data.config | 43 ++- .../payload/germlinevariant/main.nf | 18 +- 4 files changed, 194 insertions(+), 139 deletions(-) diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf index 6e127c8..d08b238 100644 --- a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf +++ 
b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf @@ -10,14 +10,12 @@ process PAYLOAD_GERMLINEVARIANT { input: // input, make update as needed tuple val(meta), path(files_to_upload), path(metadata_analysis) - val genome_annotation - val genome_build path pipeline_yml - val tool + val tarball output: // output, make update as needed - tuple val(meta), path("*.payload.json"), path("out/*{vcf.gz,vcf.gz.tbi}"), emit: payload_files + tuple val(meta), path("*.payload.json"), path("out/*"), emit: payload_files path "versions.yml", emit: versions script: @@ -27,12 +25,13 @@ process PAYLOAD_GERMLINEVARIANT { main.py \ -f ${files_to_upload} \ -a ${metadata_analysis} \ - -g "${genome_annotation}" \ - -b "${genome_build}" \ - -w "DNA Seq Germline Workflow" \ + -b "${meta.genomeBuild}" \ + -w "DNA Seq Germline Variant Workflow" \ + -r ${workflow.runName} \ -s "${workflow.sessionId}" \ -v "${workflow.manifest.version}" \ - -t "${tool}" \ + -t "${meta.tool}" \ + -l "${tarball}" \ $arg_pipeline_yml cat <<-END_VERSIONS > versions.yml diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py index a33e5c4..dcd4ca3 100755 --- a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py +++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py @@ -38,12 +38,11 @@ import csv import io import shutil -#LUCA-KR.DO231106.SA602282.wxs.20210112.gatk-mutect2.somatic.snv.open-filter.vcf.gz + workflow_process_map = { - 'DNA Seq Germline Workflow': 'snv' + 'DNA Seq Germline Variant Workflow': 'snv' } -tool_list = ['strelka'] def calculate_size(file_path): return os.stat(file_path).st_size @@ -56,7 +55,7 @@ def calculate_md5(file_path): md5.update(chunk) return md5.hexdigest() -def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir): +def get_files_info(file_to_upload, date_str, analysis_dict, 
process_indicator,tool,new_dir,pipeline_info,tarball): file_info = { 'fileSize': calculate_size(file_to_upload), 'fileMd5sum': calculate_md5(file_to_upload), @@ -65,82 +64,106 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to 'data_category': "Simple Nucleotide Variation", } } - ### deepvariant - if tool=="deepvariant": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['DeepVariant']}) - elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['DeepVariant']}) - else: - sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - elif tool=="strelka": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['Strelka']}) - elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['Strelka']}) - else: - sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - elif tool=="tiddit": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['Tiddit']}) - elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['Tiddit']}) + + if tarball=="false": + if tool=="deepvariant": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + 
file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="strelka": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="tiddit": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="haplotypecaller" : + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="manta": + 
if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="freebayes": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="cnvkit": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw SNV Calls'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) else: sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - elif tool=="haplotypecaller": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['haplotypecaller']}) - elif 
re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['haplotypecaller']}) - else: - sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - elif tool=="manta": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['Manta']}) - elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['Manta']}) - else: - sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - elif tool=="freebayes": - if re.match(r'.*.vcf.gz$', file_to_upload): - file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) - file_info['info'].update({'analysis_tools': ['Freebayes']}) - elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): - file_type = 'TBI' - file_info.update({'dataType': 'VCF Index'}) - file_info['info'].update({'analysis_tools': ['Freebayes']}) - else: - sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - #elif tool=="cnvkit": + elif tarball=="true": + if tool=="cnvkit": + file_type = 'TGZ' + file_info.update({'dataType': "CNV Supplement"}) + + file_info['info']['files_in_tgz']=[] + with tarfile.open(file_to_upload, 'r') as tar: + for member in tar.getmembers(): + file_info['info']['files_in_tgz'].append(member.name) + else: sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) - + #LUCA-KR.DO231106.SA602282.wxs.20210112.gatk-mutect2.somatic.snv.open-filter.vcf.gz.tbi" #"TEST-PR.DO250183.SA610228.wxs.20230501.snv-strelka.gvcf.gz", suffix={ "VCF":"vcf.gz", - "TBI": "vcf.gz.tbi", + "TBI":"vcf.gz.tbi", + "TGZ":"tgz" } # file naming patterns: # pattern: ....... 
@@ -154,27 +177,19 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to date_str, process_indicator, suffix[file_type] - ]) - - file_info['fileName'] = new_fname - file_info['fileType'] = file_type + ]) - if re.match(r'cnvkit', file_to_upload): - with tarfile.open(file_to_upload, 'r') as tar: - for member in tar.getmembers(): - file_info['info']['files_in_tgz'].append(member.name) + new_dir = 'out' + try: + os.mkdir(new_dir) + except FileExistsError: + pass - new_dir = 'out' - try: - os.mkdir(new_dir) - except FileExistsError: - pass + dst = os.path.join(os.getcwd(), new_dir, new_fname) + os.symlink(os.path.abspath(file_to_upload), dst) - dst = os.path.join(os.getcwd(), new_dir, new_fname) - os.symlink(os.path.abspath(file_to_upload), dst) - else: - shutil.copyfile(os.path.realpath(file_to_upload),"/".join([new_dir,new_fname])) - ##os.symlink(os.path.realpath(file_to_upload),"/".join([new_dir,new_fname])) + file_info['fileName'] = new_fname + file_info['fileType'] = file_type return file_info @@ -198,7 +213,7 @@ def get_sample_info(sample_list): return samples -def prepare_tarball(sampleId, qc_files, tool_list): +def prepare_tarball(sampleId, qc_files, tool): tgz_dir = 'tarball' try: @@ -206,20 +221,16 @@ def prepare_tarball(sampleId, qc_files, tool_list): except FileExistsError: pass - files_to_tar = {} - for tool in tool_list: - if not tool in files_to_tar: files_to_tar[tool] = [] - for f in sorted(qc_files): - if tool in f: - files_to_tar[tool].append(f) - - for tool in tool_list: - if not files_to_tar[tool]: continue - tarfile_name = f"{tgz_dir}/{sampleId}.{tool}.tgz" - with tarfile.open(tarfile_name, "w:gz", dereference=True) as tar: - for f in files_to_tar[tool]: - tar.add(f, arcname=os.path.basename(f)) + files_to_tar=[] + for f in sorted(qc_files): + files_to_tar.append(f) + tarfile_name = f"{tgz_dir}/{sampleId}.{tool}.tgz" + with tarfile.open(tarfile_name, "w:gz", dereference=True) as tar: + for f in files_to_tar: + 
tar.add(f, arcname=os.path.basename(f)) + + return(tarfile_name) def main(): """ Python implementation of tool: payload-gen-qc @@ -234,8 +245,10 @@ def main(): parser.add_argument("-b", "--genome_build", dest="genome_build", default="", help="Genome build") parser.add_argument("-w", "--wf-name", dest="wf_name", required=True, help="Workflow name") parser.add_argument("-s", "--wf-session", dest="wf_session", required=True, help="workflow session ID") + parser.add_argument("-r", "--wf-run", dest="wf_run", required=True, help="workflow run ID") parser.add_argument("-v", "--wf-version", dest="wf_version", required=True, help="Workflow version") parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml") + parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files") parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling", choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes'] ) @@ -254,33 +267,38 @@ def main(): 'analysisType': { 'name': 'variant_processing' }, + "variant_class":"Germline", 'studyId': analysis_dict.get('studyId'), - 'info': {}, 'workflow': { - 'workflow_name': args.wf_name, + 'workflow_name': "%s-%s" % (args.wf_name,args.tool), 'workflow_version': args.wf_version, 'session_id': args.wf_session, + 'genome_build': args.genome_build, + 'run_id': args.wf_run, + "workflow_short_name": "%s-%s" % (args.wf_name.replace("DNA Seq","").replace("Workflow","").replace(" ",""),args.tool), 'inputs': [ { 'analysis_type': analysis_dict['analysisType']['name'], - 'input_analysis_id': analysis_dict.get('analysisId') + 'normal_analysis_id': analysis_dict.get('analysisId') } ], - 'info': pipeline_info }, 'files': [], 'experiment': analysis_dict.get('experiment'), 'samples': get_sample_info(analysis_dict.get('samples')) } + + for key in 
['platform_model',"sequencing_center","sequencing_date","submitter_sequencing_experiment_id"]: + if payload['experiment'].get(key): + payload['experiment'].pop(key) + if args.genome_build: payload['workflow']['genome_build'] = args.genome_build if args.genome_annotation: payload['workflow']['genome_annotation'] = args.genome_annotation # pass `info` dict from seq_experiment payload to new payload - if 'info' in analysis_dict and isinstance(analysis_dict['info'], dict): - payload['info'] = analysis_dict['info'] - else: + if 'info' in analysis_dict.keys(): payload.pop('info') if 'library_strategy' in payload['experiment']: @@ -296,13 +314,18 @@ def main(): # generate date string date_str = date.today().strftime("%Y%m%d") - # prepare tarball to include all QC files generated by one tool - ##prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, tool_list) - process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)]) - for f in sorted(args.files_to_upload): - file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir) - payload['files'].append(file_info) + # prepare tarball to include all QC files generated by one tool + if args.tarball=="true": + process_indicator = ".".join([args.tool,"germline",args.tool+"-"+"supplement"]) + tarball_file=prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, args.tool) + file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball) + payload['files'].append(file_info) + elif args.tarball=="false": + process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)]) + for f in sorted(args.files_to_upload): + file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball) + payload['files'].append(file_info) with open("%s.%s.payload.json" % (str(uuid.uuid4()), 
args.wf_name.replace(" ","_")), 'w') as f: f.write(json.dumps(payload, indent=2)) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index e16a301..ee6b710 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -1,6 +1,5 @@ // Base directory for test data in RDPC QA // Should be ="../data" - params { test_data { 'rdpc_qa' { @@ -37,7 +36,12 @@ params { pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="deepvariant" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } freebayes_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -45,7 +49,12 @@ params { pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="freebayes" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } haplotypecaller_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -53,7 +62,12 @@ params { pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="haplotypecaller" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } manta_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -61,7 +75,12 @@ params { 
pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="manta" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } strelka_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -69,7 +88,12 @@ params { pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="strelka" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } tiddit_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -77,7 +101,12 @@ params { pipeline_yml="${params.test_data_base}/tiddit/collated_versions.yml" tool="tiddit" study_id = "TEST-PR" - analysis_id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" + gender = "Male" + experimentalStrategy : "WGS" + genomeBuild : "GRCh38_hla_decoy_ebv" + tumourNormalDesignation : "Normal" + sampleType : "Total DNA" } } } \ No newline at end of file diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf index 62cc5b9..3ad0a6a 100644 --- a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf +++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf @@ -13,17 +13,21 @@ workflow test_payload_germlinevariant { ch_payload=analysis_ch.combine(files_ch.collect().toList()) .map {analysis_json,files -> [ - [ id : params.analysis_id, - study_id : params.study_id, - tool : params.tool - ], + [ + id : params.id, + 
experimentalStrategy : params.experimentalStrategy, + genomeBuild : params.genomeBuild, + tumourNormalDesignation : params.tumourNormalDesignation, + sampleType : params.sampleType, + gender : params.gender, + study_id : params.study_id, + tool : params.tool + ], files, analysis_json] } PAYLOAD_GERMLINEVARIANT( ch_payload, - "", - "", pipeline_ch, - params.tool + false ) } From e86f34c62dea82268ceaefa554d7d8c3ab13727f Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Tue, 6 Jun 2023 17:37:02 -0400 Subject: [PATCH 3/4] update payloads --- .../payload/germlinevariant/main.nf | 1 + .../germlinevariant/resources/usr/bin/main.py | 31 +++++----- tests/config/test_data.config | 56 ++++++++++--------- .../payload/germlinevariant/main.nf | 3 +- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf index d08b238..7752022 100644 --- a/modules/icgc-argo-workflows/payload/germlinevariant/main.nf +++ b/modules/icgc-argo-workflows/payload/germlinevariant/main.nf @@ -32,6 +32,7 @@ process PAYLOAD_GERMLINEVARIANT { -v "${workflow.manifest.version}" \ -t "${meta.tool}" \ -l "${tarball}" \ + -d "${meta.dataType}" \ $arg_pipeline_yml cat <<-END_VERSIONS > versions.yml diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py index dcd4ca3..dea2199 100755 --- a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py +++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py @@ -39,11 +39,6 @@ import io import shutil -workflow_process_map = { - 'DNA Seq Germline Variant Workflow': 'snv' -} - - def calculate_size(file_path): return os.stat(file_path).st_size @@ -55,7 +50,7 @@ def calculate_md5(file_path): md5.update(chunk) return md5.hexdigest() -def 
get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir,pipeline_info,tarball): +def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,tool,new_dir,pipeline_info,tarball,data_type): file_info = { 'fileSize': calculate_size(file_to_upload), 'fileMd5sum': calculate_md5(file_to_upload), @@ -69,7 +64,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to if tool=="deepvariant": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -80,7 +75,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="strelka": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -91,7 +86,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="tiddit": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -102,7 +97,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="haplotypecaller" : if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - 
file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -113,7 +108,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="manta": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -124,7 +119,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="freebayes": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -135,7 +130,7 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to elif tool=="cnvkit": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' - file_info.update({'dataType': 'Raw SNV Calls'}) + file_info.update({'dataType': 'Raw %s Calls' % data_type}) file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): file_type = 'TBI' @@ -250,8 +245,8 @@ def main(): parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml") parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files") 
parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling", - choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes'] - ) + choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes']) + parser.add_argument("-d", "--data-type", dest="data_type", required=True,type=str, help="Data type for upload",choices=['InDel',"SNV","CNV"]) args = parser.parse_args() @@ -319,12 +314,12 @@ def main(): if args.tarball=="true": process_indicator = ".".join([args.tool,"germline",args.tool+"-"+"supplement"]) tarball_file=prepare_tarball(analysis_dict['samples'][0]['sampleId'], args.files_to_upload, args.tool) - file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball) + file_info = get_files_info(tarball_file, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball,args.data_type) payload['files'].append(file_info) elif args.tarball=="false": - process_indicator = ".".join([args.tool,"germline",workflow_process_map.get(args.wf_name)]) + process_indicator = ".".join([args.tool,"germline",args.data_type.lower()]) for f in sorted(args.files_to_upload): - file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball) + file_info = get_files_info(f, date_str, analysis_dict, process_indicator,args.tool,new_dir,pipeline_info,args.tarball,args.data_type) payload['files'].append(file_info) with open("%s.%s.payload.json" % (str(uuid.uuid4()), args.wf_name.replace(" ","_")), 'w') as f: diff --git a/tests/config/test_data.config b/tests/config/test_data.config index ee6b710..bc9aff4 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -38,10 +38,11 @@ params { study_id = "TEST-PR" id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : 
"GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } freebayes_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -51,10 +52,11 @@ params { study_id = "TEST-PR" id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : "GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } haplotypecaller_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -64,10 +66,11 @@ params { study_id = "TEST-PR" id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : "GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } manta_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -77,10 +80,11 @@ params { study_id = "TEST-PR" id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : "GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } strelka_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -90,10 +94,11 @@ params { study_id = "TEST-PR" id = 
"08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : "GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } tiddit_vcf { metadata_analysis="${params.test_data_base}/deepvariant/aaf8d346-c24f-493b-b8d3-46c24f393b92.analysis.json" @@ -103,10 +108,11 @@ params { study_id = "TEST-PR" id = "08e7c5b1-b3af-4e20-a7c5-b1b3af2e206b" gender = "Male" - experimentalStrategy : "WGS" - genomeBuild : "GRCh38_hla_decoy_ebv" - tumourNormalDesignation : "Normal" - sampleType : "Total DNA" + experimentalStrategy = "WGS" + genomeBuild = "GRCh38_hla_decoy_ebv" + tumourNormalDesignation = "Normal" + sampleType = "Total DNA" + dataType = "SNV" } } -} \ No newline at end of file +} diff --git a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf index 3ad0a6a..fcce522 100644 --- a/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf +++ b/tests/modules/icgc-argo-workflows/payload/germlinevariant/main.nf @@ -21,7 +21,8 @@ workflow test_payload_germlinevariant { sampleType : params.sampleType, gender : params.gender, study_id : params.study_id, - tool : params.tool + tool : params.tool, + dataType : params.dataType ], files, analysis_json] } From 78c534810d5236110062b8d59b1f121bf961cb03 Mon Sep 17 00:00:00 2001 From: edsu7 <22638361+edsu7@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:31:10 -0400 Subject: [PATCH 4/4] add mpileup --- .../germlinevariant/resources/usr/bin/main.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py index dea2199..94b1c24 100755 --- 
a/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py +++ b/modules/icgc-argo-workflows/payload/germlinevariant/resources/usr/bin/main.py @@ -127,6 +127,17 @@ def get_files_info(file_to_upload, date_str, analysis_dict, process_indicator,to file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) else: sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) + elif tool=="mpileup": + if re.match(r'.*.vcf.gz$', file_to_upload): + file_type = 'VCF' + file_info.update({'dataType': 'Raw %s Calls' % data_type}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + elif re.match(r'.*.vcf.gz.tbi$', file_to_upload): + file_type = 'TBI' + file_info.update({'dataType': 'VCF Index'}) + file_info['info'].update({'analysis_tools': [{key.split(":")[-1]:pipeline_info[key]} for key in pipeline_info.keys()]}) + else: + sys.exit('Error: unknown QC metrics file: %s' % file_to_upload) elif tool=="cnvkit": if re.match(r'.*.vcf.gz$', file_to_upload): file_type = 'VCF' @@ -245,7 +256,7 @@ def main(): parser.add_argument("-p", "--pipeline_yml", dest="pipeline_yml", required=False, help="Pipeline info in yaml") parser.add_argument("-l", "--tarball", dest="tarball", required=True,default="false", help="Tarball files") parser.add_argument("-t", "--tool", dest="tool", required=True,type=str, help="Tool used for variant calling", - choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes']) + choices=['strelka','cnvkit','deepvariant','tiddit','manta','haplotypecaller','freebayes','mpileup']) parser.add_argument("-d", "--data-type", dest="data_type", required=True,type=str, help="Data type for upload",choices=['InDel',"SNV","CNV"]) args = parser.parse_args()