From 5ef76b23d0d593c33ae354a1e658cef4553f6a02 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 08:55:48 -0500 Subject: [PATCH 01/35] add input to control full output filename --- definitions/tools/replace_vcf_sample_name.cwl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/definitions/tools/replace_vcf_sample_name.cwl b/definitions/tools/replace_vcf_sample_name.cwl index 6c1e82fa..a584c2bc 100644 --- a/definitions/tools/replace_vcf_sample_name.cwl +++ b/definitions/tools/replace_vcf_sample_name.cwl @@ -16,8 +16,6 @@ requirements: entry: | #!/bin/bash set -eou pipefail - basen=`basename "$3"` - basen="renamed.$basen" #escape spaces, otherwise bcftools will try to use them as a delimiter #triple backslash to escape within backticks and then again within sed @@ -25,7 +23,7 @@ requirements: new_name=`echo "$2" | sed 's/ /\\\ /g'` echo "$old_name $new_name" > sample_update.txt - /opt/bcftools/bin/bcftools reheader -s sample_update.txt -o "$basen" "$3" + /opt/bcftools/bin/bcftools reheader -s sample_update.txt -o "$4" "$3" inputs: input_vcf: @@ -43,9 +41,14 @@ inputs: inputBinding: position: 2 doc: "Sample name to replace the other" - + output_name: + type: string? + inputBinding: + position: 4 + default: "renamed.$(inputs.input_vcf.basename)" + doc: "output filename for vcf" outputs: renamed_vcf: type: File outputBinding: - glob: $("renamed." 
+ inputs.input_vcf.basename) + glob: "$(inputs.output_name)" From 15db8c7904d56466e6b48ec043f929b7255f5ab8 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 08:58:30 -0500 Subject: [PATCH 02/35] add minimum confidence input for gatk calls --- definitions/tools/gatk_genotypegvcfs.cwl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/definitions/tools/gatk_genotypegvcfs.cwl b/definitions/tools/gatk_genotypegvcfs.cwl index 23c4f1b6..258fd42d 100644 --- a/definitions/tools/gatk_genotypegvcfs.cwl +++ b/definitions/tools/gatk_genotypegvcfs.cwl @@ -44,6 +44,12 @@ inputs: prefix: "-L" inputBinding: position: 4 + min_conf_call: + type: float? + inputBinding: + prefix: "-stand-call-conf" + position: 5 + doc: "The minimum phred-scaled confidence threshold at which variants should be called" outputs: genotype_vcf: type: File From cc85818d7d3dfcb135e0365041b740586375e61d Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:15:31 -0500 Subject: [PATCH 03/35] s/all_cds/no_cds/ update input name to be clear that the no_cds filter does not run the coding sequences filter --- definitions/subworkflows/merge_svs.cwl | 3 +++ definitions/tools/annotsv_filter.cwl | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl index 02373d00..1f6585a6 100644 --- a/definitions/subworkflows/merge_svs.cwl +++ b/definitions/subworkflows/merge_svs.cwl @@ -32,6 +32,8 @@ inputs: type: File[] blocklist_bedpe: type: File? + filter_no_CDS: + type: boolean? 
outputs: bcftools_merged_sv_vcf: type: File @@ -123,5 +125,6 @@ steps: annotsv_tsv: bcftools_annotate_variants/sv_variants_tsv filtering_frequency: default: "0.05" + no_CDS: filter_no_CDS out: [filtered_tsv] diff --git a/definitions/tools/annotsv_filter.cwl b/definitions/tools/annotsv_filter.cwl index 85a16272..9ed40a43 100644 --- a/definitions/tools/annotsv_filter.cwl +++ b/definitions/tools/annotsv_filter.cwl @@ -23,14 +23,14 @@ requirements: parser.add_argument('--input', '-i', dest="input", help='input AnnotSV tsv file', required=True, action="store") parser.add_argument('--output', '-o', dest="output", help='output tsv file name', required=True, action="store") parser.add_argument('--filtering_frequency', dest="filtering_frequency", help="frequency to filter with", action="store", type=float, default="0.05") - parser.add_argument('--all-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true") + parser.add_argument('--no-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true") parser.add_argument('--ignore-pass-filter', dest="filter", help="Do not require calls to have a PASS filter", action="store_true") args = parser.parse_args() input_file_name = args.input output_file_name = args.output filtering_frequency = args.filtering_frequency - all_cds = args.CDS + no_cds = args.CDS ignore_pass_filter = args.filter with open(input_file_name, 'r') as file_in, open(output_file_name, 'w') as file_out: @@ -43,7 +43,7 @@ requirements: total_sv_count += 1 if(row['AnnotSV type'] == 'split' \ and (row['FILTER'] == 'PASS' or ignore_pass_filter) \ - and (int(row['CDS length']) > 0 or all_cds) \ + and (int(row['CDS length']) > 0 or no_cds) \ and float(row['IMH_AF']) < filtering_frequency and float(row['1000g_max_AF']) < filtering_frequency and not(float(row['DGV_LOSS_Frequency']) > filtering_frequency and 'DEL' in row['SV type']) @@ -55,11 +55,11 @@ requirements: print("total sv passed 
count:",pass_sv_count) inputs: - all_CDS: + no_CDS: type: boolean? inputBinding: position: 1 - prefix: "--all-CDS" + prefix: "--no-CDS" annotsv_tsv: type: File inputBinding: From 9e8876d043191ef1f2761b5622bec916674d53ea Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:25:31 -0500 Subject: [PATCH 04/35] add survivor merged annotsv tsv filtering allows filtering of survivor merged annotsv tsv. Also allow control over the population allele frequency value, still defaults to 0.05. --- definitions/subworkflows/merge_svs.cwl | 28 +++++++++++++++++++++++--- definitions/tools/annotsv_filter.cwl | 14 ++++++++++--- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl index 1f6585a6..464529e6 100644 --- a/definitions/subworkflows/merge_svs.cwl +++ b/definitions/subworkflows/merge_svs.cwl @@ -32,6 +32,9 @@ inputs: type: File[] blocklist_bedpe: type: File? + filter_pop_af: + type: double? + default: "0.05" filter_no_CDS: type: boolean? 
outputs: @@ -50,6 +53,9 @@ outputs: survivor_merged_annotated_tsv: type: File outputSource: survivor_annotate_variants/sv_variants_tsv + survivor_merged_filtered_annotated_tsv: + type: File + outputSource: survivor_annotsv_filter/filtered_tsv steps: survivor_merge_sv_vcfs: run: ../tools/survivor.cwl @@ -86,6 +92,18 @@ steps: valueFrom: ${ return [ self ]; } out: [sv_variants_tsv] + survivor_annotsv_filter: + run: ../tools/annotsv_filter.cwl + in: + annotsv_tsv: survivor_annotate_variants/annotated_tsv + filtering_frequency: filter_pop_af + no_CDS: filter_no_CDS + survivor_merged: + default: true + output_tsv_name: + default: "survivor-merged-AnnotSV-filtered.tsv" + out: + [filtered_tsv] bcftools_merge_sv_vcfs: run: ../tools/bcftools_merge.cwl in: @@ -122,9 +140,13 @@ steps: bcftools_annotsv_filter: run: ../tools/annotsv_filter.cwl in: - annotsv_tsv: bcftools_annotate_variants/sv_variants_tsv - filtering_frequency: - default: "0.05" + annotsv_tsv: bcftools_annotate_variants/annotated_tsv + filtering_frequency: filter_pop_af no_CDS: filter_no_CDS + survivor_merged: + default: false + output_tsv_name: + default: "bcftools-merged-AnnotSV-filtered.tsv" + out: [filtered_tsv] diff --git a/definitions/tools/annotsv_filter.cwl b/definitions/tools/annotsv_filter.cwl index 9ed40a43..e69c4f30 100644 --- a/definitions/tools/annotsv_filter.cwl +++ b/definitions/tools/annotsv_filter.cwl @@ -25,6 +25,7 @@ requirements: parser.add_argument('--filtering_frequency', dest="filtering_frequency", help="frequency to filter with", action="store", type=float, default="0.05") parser.add_argument('--no-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true") parser.add_argument('--ignore-pass-filter', dest="filter", help="Do not require calls to have a PASS filter", action="store_true") + parser.add_argument('--survivor-merged', dest="survivor", help="survivor merge filtering, drop the last filter step", action="store_true") args = parser.parse_args() 
input_file_name = args.input @@ -32,6 +33,7 @@ requirements: filtering_frequency = args.filtering_frequency no_cds = args.CDS ignore_pass_filter = args.filter + survivor_merged = args.survivor with open(input_file_name, 'r') as file_in, open(output_file_name, 'w') as file_out: file_in = csv.DictReader(file_in, delimiter='\t') @@ -47,8 +49,8 @@ requirements: and float(row['IMH_AF']) < filtering_frequency and float(row['1000g_max_AF']) < filtering_frequency and not(float(row['DGV_LOSS_Frequency']) > filtering_frequency and 'DEL' in row['SV type']) - and not(float(row['DGV_GAIN_Frequency']) < filtering_frequency and ('DUP' in row['SV type'] or 'INS' in row['SV type'])) - and not(('Manta' in row['ID'] and 'IMPRECISE' in row['INFO']) or (row['QUAL'] != '.' and 'IMPRECISE' in row['INFO'])) ): + and not(float(row['DGV_GAIN_Frequency']) > filtering_frequency and ('DUP' in row['SV type'] or 'INS' in row['SV type'])) + and (survivor_merged or not(('Manta' in row['ID'] and 'IMPRECISE' in row['INFO']) or (row['QUAL'] != '.' and 'IMPRECISE' in row['INFO'])))): file_out.writerow(row) pass_sv_count += 1 print("total sv count:",total_sv_count) @@ -75,11 +77,17 @@ inputs: inputBinding: position: 4 prefix: "--ignore-pass-filter" + survivor_merged: + type: boolean + default: false + inputBinding: + position: 5 + prefix: "--survivor-merged" output_tsv_name: type: string? default: "filtered-bcftools-merged-AnnotSV.tsv" inputBinding: - position: 5 + position: 6 prefix: "--output" outputs: From 47c368aa9861c24c52b79482bff920997ce1ebd1 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:34:55 -0500 Subject: [PATCH 05/35] update annotsv to version 2.3 version 2.3 requires the annotation directory to be passed as an input. Also capture the unannotated event tsv as an output. 
--- definitions/pipelines/germline_wgs.cwl | 5 +++ definitions/subworkflows/merge_svs.cwl | 25 +++++++++++---- .../subworkflows/single_sample_sv_callers.cwl | 6 ++++ definitions/tools/annotsv.cwl | 32 ++++++++++++------- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/definitions/pipelines/germline_wgs.cwl b/definitions/pipelines/germline_wgs.cwl index a6650249..a29bc142 100644 --- a/definitions/pipelines/germline_wgs.cwl +++ b/definitions/pipelines/germline_wgs.cwl @@ -152,6 +152,11 @@ inputs: disclaimer_text: type: string? default: 'Workflow source can be found at https://github.com/genome/analysis-workflows' + annotsv_annotations: + type: + - string + - Directory + doc: "directory/path of the annotsv annotations directory" outputs: cram: type: File diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl index 464529e6..3e53b8ac 100644 --- a/definitions/subworkflows/merge_svs.cwl +++ b/definitions/subworkflows/merge_svs.cwl @@ -37,13 +37,21 @@ inputs: default: "0.05" filter_no_CDS: type: boolean? 
+ annotsv_annotations: + type: + - string + - Directory + doc: "directory/path of the annotsv annotations directory" outputs: bcftools_merged_sv_vcf: type: File outputSource: filter_blocklist_bcftools/filtered_sv_vcf bcftools_merged_annotated_tsv: type: File - outputSource: bcftools_annotate_variants/sv_variants_tsv + outputSource: bcftools_annotate_variants/annotated_tsv + bcftools_merged_unannotated_tsv: + type: File + outputSource: bcftools_annotate_variants/unannotated_tsv bcftools_merged_filtered_annotated_tsv: type: File outputSource: bcftools_annotsv_filter/filtered_tsv @@ -52,7 +60,10 @@ outputs: outputSource: filter_blocklist_survivor/filtered_sv_vcf survivor_merged_annotated_tsv: type: File - outputSource: survivor_annotate_variants/sv_variants_tsv + outputSource: survivor_annotate_variants/annotated_tsv + survivor_merged_unannotated_tsv: + type: File + outputSource: survivor_annotate_variants/unannotated_tsv survivor_merged_filtered_annotated_tsv: type: File outputSource: survivor_annotsv_filter/filtered_tsv @@ -90,8 +101,9 @@ steps: snps_vcf: source: [snps_vcf] valueFrom: ${ return [ self ]; } + annotations: annotsv_annotations out: - [sv_variants_tsv] + [annotated_tsv, unannotated_tsv] survivor_annotsv_filter: run: ../tools/annotsv_filter.cwl in: @@ -130,13 +142,14 @@ steps: in: genome_build: genome_build input_vcf: filter_blocklist_bcftools/filtered_sv_vcf - output_tsv_name: - default: "bcftools-merged-AnnotSV.tsv" + output_base: + default: "bcftools-merged-AnnotSV" snps_vcf: source: [snps_vcf] valueFrom: ${ return [ self ]; } + annotations: annotsv_annotations out: - [sv_variants_tsv] + [annotated_tsv, unannotated_tsv] bcftools_annotsv_filter: run: ../tools/annotsv_filter.cwl in: diff --git a/definitions/subworkflows/single_sample_sv_callers.cwl b/definitions/subworkflows/single_sample_sv_callers.cwl index 01449b20..2275c156 100644 --- a/definitions/subworkflows/single_sample_sv_callers.cwl +++ b/definitions/subworkflows/single_sample_sv_callers.cwl 
@@ -77,6 +77,11 @@ inputs: type: int? blocklist_bedpe: type: File? + annotsv_annotations: + type: + - string + - Directory + doc: "directory/path of the annotsv annotations directory" outputs: cn_diagram: type: File? @@ -300,5 +305,6 @@ steps: sv_vcfs: source: [run_cnvkit_filter/filtered_vcf, run_cnvnator_filter/filtered_vcf, run_manta_filter/filtered_vcf, run_smoove_filter/filtered_vcf] linkMerge: merge_flattened + annotsv_annotations: annotsv_annotations out: [bcftools_merged_sv_vcf, bcftools_merged_annotated_tsv, bcftools_merged_filtered_annotated_tsv, survivor_merged_sv_vcf, survivor_merged_annotated_tsv] diff --git a/definitions/tools/annotsv.cwl b/definitions/tools/annotsv.cwl index 35c6e153..63f5f4b0 100644 --- a/definitions/tools/annotsv.cwl +++ b/definitions/tools/annotsv.cwl @@ -3,12 +3,12 @@ cwlVersion: v1.0 class: CommandLineTool -arguments: ["/opt/AnnotSV_2.1/bin/AnnotSV", "-bedtools", "/usr/bin/bedtools", "-outputDir", "$(runtime.outdir)"] +arguments: ["/opt/AnnotSV_2.3/bin/AnnotSV", "-bedtools", "/usr/bin/bedtools", "-outputDir", "$(runtime.outdir)", "-outputFile", "$(inputs.output_base).tsv"] requirements: - class: ResourceRequirement ramMin: 8000 - class: DockerRequirement - dockerPull: "mgibio/annotsv-cwl:2.1" + dockerPull: "mgibio/annotsv-cwl:2.3" inputs: genome_build: @@ -16,29 +16,39 @@ inputs: inputBinding: position: 2 prefix: "-genomeBuild" + doc: "genome build used, GRCh37(tool default), GRCh38, mm9, or mm10" input_vcf: type: File inputBinding: position: 3 prefix: "-SVinputFile" doc: "vcf file to filter" - output_tsv_name: + output_base: type: string? - default: "AnnotSV.tsv" + default: "AnnotSV" inputBinding: - position: 4 - prefix: "-outputFile" - doc: "output file name" + doc: "base for output file name" snps_vcf: type: File[]? 
inputBinding: position: 5 - prefix: "-vcfFiles" + prefix: "-snvIndelFiles" itemSeparator: "," doc: "snps vcf(s) for adding hom/het snp counts found within svs" - + annotations: + type: + - string + - Directory + inputBinding: + position: 6 + prefix: "-annotationsDir" + doc: "directory/path of the annotsv annotations directory" outputs: - sv_variants_tsv: + annotated_tsv: + type: File + outputBinding: + glob: "$(inputs.output_base).tsv" + unannotated_tsv: type: File outputBinding: - glob: $(inputs.output_tsv_name) + glob: "$(inputs.output_base).unannotated.tsv" From 70ab64ad67a0fd90ff9ae713495c8715432a9401 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:38:16 -0500 Subject: [PATCH 06/35] s/SURVIVOR/survivor/ and s/CNVnator/cnvnator/ changing output names for consistency --- definitions/subworkflows/merge_svs.cwl | 8 ++++---- definitions/tools/cnvnator.cwl | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl index 3e53b8ac..06f12044 100644 --- a/definitions/subworkflows/merge_svs.cwl +++ b/definitions/subworkflows/merge_svs.cwl @@ -79,7 +79,7 @@ steps: estimate_sv_distance: estimate_sv_distance minimum_sv_size: minimum_sv_size cohort_name: - default: "SURVIVOR-sv-merged.vcf" + default: "survivor-sv-merged.vcf" out: [merged_vcf] filter_blocklist_survivor: @@ -88,7 +88,7 @@ steps: input_vcf: survivor_merge_sv_vcfs/merged_vcf blocklist_bedpe: blocklist_bedpe output_vcf_basename: - default: "SURVIVOR-sv-merged" + default: "survivor-sv-merged" out: [filtered_sv_vcf] survivor_annotate_variants: @@ -96,8 +96,8 @@ steps: in: genome_build: genome_build input_vcf: filter_blocklist_survivor/filtered_sv_vcf - output_tsv_name: - default: "SURVIVOR-merged-AnnotSV.tsv" + output_base: + default: "survivor-merged-AnnotSV" snps_vcf: source: [snps_vcf] valueFrom: ${ return [ self ]; } diff --git a/definitions/tools/cnvnator.cwl b/definitions/tools/cnvnator.cwl index 
0d2f6062..546ba4f2 100644 --- a/definitions/tools/cnvnator.cwl +++ b/definitions/tools/cnvnator.cwl @@ -45,10 +45,10 @@ requirements: # read depth signal partitioning cnvnator -root "$SAMPLE.root" -partition "$BIN_SIZE" -chrom $CHROMOSOMES # cnv calling - cnvnator -root "$SAMPLE.root" -call "$BIN_SIZE" -chrom $CHROMOSOMES > "$SAMPLE.CNVnator.cn" + cnvnator -root "$SAMPLE.root" -call "$BIN_SIZE" -chrom $CHROMOSOMES > "$SAMPLE.cnvnator.cn" # convert to vcf - cnvnator2VCF.pl -reference "$REFERENCE" "$SAMPLE.CNVnator.cn" FASTA_CHRS/ > "$SAMPLE.CNVnator.vcf" + cnvnator2VCF.pl -reference "$REFERENCE" "$SAMPLE.cnvnator.cn" FASTA_CHRS/ > "$SAMPLE.cnvnator.vcf" exit 0 inputs: bam: @@ -87,7 +87,7 @@ outputs: vcf: type: File outputBinding: - glob: "$(inputs.sample_name).CNVnator.vcf" + glob: "$(inputs.sample_name).cnvnator.vcf" root_file: type: File outputBinding: @@ -95,4 +95,4 @@ outputs: cn_file: type: File outputBinding: - glob: "$(inputs.sample_name).CNVnator.cn" + glob: "$(inputs.sample_name).cnvnator.cn" From 43b7c2c9b6bf9a2e1c0e65114495a7e60ed128ae Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:39:51 -0500 Subject: [PATCH 07/35] outputbinding change s/merged_sv_vcf/merged_vcf/ --- definitions/subworkflows/merge_svs.cwl | 4 ++-- definitions/tools/bcftools_merge.cwl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl index 06f12044..97a78bd2 100644 --- a/definitions/subworkflows/merge_svs.cwl +++ b/definitions/subworkflows/merge_svs.cwl @@ -127,11 +127,11 @@ steps: default: "bcftools-sv-merged.vcf" vcfs: sv_vcfs out: - [merged_sv_vcf] + [merged_vcf] filter_blocklist_bcftools: run: ../tools/filter_sv_vcf_blocklist_bedpe.cwl in: - input_vcf: bcftools_merge_sv_vcfs/merged_sv_vcf + input_vcf: bcftools_merge_sv_vcfs/merged_vcf blocklist_bedpe: blocklist_bedpe output_vcf_basename: default: "bcftools-sv-merged" diff --git a/definitions/tools/bcftools_merge.cwl 
b/definitions/tools/bcftools_merge.cwl index 4c45df4b..57daeaec 100644 --- a/definitions/tools/bcftools_merge.cwl +++ b/definitions/tools/bcftools_merge.cwl @@ -58,7 +58,7 @@ inputs: doc: "input bgzipped tabix indexed vcfs to merge" outputs: - merged_sv_vcf: + merged_vcf: type: File outputBinding: glob: $(inputs.output_vcf_name) From 0622bd92cb9191ce4c808854e7ec2739268c19e9 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:40:51 -0500 Subject: [PATCH 08/35] stage secondary files in gather_to_sub_directory added javascript to pass in any secondary files when staging output files. added --recursive to copy everything added --preserve to keep timestamps (cromwell does not stage files for this to matter...) added --no-clobber so existing files are never overwritten added optional directory input for staging files and a single directory. --- definitions/tools/gather_to_sub_directory.cwl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl index cffb6a9f..1980cb47 100644 --- a/definitions/tools/gather_to_sub_directory.cwl +++ b/definitions/tools/gather_to_sub_directory.cwl @@ -19,7 +19,7 @@ requirements: files="${@:2}" mkdir $outdir chmod -R 777 $outdir - cp -t $outdir $files + cp --recursive --preserve --no-clobber --target-directory $outdir $files exit 0 @@ -32,6 +32,23 @@ inputs: type: File[] inputBinding: position: 2 + valueFrom: | + ${ + var results = [] + for(var i=0; i Date: Thu, 15 Jul 2021 09:46:11 -0500 Subject: [PATCH 09/35] added min confidence input to genotype_gvcf step --- definitions/subworkflows/joint_genotype.cwl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index 3de95bd4..d3e35d84 100644 --- a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -64,9 +64,11
@@ inputs: final_tsv_prefix: type: string? default: 'variants' - filter_gnomAD_maximum_population_allele_frequency: + gnomad_max_pop_af: type: float default: 0.05 + min_conf_call: + type: float? outputs: raw_vcf: type: File @@ -106,6 +108,7 @@ steps: source: [combine_gvcfs/gvcf] linkMerge: merge_flattened intervals: intervals + min_conf_call: min_conf_call out: [genotype_vcf] merge_vcfs: @@ -140,7 +143,7 @@ steps: run: germline_filter_vcf.cwl in: annotated_vcf: annotate_variants/annotated_vcf - filter_gnomAD_maximum_population_allele_frequency: filter_gnomAD_maximum_population_allele_frequency + filter_gnomAD_maximum_population_allele_frequency: gnomad_max_pop_af gnomad_field_name: source: vep_custom_annotations valueFrom: | From 498236bc09c4073b035908a960fffe585dfc1666 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:47:57 -0500 Subject: [PATCH 10/35] add annotated vcf as output --- definitions/subworkflows/joint_genotype.cwl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index d3e35d84..2a15cc94 100644 --- a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -74,6 +74,10 @@ outputs: type: File outputSource: merge_vcfs/merged_vcf secondaryFiles: [.tbi] + annotated_vcf: + type: File + outputSource: bgzip_index_annotated_vcf/indexed_vcf + secondaryFiles: [.tbi] final_vcf: type: File outputSource: filter_vcf/final_vcf From 3b4329e68580ea0ec9f05c00a68f7fcb6bf2ba74 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:48:44 -0500 Subject: [PATCH 11/35] add decompose and normalize step to joint genotype --- definitions/subworkflows/joint_genotype.cwl | 30 ++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index 2a15cc94..5cc2faca 100644 --- 
a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -72,7 +72,7 @@ inputs: outputs: raw_vcf: type: File - outputSource: merge_vcfs/merged_vcf + outputSource: normalize_index/indexed_vcf secondaryFiles: [.tbi] annotated_vcf: type: File @@ -121,11 +121,35 @@ steps: vcfs: genotype_gvcf/genotype_vcf out: [merged_vcf] - + decompose: + run: ../tools/vt_decompose.cwl + in: + vcf: merge_vcfs/merged_vcf + out: + [decomposed_vcf] + decompose_index: + run: ../tools/index_vcf.cwl + in: + vcf: decompose/decomposed_vcf + out: + [indexed_vcf] + normalize: + run: ../tools/vt_normalize.cwl + in: + vcf: decompose_index/indexed_vcf + reference: reference + out: + [normalized_vcf] + normalize_index: + run: ../tools/index_vcf.cwl + in: + vcf: normalize/normalized_vcf + out: + [indexed_vcf] annotate_variants: run: ../tools/vep.cwl in: - vcf: merge_vcfs/merged_vcf + vcf: normalize_index/indexed_vcf cache_dir: vep_cache_dir ensembl_assembly: vep_ensembl_assembly ensembl_version: vep_ensembl_version From da8d329d14132fb7b070e96bb74051070122fcce Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 09:59:37 -0500 Subject: [PATCH 12/35] add gatk soft filtering gatk soft filtering using hard filtering parameters. 
Based on https://gatk.broadinstitute.org/hc/en-us/articles/360035531112--How-to-Filter-variants-either-with-VQSR-or-by-hard-filtering#2 --- definitions/subworkflows/gatk_soft_filter.cwl | 84 +++++++++++++++++++ definitions/subworkflows/joint_genotype.cwl | 9 +- definitions/tools/variant_filtration.cwl | 55 ++++++++++++ 3 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 definitions/subworkflows/gatk_soft_filter.cwl create mode 100644 definitions/tools/variant_filtration.cwl diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl new file mode 100644 index 00000000..61780268 --- /dev/null +++ b/definitions/subworkflows/gatk_soft_filter.cwl @@ -0,0 +1,84 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "apply soft filtering to a gatk called vcf using hard filter parameters" +requirements: + - class: SubworkflowFeatureRequirement + - class: StepInputExpressionRequirement + - class: MultipleInputFeatureRequirement +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + vcf: + type: File + secondaryFiles: [.tbi] +outputs: + filtered_vcf: + type: File + secondaryFiles: [.tbi] + outputSource: index_merged/indexed_vcf +steps: + split_snps: + run: ../tools/select_variants.cwl + in: + reference: reference + vcf: vcf + output_vcf_basename: + default: "SNPS" + select_type: + default: "SNP" + out: + [filtered_vcf] + split_indels: + run: ../tools/select_variants.cwl + in: + reference: reference + vcf: vcf + output_vcf_basename: + default: "INDELS" + select_type: + default: "INDEL" + out: + [filtered_vcf] + filter_snps: + run: ../tools/variant_filtration.cwl + in: + reference: reference + vcf: split_snps/filtered_vcf + filters: + default: ["QD<2.0;QD2", "QUAL<30.0;QUAL30", "SOR>3.0;SOR3", "FS>60.0;FS60", "MQ<40.0;MQ40", "MQRankSum<-12.5;MQRankSum-12.5", "ReadPosRankSum<-8.0;ReadPosRankSum-8"] + output_vcf_basename: + default: "SNPS.filtered" + out: +
[filtered_vcf] + filter_indels: + run: ../tools/variant_filtration.cwl + in: + reference: reference + vcf: split_indels/filtered_vcf + filters: + default: ["QD<2.0;QD2", "QUAL<30.0;QUAL30", "FS>200.0;FS200", "ReadPosRankSum<-20.0;ReadPosRankSum-20"] + output_vcf_basename: + default: "INDELS.filtered" + out: + [filtered_vcf] + merge: + run: ../tools/merge_vcf.cwl + in: + merged_vcf_basename: + default: "soft_filtered" + vcfs: + source: [filter_snps/filtered_vcf, filter_indels/filtered_vcf] + linkMerge: merge_flattened + out: + [merged_vcf] + index_merged: + run: ../tools/index_vcf.cwl + in: + vcf: merge/merged_vcf + out: + [indexed_vcf] diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index 5cc2faca..76c6aa4c 100644 --- a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -167,10 +167,17 @@ steps: vcf: annotate_variants/annotated_vcf out: [indexed_vcf] + soft_filter: + run: gatk_soft_filter.cwl + in: + reference: reference + vcf: bgzip_index_annotated_vcf/indexed_vcf + out: + [filtered_vcf] filter_vcf: run: germline_filter_vcf.cwl in: - annotated_vcf: annotate_variants/annotated_vcf + annotated_vcf: soft_filter/filtered_vcf filter_gnomAD_maximum_population_allele_frequency: gnomad_max_pop_af gnomad_field_name: source: vep_custom_annotations diff --git a/definitions/tools/variant_filtration.cwl b/definitions/tools/variant_filtration.cwl new file mode 100644 index 00000000..1901bf13 --- /dev/null +++ b/definitions/tools/variant_filtration.cwl @@ -0,0 +1,55 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "VariantFiltration (GATK 4.1.8.1)" +baseCommand: ["/gatk/gatk", "--java-options", "-Xmx4g", "VariantFiltration"] +requirements: + - class: ResourceRequirement + ramMin: 6000 + tmpdirMin: 25000 + - class: DockerRequirement + dockerPull: "broadinstitute/gatk:4.1.8.1" +arguments: + ["-O", { valueFrom: 
$(runtime.outdir)/$(inputs.output_vcf_basename).vcf.gz }] +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + inputBinding: + prefix: "-R" + position: 1 + vcf: + type: File + inputBinding: + prefix: "--variant" + position: 2 + secondaryFiles: [.tbi] + filters: + type: string[] + inputBinding: + position: 3 + valueFrom: | + ${ + var results = [] + for(var i=0; i3.0;SOR3'" + output_vcf_basename: + type: string? + default: select_variants +outputs: + filtered_vcf: + type: File + secondaryFiles: [.tbi] + outputBinding: + glob: $(inputs.output_vcf_basename).vcf.gz From 4ccb8d3d58885842415de3bb09c7ac8f67a2e015 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:02:48 -0500 Subject: [PATCH 13/35] add new normalize tool This uses VT to normalize a VCF. This is an alternative to GATK4 LeftAlignTrimVariants --- definitions/tools/vt_normalize.cwl | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 definitions/tools/vt_normalize.cwl diff --git a/definitions/tools/vt_normalize.cwl b/definitions/tools/vt_normalize.cwl new file mode 100644 index 00000000..0e19086b --- /dev/null +++ b/definitions/tools/vt_normalize.cwl @@ -0,0 +1,32 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "run vt normalize" +baseCommand: ["vt", "normalize"] +requirements: + - class: DockerRequirement + dockerPull: quay.io/biocontainers/vt:0.57721--hf74b74d_1 + - class: ResourceRequirement + ramMin: 4000 +arguments: + ["-o", { valueFrom: $(runtime.outdir)/normalized.vcf.gz }] +inputs: + vcf: + type: File + inputBinding: + position: 1 + secondaryFiles: [".tbi"] + reference: + type: + - string + - File + secondaryFiles: [".fai"] + inputBinding: + prefix: "-r" + position: 2 +outputs: + normalized_vcf: + type: File + outputBinding: + glob: "normalized.vcf.gz" From 8a983732300069c61421762037b36046a2cff900 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:05:46 -0500 Subject: [PATCH 
14/35] add gather to subdirectory tool for directories --- .../tools/gather_to_sub_directory_dirs.cwl | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 definitions/tools/gather_to_sub_directory_dirs.cwl diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl new file mode 100644 index 00000000..a83b45ad --- /dev/null +++ b/definitions/tools/gather_to_sub_directory_dirs.cwl @@ -0,0 +1,40 @@ +#! /usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: ["/bin/bash","directory_gatherer.sh"] + +requirements: + - class: DockerRequirement + dockerPull: "ubuntu:xenial" + - class: ResourceRequirement + ramMin: 1000 + - class: InitialWorkDirRequirement + listing: + - entryname: 'directory_gatherer.sh' + entry: | + set -eou pipefail + + outdir="$1" + files="${@:2}" + mkdir $outdir + chmod -R 777 $outdir + cp --recursive --preserve --no-clobber --target-directory $outdir $files + + exit 0 + +inputs: + outdir: + type: string + inputBinding: + position: 1 + directories: + type: Directory[] + inputBinding: + position: 2 +outputs: + gathered_directory: + type: Directory + outputBinding: + glob: "$(inputs.outdir)" + From fb99760251557057ab74580ecc35d9d333d1d409 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:07:24 -0500 Subject: [PATCH 15/35] add bcftools view tool This allows vcfs to be split by samples --- definitions/tools/bcftools_view.cwl | 53 +++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 definitions/tools/bcftools_view.cwl diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl new file mode 100644 index 00000000..e43b67e4 --- /dev/null +++ b/definitions/tools/bcftools_view.cwl @@ -0,0 +1,53 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool + +baseCommand: ["/opt/bcftools/bin/bcftools", "view"] + +requirements: + - class: ResourceRequirement + ramMin: 4000 + - 
class: DockerRequirement + dockerPull: "mgibio/bcftools-cwl:1.12" + +inputs: + sample_name: + type: string? + inputBinding: + position: 1 + prefix: "--samples" + doc: "comma separated list of samples to include (or exclude with '^' prefix)" + output_type: + type: + type: enum + symbols: ["b", "u", "z", "v"] + default: "z" + inputBinding: + position: 4 + prefix: "--output-type" + doc: "output file format" + output_vcf_name: + type: string? + default: "bcftools_split.vcf.gz" + inputBinding: + position: 5 + prefix: "--output-file" + doc: "output vcf file name" + variant_type: + type: string? + inputBinding: + position: 6 + prefix: "--types" + doc: "select comma-separated list of variant types: snps,indels,mnps,ref,bnd,other" + in_vcf: + type: File + inputBinding: + position: 7 + doc: "input bgzipped tabix indexed vcf to view" + +outputs: + vcf: + type: File + outputBinding: + glob: $(inputs.output_vcf_name) From f8947469866bfc25b7df8da2b24b8eb4b7ae6a15 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:08:23 -0500 Subject: [PATCH 16/35] add manta_germline tool This allows manta to be run with multiple samples in a joint calling fashion --- definitions/tools/manta_germline.cwl | 78 ++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 definitions/tools/manta_germline.cwl diff --git a/definitions/tools/manta_germline.cwl b/definitions/tools/manta_germline.cwl new file mode 100644 index 00000000..3c446ce8 --- /dev/null +++ b/definitions/tools/manta_germline.cwl @@ -0,0 +1,78 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "Set up and execute manta over multiple samples" + +requirements: + - class: DockerRequirement + dockerPull: mgibio/manta_somatic-cwl:1.6.0 + - class: InlineJavascriptRequirement + - class: ShellCommandRequirement + - class: ResourceRequirement + coresMin: 12 + ramMin: 24000 + tmpdirMin: 10000 +baseCommand: ["/usr/bin/python", "/usr/bin/manta/bin/configManta.py"] +arguments: [ + {
position: -1, valueFrom: $(runtime.outdir), prefix: "--runDir" }, + { shellQuote: false, valueFrom: "&&" }, + "/usr/bin/python", "runWorkflow.py", "-m", "local", + { position: 1, valueFrom: $(runtime.cores), prefix: "-j" } +] +inputs: + bams: + type: + type: array + items: File + inputBinding: + prefix: "--bam" + inputBinding: + position: -2 + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + inputBinding: + position: -4 + prefix: "--referenceFasta" + call_regions: + type: File? + inputBinding: + position: -5 + prefix: "--callRegions" + secondaryFiles: [.tbi] + doc: bgzip-compressed, tabix-indexed BED file specifying regions to which variant analysis will be restricted + non_wgs: + type: boolean? + inputBinding: + position: -6 + prefix: "--exome" + doc: toggles on settings for WES + output_contigs: + type: boolean? + inputBinding: + position: -7 + prefix: "--outputContig" + doc: if true, outputs assembled contig sequences in final VCF files, in the INFO field CONTIG +outputs: + diploid_variants: + type: File + outputBinding: + glob: results/variants/diploidSV.vcf.gz + secondaryFiles: [.tbi] + all_candidates: + type: File + outputBinding: + glob: results/variants/candidateSV.vcf.gz + secondaryFiles: [.tbi] + small_candidates: + type: File + outputBinding: + glob: results/variants/candidateSmallIndels.vcf.gz + secondaryFiles: [.tbi] + stats: + type: Directory + outputBinding: + glob: results/stats/ From 1c526f8e734e0f13fac77a6199e3915d5c5faa56 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:11:24 -0500 Subject: [PATCH 17/35] add joint cnvnator subworkflow This runs cnvnator in single sample mode over multiple samples. The sample rename step is required as the sample name in the output vcf can change from the input.
examples: input name -> output name sample.1 -> sample sample.1.2 -> sample sample_1 -> sample_1 also stages output vcf name to follow $SAMPLE.cnvnator.vcf.gz format --- definitions/subworkflows/joint_cnvnator.cwl | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 definitions/subworkflows/joint_cnvnator.cwl diff --git a/definitions/subworkflows/joint_cnvnator.cwl b/definitions/subworkflows/joint_cnvnator.cwl new file mode 100644 index 00000000..377caca1 --- /dev/null +++ b/definitions/subworkflows/joint_cnvnator.cwl @@ -0,0 +1,80 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "run cnvnator for multiple samples" +requirements: + - class: SubworkflowFeatureRequirement + - class: ScatterFeatureRequirement + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement # required by the JavaScript valueFrom bodies in sample_rename +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + sample_names: + type: string[] + bams: + type: File[] + secondaryFiles: [^.bai] + bin_size: + type: int? +outputs: + vcfs: + type: File[] + outputSource: index_cnvnator/indexed_vcf + secondaryFiles: [.tbi] + root_files: + type: File[] + outputSource: cnvnator/root_file + cn_files: + type: File[] + outputSource: cnvnator/cn_file +steps: + cnvnator: + scatter: [bam, sample_name] + scatterMethod: dotproduct + run: ../tools/cnvnator.cwl + in: + bam: bams + reference: reference + sample_name: sample_names + bin_size: bin_size + out: + [vcf, root_file, cn_file] + bgzip_index: + scatter: [vcf] + run: bgzip_and_index.cwl + in: + vcf: cnvnator/vcf + out: + [indexed_vcf] + sample_rename: + scatter: [input_vcf, new_sample_name] + scatterMethod: dotproduct + run: ../tools/replace_vcf_sample_name.cwl + in: + input_vcf: bgzip_index/indexed_vcf + new_sample_name: sample_names + sample_to_replace: + valueFrom: '${ + var old_name = inputs.new_sample_name.split(".")[0]; + return old_name; + }' + output_name: + valueFrom: '${ + var sample = inputs.new_sample_name; + var name = sample + ".cnvnator.vcf.gz";
+ return name; + }' + out: + [renamed_vcf] + index_cnvnator: + scatter: [vcf] + run: ../tools/index_vcf.cwl + in: + vcf: sample_rename/renamed_vcf + out: + [indexed_vcf] From e6e621af695fbeac0ff6f1c57f5cce9a9ef120d3 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:15:19 -0500 Subject: [PATCH 18/35] add joint cnvkit subworkflow This runs cnvkit in single sample mode for multiple samples. The sample rename step is required as output sample name in the vcf is based on the input filename. Currently that is hardcoded to be `adjusted.tumor` also stage output file name to follow $SAMPLE.cnvkit.vcf.gz format --- definitions/subworkflows/joint_cnvkit.cwl | 92 +++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 definitions/subworkflows/joint_cnvkit.cwl diff --git a/definitions/subworkflows/joint_cnvkit.cwl b/definitions/subworkflows/joint_cnvkit.cwl new file mode 100644 index 00000000..de8ff4f1 --- /dev/null +++ b/definitions/subworkflows/joint_cnvkit.cwl @@ -0,0 +1,92 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "jointly run cnvkit for sv calls" +requirements: + - class: SubworkflowFeatureRequirement + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: ScatterFeatureRequirement +inputs: + sample_names: + type: string[] + bams: + type: File[] + secondaryFiles: [^.bai] + reference_fasta: + type: + - string + - File + secondaryFiles: [.fai] + reference_cnn: + type: File? 
+ doc: "can be a flat reference or reference based on a panel of normals" + method: + type: + - "null" + - type: enum + symbols: ["hybrid", "amplicon", "wgs"] + segment_filter: + type: + - "null" + - type: enum + symbols: ["ampdel", "ci", "cn", "sem"] +outputs: + vcfs: + type: File[] + outputSource: index_cnvkit/indexed_vcf + secondaryFiles: [.tbi] + cnr: + type: File[] + outputSource: cnvkit/tumor_bin_level_ratios + cns: + type: File[] + outputSource: cnvkit/tumor_segmented_ratios +steps: + cnvkit: + scatter: [tumor_bam, cnvkit_vcf_name] + scatterMethod: dotproduct + run: cnvkit_single_sample.cwl + in: + method: method + reference_cnn: reference_cnn + tumor_bam: bams + cnvkit_vcf_name: + source: [sample_names] + valueFrom: "$(self).cnvkit.vcf" + segment_filter: segment_filter + fasta_reference: reference_fasta + out: + [tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf] + bgzip_and_index: + scatter: [vcf] + run: bgzip_and_index.cwl + in: + vcf: cnvkit/cnvkit_vcf + out: + [indexed_vcf] + sample_rename: + scatter: [input_vcf, new_sample_name] + scatterMethod: dotproduct + run: ../tools/replace_vcf_sample_name.cwl + in: + input_vcf: bgzip_and_index/indexed_vcf + new_sample_name: sample_names + sample_to_replace: + valueFrom: 'adjusted.tumor' + output_name: + valueFrom: '${ + var sample = inputs.new_sample_name; + var name = sample + ".cnvkit.vcf.gz"; + return name; + }' + out: + [renamed_vcf] + index_cnvkit: + scatter: [vcf] + run: ../tools/index_vcf.cwl + in: + vcf: sample_rename/renamed_vcf + out: + [indexed_vcf] From 66ac5892328476ca20437423da3854d35ac0dc72 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:18:32 -0500 Subject: [PATCH 19/35] add joint sv read caller filtering This subworkflow runs sv filtering for manta/smoove calls. Final sample names follow the $SAMPLE-$CALLER format. This allows easy tracking for the source of calls in output merged vcfs. 
--- .../sv_joint_read_caller_filter.cwl | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 definitions/subworkflows/sv_joint_read_caller_filter.cwl diff --git a/definitions/subworkflows/sv_joint_read_caller_filter.cwl b/definitions/subworkflows/sv_joint_read_caller_filter.cwl new file mode 100644 index 00000000..21b71515 --- /dev/null +++ b/definitions/subworkflows/sv_joint_read_caller_filter.cwl @@ -0,0 +1,157 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "filter jointly called vcfs from read based callers" +requirements: + - class: SubworkflowFeatureRequirement + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: ScatterFeatureRequirement +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + sample_names: + type: string[] + bams: + type: File[] + secondaryFiles: [^.bai] + filter_del_depth: + type: double? + filter_dup_depth: + type: double? + filter_paired_count: + type: int? + filter_split_count: + type: int? + filter_alt_abundance_percentage: + type: double? 
+ sv_vcf: + type: File + secondaryFiles: [.tbi] + vcf_source: + type: + - type: enum + symbols: ["manta", "smoove"] +outputs: + vcfs: + type: File[] + outputSource: final_index/indexed_vcf + secondaryFiles: [.tbi] +steps: + read_support_filter: + run: ../tools/filter_sv_vcf_read_support.cwl + in: + abundance_percentage: filter_alt_abundance_percentage + input_vcf: sv_vcf + paired_count: filter_paired_count + split_count: filter_split_count + vcf_source: vcf_source + out: + [filtered_sv_vcf] + bgzip_index: + run: bgzip_and_index.cwl + in: + vcf: read_support_filter/filtered_sv_vcf + out: + [indexed_vcf] + split_vcf: + scatter: [sample_name] + run: ../tools/bcftools_view.cwl + in: + sample_name: sample_names + in_vcf: bgzip_index/indexed_vcf + out: + [vcf] + duphold: + scatter: [bam, sv_vcf] + scatterMethod: dotproduct + run: ../tools/duphold.cwl + in: + bam: bams + reference: reference + sv_vcf: split_vcf/vcf + out: + [annotated_sv_vcf] + bgzip_index_duphold: + scatter: [vcf] + scatterMethod: dotproduct + run: bgzip_and_index.cwl + in: + vcf: duphold/annotated_sv_vcf + out: + [indexed_vcf] + merge_vcfs: + run: ../tools/bcftools_merge.cwl + in: + vcfs: bgzip_index_duphold/indexed_vcf + out: + [merged_vcf] + depth_filter: + run: ../tools/filter_sv_vcf_depth.cwl + in: + input_vcf: merge_vcfs/merged_vcf + deletion_depth: filter_del_depth + duplication_depth: filter_dup_depth + vcf_source: + default: "duphold" + out: + [filtered_sv_vcf] + final_split_vcf: + scatter: [sample_name, output_vcf_name] + scatterMethod: dotproduct + run: ../tools/bcftools_view.cwl + in: + sample_name: sample_names + in_vcf: depth_filter/filtered_sv_vcf + vcf_source: vcf_source + output_vcf_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var result = sample + "-" + caller + ".vcf.gz"; + return result; + } + out: + [vcf] + rename: + scatter: [input_vcf, sample_to_replace, new_sample_name, output_name] + scatterMethod: dotproduct + run: 
../tools/replace_vcf_sample_name.cwl + in: + input_vcf: final_split_vcf/vcf + sample_to_replace: sample_names + vcf_source: vcf_source + new_sample_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var result = sample + "-" + caller; + return result; + } + output_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var result = sample + "-" + caller + ".vcf.gz"; + return result; + } + out: + [renamed_vcf] + final_index: + scatter: [vcf] + scatterMethod: dotproduct + run: ../tools/index_vcf.cwl + in: + vcf: rename/renamed_vcf + out: + [indexed_vcf] From 571bab75d7fa2549c4ac13b9c898c7b8eee86ff8 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:22:23 -0500 Subject: [PATCH 20/35] add joint sv filtering for depth callers This runs the depth filters for events called by cnvkit/cnvnator. Final sample names follow the $SAMPLE-$CALLER format. This allows easy tracking for the source of calls in final merged vcfs. added custom merge sv records. This allows calls to be merged together if they are of the same type and within a bp window. This does not remove calls just adds a new record in the output vcf. 
--- .../sv_joint_depth_caller_filter.cwl | 124 ++++++++++++++++++ definitions/tools/custom_merge_sv_records.cwl | 49 +++++++ 2 files changed, 173 insertions(+) create mode 100644 definitions/subworkflows/sv_joint_depth_caller_filter.cwl create mode 100644 definitions/tools/custom_merge_sv_records.cwl diff --git a/definitions/subworkflows/sv_joint_depth_caller_filter.cwl b/definitions/subworkflows/sv_joint_depth_caller_filter.cwl new file mode 100644 index 00000000..4cd676ea --- /dev/null +++ b/definitions/subworkflows/sv_joint_depth_caller_filter.cwl @@ -0,0 +1,124 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "Filter multiple sv vcfs from depth callers(cnvkit/cnvnator), returns single sample vcfs with the sample name as $SAMPLE-$CALLER" +requirements: + - class: SubworkflowFeatureRequirement + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: ScatterFeatureRequirement +inputs: + bams: + type: File[] + secondaryFiles: [^.bai] + sample_names: + type: string[] + filter_del_depth: + type: double? + filter_dup_depth: + type: double? + min_sv_size: + type: int? + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + sv_vcfs: + type: File[] + vcf_source: + type: + - type: enum + symbols: ["cnvkit", "cnvnator"] + merge_distance: + type: int? 
+outputs: + vcfs: + type: File[] + outputSource: bgzip_and_index/indexed_vcf + secondaryFiles: [.tbi] +steps: + merge_calls: + scatter: [input_vcf] + run: ../tools/custom_merge_sv_records.cwl + in: + input_vcf: sv_vcfs + distance: merge_distance + out: + [vcf] + size_filter: + scatter: [input_vcf] + run: ../tools/filter_sv_vcf_size.cwl + in: + input_vcf: merge_calls/vcf + size_method: + default: "min_len" + sv_size: min_sv_size + out: + [filtered_sv_vcf] + duphold: + scatter: [bam, sv_vcf] + scatterMethod: dotproduct + run: ../tools/duphold.cwl + in: + bam: bams + reference: reference + sv_vcf: size_filter/filtered_sv_vcf + out: + [annotated_sv_vcf] + depth_filter: + scatter: [input_vcf, output_vcf_name] + scatterMethod: dotproduct + run: ../tools/filter_sv_vcf_depth.cwl + in: + input_vcf: duphold/annotated_sv_vcf + deletion_depth: filter_del_depth + duplication_depth: filter_dup_depth + output_vcf_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var vcf_name = sample + "-" + caller + ".vcf"; + return vcf_name; + } + vcf_source: + default: "duphold" + out: + [filtered_sv_vcf] + rename: + scatter: [input_vcf, new_sample_name, sample_to_replace, output_name] + scatterMethod: dotproduct + run: ../tools/replace_vcf_sample_name.cwl + in: + input_vcf: depth_filter/filtered_sv_vcf + sample_to_replace: sample_names + vcf_source: vcf_source + new_sample_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var result = sample + "-" + caller; + return result; + } + output_name: + source: [sample_names] + valueFrom: | + ${ + var sample = self; + var caller = inputs.vcf_source; + var result = sample + "-" + caller + ".vcf.gz"; + return result; + } + out: + [renamed_vcf] + bgzip_and_index: + scatter: [vcf] + run: bgzip_and_index.cwl + in: + vcf: rename/renamed_vcf + out: [indexed_vcf] diff --git a/definitions/tools/custom_merge_sv_records.cwl 
b/definitions/tools/custom_merge_sv_records.cwl new file mode 100644 index 00000000..df6d267d --- /dev/null +++ b/definitions/tools/custom_merge_sv_records.cwl @@ -0,0 +1,49 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "merges nearby DEL/DUP records within a certain window distance" + +baseCommand: ["/bin/bash", "run_merge.sh"] +requirements: + - class: ResourceRequirement + ramMin: 4000 + - class: DockerRequirement + dockerPull: "apaul7/analysis:1.0.0" + - class: InitialWorkDirRequirement + listing: + - entryname: "run_merge.sh" + entry: | + #!/bin/bash + set -eou pipefail + INPUT="$1" + OUTPUT="$2" + DISTANCE="$3" + /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE" + + /usr/local/bin/bgzip "$OUTPUT" + /usr/local/bin/tabix -p vcf "$OUTPUT.gz" + + +inputs: + input_vcf: + type: File + inputBinding: + position: 1 + output_vcf_name: + type: string? + default: "record_merged.vcf" + inputBinding: + position: 2 + distance: + type: int?
+ default: 1000 + inputBinding: + position: 3 + +outputs: + vcf: + type: File + outputBinding: + glob: "$(inputs.output_vcf_name).gz" + secondaryFiles: [.tbi] From c8214e3d5d3a0e51ca9b4fdf11ff6ac50dbfd163 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:25:49 -0500 Subject: [PATCH 21/35] add joint detect svs subworkflows This runs the sv callers in joint mode, merges, annotates, filters, and stages the results in a directory structure --- definitions/subworkflows/joint_detect_svs.cwl | 295 ++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 definitions/subworkflows/joint_detect_svs.cwl diff --git a/definitions/subworkflows/joint_detect_svs.cwl b/definitions/subworkflows/joint_detect_svs.cwl new file mode 100644 index 00000000..9bbe2bce --- /dev/null +++ b/definitions/subworkflows/joint_detect_svs.cwl @@ -0,0 +1,295 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "joint detect svs" +requirements: + - class: MultipleInputFeatureRequirement # staging and merge steps list several sources with linkMerge + - class: StepInputExpressionRequirement # staging steps set outdir through valueFrom + - class: SubworkflowFeatureRequirement + - class: ScatterFeatureRequirement +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + bams: + type: File[] + secondaryFiles: [^.bai] + sample_names: + type: string[] + cohort_name: + type: string + exclude_regions: + type: File? + manta_call_regions: + type: File? + manta_output_contigs: + type: boolean? + cnvnator_bin_size: + type: int? + cnvkit_method: + type: + - "null" + - type: enum + symbols: ["hybrid", "amplicon", "wgs"] + cnvkit_reference_cnn: + type: File? + cnvkit_segment_filter: + type: + - "null" + - type: enum + symbols: ["ampdel", "ci", "cn", "sem"] + filter_del_depth: + type: double? + filter_dup_depth: + type: double? + filter_paired_count: + type: int? + filter_split_count: + type: int? + filter_alt_abundance_percentage: + type: double? + filter_depth_caller_min_size: + type: int?
+ survivor_estimate_sv_distance: + type: boolean + genome_build: + type: string + survivor_max_distance_to_merge: + type: int + survivor_minimum_sv_calls: + type: int + survivor_minimum_sv_size: + type: int + survivor_same_strand: + type: boolean + survivor_same_type: + type: boolean + snps_vcf: + type: File? + filter_blocklist_bedpe: + type: File? + annotsv_filter_pop_af: + type: double? + annotsv_filter_no_CDS: + type: boolean? + annotsv_annotations: + type: + - string + - Directory +outputs: + all_staged: + type: Directory + outputSource: stage_all/gathered_directory +steps: +# stage 1, variant calling + smoove: + run: ../tools/smoove.cwl + in: + bams: bams + cohort_name: cohort_name + reference: reference + exclude_regions: exclude_regions + out: + [output_vcf] + index_smoove: + run: ../tools/index_vcf.cwl + in: + vcf: smoove/output_vcf + out: + [indexed_vcf] + stage_raw_smoove: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "smoove" + files: + source: [index_smoove/indexed_vcf] + linkMerge: merge_flattened + out: + [gathered_directory] + manta: + run: ../tools/manta_germline.cwl + in: + bams: bams + reference: reference + call_regions: manta_call_regions + output_contigs: manta_output_contigs + out: + [diploid_variants, all_candidates, small_candidates, stats] + stage_raw_manta: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "manta" + files: + source: [manta/diploid_variants, manta/all_candidates, manta/small_candidates] + linkMerge: merge_flattened + directory: manta/stats + out: + [gathered_directory] + cnvnator: + run: joint_cnvnator.cwl + in: + reference: reference + sample_names: sample_names + bams: bams + bin_size: cnvnator_bin_size + out: + [vcfs, root_files, cn_files] + stage_raw_cnvnator: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "cnvnator" + files: + source: [cnvnator/vcfs, cnvnator/root_files, cnvnator/cn_files] + linkMerge: merge_flattened + out: + 
[gathered_directory] + cnvkit: + run: joint_cnvkit.cwl + in: + sample_names: sample_names + bams: bams + reference_fasta: reference + reference_cnn: cnvkit_reference_cnn + method: cnvkit_method + segment_filter: cnvkit_segment_filter + out: + [vcfs, cnr, cns] + stage_raw_cnvkit: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "cnvkit" + files: + source: [cnvkit/vcfs, cnvkit/cnr, cnvkit/cns] + linkMerge: merge_flattened + out: + [gathered_directory] + stage_raw: + run: ../tools/gather_to_sub_directory_dirs.cwl + in: + outdir: + valueFrom: "raw" + directories: + source: [stage_raw_smoove/gathered_directory, stage_raw_manta/gathered_directory, stage_raw_cnvnator/gathered_directory, stage_raw_cnvkit/gathered_directory] + linkMerge: merge_flattened + out: + [gathered_directory] +# stage 2, filtering + filter_smoove: + run: sv_joint_read_caller_filter.cwl + in: + reference: reference + sample_names: sample_names + bams: bams + filter_del_depth: filter_del_depth + filter_dup_depth: filter_dup_depth + filter_paired_count: filter_paired_count + filter_split_count: filter_split_count + filter_alt_abundance_percentage: filter_alt_abundance_percentage + sv_vcf: index_smoove/indexed_vcf + vcf_source: + default: "smoove" + out: + [vcfs] + filter_manta: + run: sv_joint_read_caller_filter.cwl + in: + reference: reference + sample_names: sample_names + bams: bams + filter_del_depth: filter_del_depth + filter_dup_depth: filter_dup_depth + filter_paired_count: filter_paired_count + filter_split_count: filter_split_count + filter_alt_abundance_percentage: filter_alt_abundance_percentage + sv_vcf: manta/diploid_variants + vcf_source: + default: "manta" + out: + [vcfs] + filter_cnvnator: + run: sv_joint_depth_caller_filter.cwl + in: + reference: reference + sample_names: sample_names + bams: bams + filter_del_depth: filter_del_depth + filter_dup_depth: filter_dup_depth + sv_vcfs: cnvnator/vcfs + vcf_source: + default: "cnvnator" + min_sv_size: 
filter_depth_caller_min_size + out: + [vcfs] + filter_cnvkit: + run: sv_joint_depth_caller_filter.cwl + in: + reference: reference + sample_names: sample_names + bams: bams + filter_del_depth: filter_del_depth + filter_dup_depth: filter_dup_depth + sv_vcfs: cnvkit/vcfs + vcf_source: + default: "cnvkit" + min_sv_size: filter_depth_caller_min_size + out: + [vcfs] + stage_filtered: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "filtered" + files: + source: [filter_smoove/vcfs, filter_manta/vcfs, filter_cnvnator/vcfs, filter_cnvkit/vcfs] + linkMerge: merge_flattened + out: + [gathered_directory] +# stage3, merge+annotate+filter + merge_svs: + run: merge_svs.cwl + in: + cohort_name: cohort_name + estimate_sv_distance: survivor_estimate_sv_distance + genome_build: genome_build + max_distance_to_merge: survivor_max_distance_to_merge + minimum_sv_calls: survivor_minimum_sv_calls + minimum_sv_size: survivor_minimum_sv_size + same_strand: survivor_same_strand + same_type: survivor_same_type + snps_vcf: snps_vcf + sv_vcfs: + source: [filter_smoove/vcfs, filter_manta/vcfs, filter_cnvnator/vcfs, filter_cnvkit/vcfs] + linkMerge: merge_flattened + blocklist_bedpe: filter_blocklist_bedpe + filter_pop_af: annotsv_filter_pop_af + filter_no_CDS: annotsv_filter_no_CDS + annotsv_annotations: annotsv_annotations + out: + [bcftools_merged_sv_vcf, bcftools_merged_annotated_tsv, bcftools_merged_unannotated_tsv, bcftools_merged_filtered_annotated_tsv, survivor_merged_sv_vcf, survivor_merged_annotated_tsv, survivor_merged_unannotated_tsv, survivor_merged_filtered_annotated_tsv] + stage_merged: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "merged" + files: + source: [merge_svs/bcftools_merged_sv_vcf, merge_svs/bcftools_merged_annotated_tsv, merge_svs/bcftools_merged_unannotated_tsv, merge_svs/bcftools_merged_filtered_annotated_tsv, merge_svs/survivor_merged_sv_vcf, merge_svs/survivor_merged_annotated_tsv, 
merge_svs/survivor_merged_unannotated_tsv, merge_svs/survivor_merged_filtered_annotated_tsv] + linkMerge: merge_flattened + out: + [gathered_directory] + stage_all: + run: ../tools/gather_to_sub_directory_dirs.cwl + in: + outdir: + valueFrom: "SV_pipeline" + directories: + source: [stage_raw/gathered_directory, stage_filtered/gathered_directory, stage_merged/gathered_directory] + linkMerge: merge_flattened + out: + [gathered_directory] From 76237792509dbc7a8b969f38d21949ebfd739395 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:29:09 -0500 Subject: [PATCH 22/35] add joint detect snps subworkflow This generates per sample gvcf files, jointly calls variants with gatk, annotates, filters, and stages the outputs. --- .../subworkflows/joint_detect_snps.cwl | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 definitions/subworkflows/joint_detect_snps.cwl diff --git a/definitions/subworkflows/joint_detect_snps.cwl b/definitions/subworkflows/joint_detect_snps.cwl new file mode 100644 index 00000000..a7ec4721 --- /dev/null +++ b/definitions/subworkflows/joint_detect_snps.cwl @@ -0,0 +1,190 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "joint germline snp variant detection" +requirements: + - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: SchemaDefRequirement + types: + - $import: ../types/vep_custom_annotation.yml + - class: StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: ScatterFeatureRequirement +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + bams: + type: File[] + secondaryFiles: [^.bai] + sample_names: + type: string[] + gvcf_gq_bands: + type: string[] + intervals: + type: + type: array + items: + type: array + items: string + contamination_fraction: + type: string[] + ploidy: + type: int? 
+ vep_cache_dir: + type: + - string + - Directory + vep_ensembl_assembly: + type: string + doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38" + vep_ensembl_version: + type: string + doc: "ensembl version - Must be present in the cache directory. Example: 95" + vep_ensembl_species: + type: string + doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus" + vep_plugins: + type: string[] + default: [Frameshift, Wildtype] + synonyms_file: + type: File? + annotate_coding_only: + type: boolean? + vep_custom_annotations: + type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] + doc: "custom type, check types directory for input format" + limit_variant_intervals: + type: File + variants_to_table_fields: + type: string[] + default: ['CHROM','POS','ID','REF','ALT'] + variants_to_table_genotype_fields: + type: string[] + vep_to_table_fields: + type: string[] + final_tsv_prefix: + type: string? + default: 'variants' + gnomad_max_pop_af: + type: float + default: 0.05 + min_conf_call: + type: float? 
+outputs: + sample_gvcfs: + type: File[] + outputSource: per_sample_merge_gvcfs/gvcf + raw_vcf: + type: File + outputSource: genotype/raw_vcf + secondaryFiles: [.tbi] + final_vcf: + type: File + outputSource: genotype/final_vcf + secondaryFiles: [.tbi] + filtered_vcf: + type: File + outputSource: genotype/filtered_vcf + secondaryFiles: [.tbi] + vep_summary: + type: File + outputSource: genotype/vep_summary + final_tsv: + type: File + outputSource: genotype/final_tsv + filtered_tsv: + type: File + outputSource: genotype/filtered_tsv + all_staged: + type: Directory + outputSource: stage_all/gathered_directory +steps: + per_sample_make_gvcfs: + scatter: [bam, contamination_fraction] + scatterMethod: dotproduct + run: gatk_haplotypecaller_iterator.cwl + in: + reference: reference + bam: bams + emit_reference_confidence: + default: 'GVCF' + gvcf_gq_bands: gvcf_gq_bands + intervals: intervals + contamination_fraction: contamination_fraction + ploidy: ploidy + out: + [gvcf] + per_sample_merge_gvcfs: + scatter: [gvcfs, output_file_name] + scatterMethod: dotproduct + run: ../tools/combine_gvcfs.cwl + in: + reference: reference + gvcfs: per_sample_make_gvcfs/gvcf + output_file_name: + source: [sample_names] + valueFrom: "$(self).merged.g.vcf.gz" + out: + [gvcf] + genotype: + run: joint_genotype.cwl + in: + reference: reference + gvcfs: + source: [per_sample_merge_gvcfs/gvcf] + linkMerge: merge_flattened + intervals: intervals + vep_cache_dir: vep_cache_dir + vep_ensembl_assembly: vep_ensembl_assembly + vep_ensembl_version: vep_ensembl_version + vep_ensembl_species: vep_ensembl_species + vep_plugins: vep_plugins + synonyms_file: synonyms_file + annotate_coding_only: annotate_coding_only + vep_custom_annotations: vep_custom_annotations + roi_intervals: limit_variant_intervals + variants_to_table_fields: variants_to_table_fields + variants_to_table_genotype_fields: variants_to_table_genotype_fields + vep_to_table_fields: vep_to_table_fields + final_tsv_prefix: final_tsv_prefix 
+ gnomad_max_pop_af: gnomad_max_pop_af + min_conf_call: min_conf_call + out: + [raw_vcf, annotated_vcf, final_vcf, filtered_vcf, vep_summary, final_tsv, filtered_tsv] + stage_gvcf: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "gvcfs" + files: + source: [per_sample_merge_gvcfs/gvcf] + linkMerge: merge_flattened + out: + [gathered_directory] + + stage_gatk: + run: ../tools/gather_to_sub_directory.cwl + in: + outdir: + valueFrom: "gatk" + files: + source: [genotype/raw_vcf, genotype/annotated_vcf, genotype/final_vcf, genotype/filtered_vcf, genotype/vep_summary, genotype/final_tsv, genotype/filtered_tsv] + linkMerge: merge_flattened + directory: stage_gvcf/gathered_directory + out: + [gathered_directory] + stage_all: + run: ../tools/gather_to_sub_directory_dirs.cwl + in: + outdir: + valueFrom: "SNP_pipeline" + directories: + source: [stage_gatk/gathered_directory] + linkMerge: merge_flattened + out: + [gathered_directory] From 065f8b33a49415719cd565b2f5a587155f142200 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:30:56 -0500 Subject: [PATCH 23/35] add joint detect variants This subworkflow calls the joint detect snps and joint detect svs subworkflows outputting the staged results --- .../subworkflows/joint_detect_variants.cwl | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 definitions/subworkflows/joint_detect_variants.cwl diff --git a/definitions/subworkflows/joint_detect_variants.cwl b/definitions/subworkflows/joint_detect_variants.cwl new file mode 100644 index 00000000..e1cf579b --- /dev/null +++ b/definitions/subworkflows/joint_detect_variants.cwl @@ -0,0 +1,204 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow +label: "joint variant detection(snps,svs)" +requirements: + - class: MultipleInputFeatureRequirement + - class: SubworkflowFeatureRequirement + - class: SchemaDefRequirement + types: + - $import: ../types/vep_custom_annotation.yml + - class:
StepInputExpressionRequirement + - class: InlineJavascriptRequirement + - class: ScatterFeatureRequirement +inputs: + reference: + type: + - string + - File + secondaryFiles: [.fai, ^.dict] + bams: + type: File[] + secondaryFiles: [^.bai] + sample_names: + type: string[] + cohort_name: + type: string + gvcf_gq_bands: + type: string[] + intervals: + type: + type: array + items: + type: array + items: string + contamination_fraction: + type: string[] + ploidy: + type: int? + vep_cache_dir: + type: + - string + - Directory + vep_ensembl_assembly: + type: string + doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38" + vep_ensembl_version: + type: string + doc: "ensembl version - Must be present in the cache directory. Example: 95" + vep_ensembl_species: + type: string + doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus" + vep_plugins: + type: string[] + default: [Frameshift, Wildtype] + synonyms_file: + type: File? + annotate_coding_only: + type: boolean? + vep_custom_annotations: + type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] + doc: "custom type, check types directory for input format" + limit_variant_intervals: + type: File + snp_to_table_fields: + type: string[] + default: ['CHROM','POS','ID','REF','ALT'] + snp_to_table_genotype_fields: + type: string[] + vep_to_table_fields: + type: string[] + snp_final_tsv_prefix: + type: string? + default: 'variants' + snp_gnomad_max_pop_af: + type: float + default: 0.05 + gatk_min_conf_call: + type: float? + + + sv_exclude_regions: + type: File? + manta_call_regions: + type: File? + manta_output_contigs: + type: boolean? + cnvnator_bin_size: + type: int? + cnvkit_method: + type: + - "null" + - type: enum + symbols: ["hybrid", "amplicon", "wgs"] + cnvkit_reference_cnn: + type: File? + cnvkit_segment_filter: + type: + - "null" + - type: enum + symbols: ["ampdel", "ci", "cn", "sem"] + sv_filter_del_depth: + type: double?
+ sv_filter_dup_depth: + type: double? + sv_filter_paired_count: + type: int? + sv_filter_split_count: + type: int? + sv_filter_alt_abundance_percentage: + type: double? + sv_filter_depth_caller_min_size: + type: int? + survivor_estimate_sv_distance: + type: boolean + survivor_max_distance_to_merge: + type: int + survivor_minimum_sv_calls: + type: int + survivor_minimum_sv_size: + type: int + survivor_same_strand: + type: boolean + survivor_same_type: + type: boolean + sv_filter_blocklist_bedpe: + type: File? + annotsv_filter_pop_af: + type: double? + annotsv_filter_no_CDS: + type: boolean? + annotsv_annotations: + type: + - string + - Directory +outputs: + snps_staged: + type: Directory + outputSource: detect_snps/all_staged + svs_staged: + type: Directory + outputSource: detect_svs/all_staged +steps: + detect_snps: + run: joint_detect_snps.cwl + in: + reference: reference + bams: bams + sample_names: sample_names + gvcf_gq_bands: gvcf_gq_bands + intervals: intervals + contamination_fraction: contamination_fraction + ploidy: ploidy + vep_cache_dir: vep_cache_dir + vep_ensembl_assembly: vep_ensembl_assembly + vep_ensembl_version: vep_ensembl_version + vep_ensembl_species: vep_ensembl_species + vep_plugins: vep_plugins + synonyms_file: synonyms_file + annotate_coding_only: annotate_coding_only + vep_custom_annotations: vep_custom_annotations + limit_variant_intervals: limit_variant_intervals + variants_to_table_fields: snp_to_table_fields + variants_to_table_genotype_fields: snp_to_table_genotype_fields + vep_to_table_fields: vep_to_table_fields + final_tsv_prefix: snp_final_tsv_prefix + gnomad_max_pop_af: snp_gnomad_max_pop_af + min_conf_call: gatk_min_conf_call + out: + [raw_vcf, all_staged] + detect_svs: + run: joint_detect_svs.cwl + in: + reference: reference + bams: bams + sample_names: sample_names + cohort_name: cohort_name + genome_build: vep_ensembl_assembly + exclude_regions: sv_exclude_regions + manta_call_regions: manta_call_regions +
manta_output_contigs: manta_output_contigs + cnvnator_bin_size: cnvnator_bin_size + cnvkit_method: cnvkit_method + cnvkit_reference_cnn: cnvkit_reference_cnn + cnvkit_segment_filter: cnvkit_segment_filter + filter_del_depth: sv_filter_del_depth + filter_dup_depth: sv_filter_dup_depth + filter_paired_count: sv_filter_paired_count + filter_split_count: sv_filter_split_count + filter_alt_abundance_percentage: sv_filter_alt_abundance_percentage + filter_depth_caller_min_size: sv_filter_depth_caller_min_size + survivor_estimate_sv_distance: survivor_estimate_sv_distance + survivor_max_distance_to_merge: survivor_max_distance_to_merge + survivor_minimum_sv_calls: survivor_minimum_sv_calls + survivor_minimum_sv_size: survivor_minimum_sv_size + survivor_same_strand: survivor_same_strand + survivor_same_type: survivor_same_type + snps_vcf: detect_snps/raw_vcf + filter_blocklist_bedpe: sv_filter_blocklist_bedpe + annotsv_filter_pop_af: annotsv_filter_pop_af + annotsv_filter_no_CDS: annotsv_filter_no_CDS + annotsv_annotations: annotsv_annotations + out: + [all_staged] From 70005be012b410c8ab66ee323b351966e26f3d65 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 10:41:30 -0500 Subject: [PATCH 24/35] pass annotsv_annotations input to subworkflow --- definitions/pipelines/germline_wgs.cwl | 1 + 1 file changed, 1 insertion(+) diff --git a/definitions/pipelines/germline_wgs.cwl b/definitions/pipelines/germline_wgs.cwl index a29bc142..ca61187a 100644 --- a/definitions/pipelines/germline_wgs.cwl +++ b/definitions/pipelines/germline_wgs.cwl @@ -473,6 +473,7 @@ steps: sv_split_count: sv_filter_split_count genome_build: vep_ensembl_assembly blocklist_bedpe: blocklist_bedpe + annotsv_annotations: annotsv_annotations out: [cn_diagram, cn_scatter_plot, tumor_antitarget_coverage, tumor_target_coverage, tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf, cnvnator_cn_file, cnvnator_root, cnvnator_vcf, manta_diploid_variants, manta_somatic_variants, manta_all_candidates, manta_small_candidates,
manta_tumor_only_variants, smoove_output_variants, cnvkit_filtered_vcf, cnvnator_filtered_vcf, manta_filtered_vcf, smoove_filtered_vcf, survivor_merged_vcf, survivor_merged_annotated_tsv, bcftools_merged_vcf, bcftools_merged_annotated_tsv, bcftools_merged_filtered_annotated_tsv] add_disclaimer_survivor_sv_vcf: From 0885e03b9381c1e5c84f8a2f45f6e8f52c4e6f35 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 15 Jul 2021 12:49:30 -0500 Subject: [PATCH 25/35] pass soft filtered annotated vcf as output --- definitions/subworkflows/joint_genotype.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index 76c6aa4c..d4bb1edb 100644 --- a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -76,7 +76,7 @@ outputs: secondaryFiles: [.tbi] annotated_vcf: type: File - outputSource: bgzip_index_annotated_vcf/indexed_vcf + outputSource: soft_filter/filtered_vcf secondaryFiles: [.tbi] final_vcf: type: File From e0449dce23bf7444902b3f9dd6e273e6d44fe9ef Mon Sep 17 00:00:00 2001 From: apaul7 Date: Wed, 17 Nov 2021 10:30:56 -0600 Subject: [PATCH 26/35] remove doc line for easy to understand input --- definitions/tools/bcftools_view.cwl | 1 - 1 file changed, 1 deletion(-) diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl index e43b67e4..b43bcb03 100644 --- a/definitions/tools/bcftools_view.cwl +++ b/definitions/tools/bcftools_view.cwl @@ -26,7 +26,6 @@ inputs: inputBinding: position: 4 prefix: "--output-type" - doc: "output file format" output_vcf_name: type: string? 
default: "bcftools_split.vcf.gz" From 20f96d842ae2adc64f8dbfe511589abca22bc4a5 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Wed, 17 Nov 2021 10:31:52 -0600 Subject: [PATCH 27/35] ubuntu:xenial -> ubuntu:focal docker image --- definitions/tools/gather_to_sub_directory.cwl | 2 +- definitions/tools/gather_to_sub_directory_dirs.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl index 1980cb47..8a885d14 100644 --- a/definitions/tools/gather_to_sub_directory.cwl +++ b/definitions/tools/gather_to_sub_directory.cwl @@ -6,7 +6,7 @@ baseCommand: ["/bin/bash","directory_gatherer.sh"] requirements: - class: DockerRequirement - dockerPull: "ubuntu:xenial" + dockerPull: "ubuntu:focal" - class: ResourceRequirement ramMin: 1000 - class: InitialWorkDirRequirement diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl index a83b45ad..226cc6a8 100644 --- a/definitions/tools/gather_to_sub_directory_dirs.cwl +++ b/definitions/tools/gather_to_sub_directory_dirs.cwl @@ -6,7 +6,7 @@ baseCommand: ["/bin/bash","directory_gatherer.sh"] requirements: - class: DockerRequirement - dockerPull: "ubuntu:xenial" + dockerPull: "ubuntu:focal" - class: ResourceRequirement ramMin: 1000 - class: InitialWorkDirRequirement From 91b0e5fbd0e9ca4f2bc0467239aed40b9d087627 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Wed, 17 Nov 2021 10:37:14 -0600 Subject: [PATCH 28/35] quote parameters in script --- definitions/tools/custom_merge_sv_records.cwl | 6 +++--- definitions/tools/gather_to_sub_directory.cwl | 2 +- definitions/tools/gather_to_sub_directory_dirs.cwl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/definitions/tools/custom_merge_sv_records.cwl b/definitions/tools/custom_merge_sv_records.cwl index df6d267d..e0770013 100644 --- a/definitions/tools/custom_merge_sv_records.cwl +++ 
b/definitions/tools/custom_merge_sv_records.cwl @@ -19,10 +19,10 @@ requirements: INPUT="$1" OUTPUT="$2" DISTANCE="$3" - /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i $INPUT -o $OUTPUT -w $DISTANCE + /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE" - /usr/local/bin/bgzip $OUTPUT - /usr/local/bin/tabix -p vcf $OUTPUT.gz + /usr/local/bin/bgzip "$OUTPUT" + /usr/local/bin/tabix -p vcf "$OUTPUT".gz inputs: diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl index 8a885d14..16a759a5 100644 --- a/definitions/tools/gather_to_sub_directory.cwl +++ b/definitions/tools/gather_to_sub_directory.cwl @@ -19,7 +19,7 @@ requirements: files="${@:2}" mkdir $outdir chmod -R 777 $outdir - cp --recursive --preserve --no-clobber --target-directory $outdir $files + cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files" exit 0 diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl index 226cc6a8..7fc56595 100644 --- a/definitions/tools/gather_to_sub_directory_dirs.cwl +++ b/definitions/tools/gather_to_sub_directory_dirs.cwl @@ -19,7 +19,7 @@ requirements: files="${@:2}" mkdir $outdir chmod -R 777 $outdir - cp --recursive --preserve --no-clobber --target-directory $outdir $files + cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files" exit 0 From 8079ab457780c2d9f7e5f5488529e6a87dfd0163 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Fri, 3 Dec 2021 15:47:16 -0600 Subject: [PATCH 29/35] fix quotes --- definitions/tools/gather_to_sub_directory.cwl | 6 +++--- definitions/tools/gather_to_sub_directory_dirs.cwl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl index 16a759a5..10b9df49 100644 --- a/definitions/tools/gather_to_sub_directory.cwl +++ 
b/definitions/tools/gather_to_sub_directory.cwl @@ -17,9 +17,9 @@ requirements: outdir="$1" files="${@:2}" - mkdir $outdir - chmod -R 777 $outdir - cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files" + mkdir "$outdir" + chmod -R 777 "$outdir" + cp --recursive --preserve --no-clobber --target-directory "$outdir" $files exit 0 diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl index 7fc56595..88c6c0c8 100644 --- a/definitions/tools/gather_to_sub_directory_dirs.cwl +++ b/definitions/tools/gather_to_sub_directory_dirs.cwl @@ -17,9 +17,9 @@ requirements: outdir="$1" files="${@:2}" - mkdir $outdir - chmod -R 777 $outdir - cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files" + mkdir "$outdir" + chmod -R 777 "$outdir" + cp --recursive --preserve --no-clobber --target-directory "$outdir" $files exit 0 From 9b6a9cb81e9208a8dc8e99acfbde866c57efe6cf Mon Sep 17 00:00:00 2001 From: apaul7 Date: Fri, 3 Dec 2021 15:49:40 -0600 Subject: [PATCH 30/35] move script inline cwl file --- definitions/tools/custom_merge_sv_records.cwl | 69 +++++++++++++++---- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/definitions/tools/custom_merge_sv_records.cwl b/definitions/tools/custom_merge_sv_records.cwl index e0770013..1f134761 100644 --- a/definitions/tools/custom_merge_sv_records.cwl +++ b/definitions/tools/custom_merge_sv_records.cwl @@ -4,46 +4,89 @@ cwlVersion: v1.0 class: CommandLineTool label: "merges nearby DEL/DUP records within a certain window distance" -baseCommand: ["/bin/bash", "run_merge.sh"] +baseCommand: ["python3", "merge.py"] requirements: - class: ResourceRequirement ramMin: 4000 - class: DockerRequirement - dockerPull: "apaul7/analysis:1.0.0" + dockerPull: "griffithlab/vatools:4.1.0" - class: InitialWorkDirRequirement listing: - - entryname: "run_merge.sh" + - entryname: "merge.py" entry: | - #!/bin/bash - set -eou pipefail - INPUT="$1" - 
OUTPUT="$2" - DISTANCE="$3" - /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE" + import argparse + import vcfpy + from collections import OrderedDict - /usr/local/bin/bgzip "$OUTPUT" - /usr/local/bin/tabix -p vcf "$OUTPUT".gz + parser = argparse.ArgumentParser() + parser.add_argument('--input', '-i', dest="input", help='input vcf file', required=True, action="store") + parser.add_argument('--output', '-o', dest="output", help='output vcf file', required=False, default="out.vcf", action="store") + parser.add_argument('--window', '-w', dest="window", help='max merge window size', required=False, default=1000, type=int, action="store") + args = parser.parse_args() + in_vcf_name = args.input + out_vcf_name = args.output + window_size = args.window + + reader = vcfpy.Reader.from_path(in_vcf_name) + new_header = reader.header + new_header.add_filter_line(vcfpy.OrderedDict([('ID', 'MERGED_CALL'), ('Description', 'Record merged from 2 or more individual records')])) + + writer = vcfpy.Writer.from_path(out_vcf_name, new_header) + new_record_count = 0 + merge_records = [] + for record in reader: + if((len(merge_records) == 0) or (merge_records[-1].CHROM != record.CHROM) or (merge_records[-1].INFO['SVTYPE'] != record.INFO['SVTYPE']) or (abs(merge_records[-1].INFO['END'] - record.POS) > window_size)): + if(len(merge_records) > 1): + new_record_count = new_record_count + 1 + new_record_chr = merge_records[0].CHROM + new_record_start = merge_records[0].POS + new_record_end = merge_records[-1].INFO['END'] + new_record_type = merge_records[0].INFO['SVTYPE'] + new_record_svlen = new_record_end - new_record_start + + info = OrderedDict({"SVTYPE": new_record_type, "END": new_record_end, "SVLEN": new_record_svlen}) + alt = vcfpy.SymbolicAllele(new_record_type) + sample_calls = [] + for sample in merge_records[0].calls: + gt = OrderedDict({"GT": "/".join(map(str, sample.gt_alleles)).replace("None",".")}) + name = sample.sample + 
sample_calls.append(vcfpy.Call(name, gt)) + + new_record = vcfpy.Record(new_record_chr, new_record_start, [], "N", [alt], ".", ["MERGED_CALL"], info, ["GT"], sample_calls) + writer.write_record(new_record) + merge_records = [record] + else: + merge_records = [record] + writer.write_record(record) + next + + dist = abs(merge_records[-1].INFO['END'] - record.POS) + if(dist < window_size): + merge_records.append(record) + print(f"Found {new_record_count} records that can be merged based on the input {window_size} distance") inputs: input_vcf: type: File inputBinding: + prefix: "-i" position: 1 output_vcf_name: type: string? default: "record_merged.vcf" inputBinding: + prefix: "-o" position: 2 distance: type: int? default: 1000 inputBinding: + prefix: "-w" position: 3 outputs: vcf: type: File outputBinding: - glob: "$(inputs.output_vcf_name).gz" - secondaryFiles: [.tbi] + glob: "$(inputs.output_vcf_name)" From 6e5cb2304fba57d37374c9fbc449ad1b080ef258 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Fri, 3 Dec 2021 15:52:04 -0600 Subject: [PATCH 31/35] add input option for output file basename --- definitions/subworkflows/gatk_soft_filter.cwl | 6 ++++-- definitions/subworkflows/joint_genotype.cwl | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl index 61780268..4f3b76af 100644 --- a/definitions/subworkflows/gatk_soft_filter.cwl +++ b/definitions/subworkflows/gatk_soft_filter.cwl @@ -16,6 +16,9 @@ inputs: vcf: type: File secondaryFiles: [.tbi] + output_basename: + type: string? 
+ default: "soft_filtered" outputs: filtered_vcf: type: File @@ -69,8 +72,7 @@ steps: merge: run: ../tools/merge_vcf.cwl in: - merged_vcf_basename: - default: "soft_filtered" + merged_vcf_basename: output_basename vcfs: source: [filter_snps/filtered_vcf, filter_indels/filtered_vcf] linkMerge: merge_flattened diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl index d4bb1edb..80e8bff6 100644 --- a/definitions/subworkflows/joint_genotype.cwl +++ b/definitions/subworkflows/joint_genotype.cwl @@ -172,6 +172,8 @@ steps: in: reference: reference vcf: bgzip_index_annotated_vcf/indexed_vcf + output_basename: + default: "annotated" out: [filtered_vcf] filter_vcf: From ac09653fb1931113cca2585d5888934077624d17 Mon Sep 17 00:00:00 2001 From: Alex Paul Date: Mon, 6 Dec 2021 10:07:29 -0600 Subject: [PATCH 32/35] Update definitions/subworkflows/gatk_soft_filter.cwl Co-authored-by: Thomas B. Mooney --- definitions/subworkflows/gatk_soft_filter.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl index 4f3b76af..eaf6845b 100644 --- a/definitions/subworkflows/gatk_soft_filter.cwl +++ b/definitions/subworkflows/gatk_soft_filter.cwl @@ -2,7 +2,7 @@ cwlVersion: v1.0 class: Workflow -label: "apply soft filtering to a gatk called vcf using hard filter paramaters" +label: "apply soft filtering to a gatk called vcf using hard filter parameters" requirements: - class: SubworkflowFeatureRequirement - class: StepInputExpressionRequirement From 1cb02546fca81fa997503c1035478e7527cdb60f Mon Sep 17 00:00:00 2001 From: Alex Paul Date: Mon, 6 Dec 2021 10:07:35 -0600 Subject: [PATCH 33/35] Update definitions/tools/bcftools_view.cwl Co-authored-by: Thomas B. 
Mooney --- definitions/tools/bcftools_view.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl index b43bcb03..4efeb9a0 100644 --- a/definitions/tools/bcftools_view.cwl +++ b/definitions/tools/bcftools_view.cwl @@ -17,7 +17,7 @@ inputs: inputBinding: position: 1 prefix: "--samples" - doc: "comma separated list of samples to include (or exclude with '^' prefix)" + doc: "comma-separated list of samples to include (or exclude with '^' prefix)" output_type: type: type: enum From c253e6f8513462c43612d859d11f1a59bcb88405 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Tue, 7 Dec 2021 08:36:36 -0600 Subject: [PATCH 34/35] add doc for output type --- definitions/tools/bcftools_merge.cwl | 2 +- definitions/tools/bcftools_view.cwl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/definitions/tools/bcftools_merge.cwl b/definitions/tools/bcftools_merge.cwl index 57daeaec..17909417 100644 --- a/definitions/tools/bcftools_merge.cwl +++ b/definitions/tools/bcftools_merge.cwl @@ -43,7 +43,7 @@ inputs: inputBinding: position: 4 prefix: "--output-type" - doc: "output file format" + doc: "output file format, b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF" output_vcf_name: type: string? default: "bcftools_merged.vcf.gz" diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl index 4efeb9a0..fa2e18e6 100644 --- a/definitions/tools/bcftools_view.cwl +++ b/definitions/tools/bcftools_view.cwl @@ -26,6 +26,7 @@ inputs: inputBinding: position: 4 prefix: "--output-type" + doc: "output file format, b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF" output_vcf_name: type: string? 
default: "bcftools_split.vcf.gz" From 140e6ebbcdbe95f917d5f71728d1fa4cf19b2bb7 Mon Sep 17 00:00:00 2001 From: apaul7 Date: Thu, 9 Dec 2021 09:37:16 -0600 Subject: [PATCH 35/35] use bash arrays to quote multiple vars --- definitions/tools/gather_to_sub_directory.cwl | 4 ++-- definitions/tools/gather_to_sub_directory_dirs.cwl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl index 10b9df49..d4ad5fad 100644 --- a/definitions/tools/gather_to_sub_directory.cwl +++ b/definitions/tools/gather_to_sub_directory.cwl @@ -16,10 +16,10 @@ requirements: set -eou pipefail outdir="$1" - files="${@:2}" + files=("${@:2}") mkdir "$outdir" chmod -R 777 "$outdir" - cp --recursive --preserve --no-clobber --target-directory "$outdir" $files + cp --recursive --preserve --no-clobber --target-directory "$outdir" "${files[@]}" exit 0 diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl index 88c6c0c8..efc14d63 100644 --- a/definitions/tools/gather_to_sub_directory_dirs.cwl +++ b/definitions/tools/gather_to_sub_directory_dirs.cwl @@ -16,10 +16,10 @@ requirements: set -eou pipefail outdir="$1" - files="${@:2}" + files=("${@:2}") mkdir "$outdir" chmod -R 777 "$outdir" - cp --recursive --preserve --no-clobber --target-directory "$outdir" $files + cp --recursive --preserve --no-clobber --target-directory "$outdir" "${files[@]}" exit 0