From 5ef76b23d0d593c33ae354a1e658cef4553f6a02 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 08:55:48 -0500
Subject: [PATCH 01/35] add input to control full output filename

Add an optional output_name input. When it is not supplied the script
falls back to "renamed.<input basename>", as before. The fallback is
computed in the script (and mirrored in the output glob) because CWL
does not evaluate expressions inside an input's default value.
---
 definitions/tools/replace_vcf_sample_name.cwl | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/definitions/tools/replace_vcf_sample_name.cwl b/definitions/tools/replace_vcf_sample_name.cwl
index 6c1e82fa..a584c2bc 100644
--- a/definitions/tools/replace_vcf_sample_name.cwl
+++ b/definitions/tools/replace_vcf_sample_name.cwl
@@ -16,8 +16,10 @@ requirements:
         entry: |
           #!/bin/bash
           set -eou pipefail
           basen=`basename "$3"`
           basen="renamed.$basen"
+          #an explicit, non-empty 4th argument overrides the default output name
+          if [ "$#" -ge 4 ] && [ -n "$4" ]; then basen="$4"; fi
 
           #escape spaces, otherwise bcftools will try to use them as a delimiter
           #triple backslash to escape within backticks and then again within sed
@@ -43,9 +45,13 @@ inputs:
         inputBinding:
             position: 2
         doc: "Sample name to replace the other"
-
+    output_name:
+        type: string?
+        inputBinding:
+            position: 4
+        doc: "output filename for vcf; defaults to renamed.<input_vcf basename> when omitted"
 outputs:
     renamed_vcf:
         type: File
         outputBinding:
-            glob: $("renamed." + inputs.input_vcf.basename)
+            glob: '$(inputs.output_name || ("renamed." + inputs.input_vcf.basename))'

From 15db8c7904d56466e6b48ec043f929b7255f5ab8 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 08:58:30 -0500
Subject: [PATCH 02/35] add minimum confidence input for gatk calls

---
 definitions/tools/gatk_genotypegvcfs.cwl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/definitions/tools/gatk_genotypegvcfs.cwl b/definitions/tools/gatk_genotypegvcfs.cwl
index 23c4f1b6..258fd42d 100644
--- a/definitions/tools/gatk_genotypegvcfs.cwl
+++ b/definitions/tools/gatk_genotypegvcfs.cwl
@@ -44,6 +44,12 @@ inputs:
                   prefix: "-L"
         inputBinding:
             position: 4
+    min_conf_call:
+        type: float?
+        inputBinding:
+            prefix: "-stand-call-conf"
+            position: 5
+        doc: "The minimum phred-scaled confidence threshold at which variants should be called"
 outputs:
     genotype_vcf:
         type: File

From cc85818d7d3dfcb135e0365041b740586375e61d Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:15:31 -0500
Subject: [PATCH 03/35] s/all_cds/no_cds/

Rename the input (and the script's --all-CDS flag) to no_CDS to make clear that setting it skips the coding sequence (CDS) overlap requirement.
---
 definitions/subworkflows/merge_svs.cwl |  3 +++
 definitions/tools/annotsv_filter.cwl   | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl
index 02373d00..1f6585a6 100644
--- a/definitions/subworkflows/merge_svs.cwl
+++ b/definitions/subworkflows/merge_svs.cwl
@@ -32,6 +32,8 @@ inputs:
         type: File[]
     blocklist_bedpe:
         type: File?
+    filter_no_CDS:
+        type: boolean?
 outputs:
     bcftools_merged_sv_vcf:
         type: File
@@ -123,5 +125,6 @@ steps:
             annotsv_tsv: bcftools_annotate_variants/sv_variants_tsv
             filtering_frequency:
                 default: "0.05"
+            no_CDS: filter_no_CDS
         out:
             [filtered_tsv]
diff --git a/definitions/tools/annotsv_filter.cwl b/definitions/tools/annotsv_filter.cwl
index 85a16272..9ed40a43 100644
--- a/definitions/tools/annotsv_filter.cwl
+++ b/definitions/tools/annotsv_filter.cwl
@@ -23,14 +23,14 @@ requirements:
             parser.add_argument('--input', '-i', dest="input", help='input AnnotSV tsv file', required=True, action="store")
             parser.add_argument('--output', '-o', dest="output", help='output tsv file name', required=True, action="store")
             parser.add_argument('--filtering_frequency', dest="filtering_frequency", help="frequency to filter with", action="store", type=float, default="0.05")
-            parser.add_argument('--all-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true")
+            parser.add_argument('--no-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true")
             parser.add_argument('--ignore-pass-filter', dest="filter", help="Do not require calls to have a PASS filter", action="store_true")
 
             args = parser.parse_args()
             input_file_name  = args.input
             output_file_name = args.output
             filtering_frequency = args.filtering_frequency
-            all_cds = args.CDS
+            no_cds = args.CDS
             ignore_pass_filter = args.filter
 
             with open(input_file_name, 'r') as file_in, open(output_file_name, 'w') as file_out:
@@ -43,7 +43,7 @@ requirements:
                     total_sv_count += 1
                     if(row['AnnotSV type'] == 'split' \
                         and (row['FILTER'] == 'PASS' or ignore_pass_filter) \
-                        and (int(row['CDS length']) > 0 or all_cds) \
+                        and (int(row['CDS length']) > 0 or no_cds) \
                         and float(row['IMH_AF']) < filtering_frequency
                         and float(row['1000g_max_AF']) < filtering_frequency
                         and not(float(row['DGV_LOSS_Frequency']) > filtering_frequency and 'DEL' in row['SV type']) 
@@ -55,11 +55,11 @@ requirements:
                 print("total sv passed count:",pass_sv_count)
 
 inputs:
-    all_CDS:
+    no_CDS:
         type: boolean?
         inputBinding:
             position: 1
-            prefix: "--all-CDS"
+            prefix: "--no-CDS"
     annotsv_tsv:
         type: File
         inputBinding:

From 9e8876d043191ef1f2761b5622bec916674d53ea Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:25:31 -0500
Subject: [PATCH 04/35] add survivor merged annotsv tsv filtering

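Allow filtering of the survivor-merged AnnotSV tsv, and allow control over
the population allele frequency cutoff (still defaults to 0.05).
Also correct the DGV_GAIN_Frequency comparison from '<' to '>' so that it
matches the DGV_LOSS_Frequency check.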
---
 definitions/subworkflows/merge_svs.cwl | 28 +++++++++++++++++++++++---
 definitions/tools/annotsv_filter.cwl   | 14 ++++++++++---
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl
index 1f6585a6..464529e6 100644
--- a/definitions/subworkflows/merge_svs.cwl
+++ b/definitions/subworkflows/merge_svs.cwl
@@ -32,6 +32,9 @@ inputs:
         type: File[]
     blocklist_bedpe:
         type: File?
+    filter_pop_af:
+        type: double?
+        default: "0.05"
     filter_no_CDS:
         type: boolean?
 outputs:
@@ -50,6 +53,9 @@ outputs:
     survivor_merged_annotated_tsv:
         type: File
         outputSource: survivor_annotate_variants/sv_variants_tsv
+    survivor_merged_filtered_annotated_tsv:
+        type: File
+        outputSource: survivor_annotsv_filter/filtered_tsv
 steps:
     survivor_merge_sv_vcfs:
         run: ../tools/survivor.cwl
@@ -86,6 +92,18 @@ steps:
                 valueFrom: ${ return [ self ]; }
         out:
             [sv_variants_tsv]
+    survivor_annotsv_filter:
+        run: ../tools/annotsv_filter.cwl
+        in:
+            annotsv_tsv: survivor_annotate_variants/annotated_tsv
+            filtering_frequency: filter_pop_af
+            no_CDS: filter_no_CDS
+            survivor_merged:
+                default: true
+            output_tsv_name:
+                default: "survivor-merged-AnnotSV-filtered.tsv"
+        out:
+            [filtered_tsv]
     bcftools_merge_sv_vcfs:
         run: ../tools/bcftools_merge.cwl
         in:
@@ -122,9 +140,13 @@ steps:
     bcftools_annotsv_filter:
         run: ../tools/annotsv_filter.cwl
         in:
-            annotsv_tsv: bcftools_annotate_variants/sv_variants_tsv
-            filtering_frequency:
-                default: "0.05"
+            annotsv_tsv: bcftools_annotate_variants/annotated_tsv
+            filtering_frequency: filter_pop_af
             no_CDS: filter_no_CDS
+            survivor_merged:
+                default: false
+            output_tsv_name:
+                default: "bcftools-merged-AnnotSV-filtered.tsv"
+
         out:
             [filtered_tsv]
diff --git a/definitions/tools/annotsv_filter.cwl b/definitions/tools/annotsv_filter.cwl
index 9ed40a43..e69c4f30 100644
--- a/definitions/tools/annotsv_filter.cwl
+++ b/definitions/tools/annotsv_filter.cwl
@@ -25,6 +25,7 @@ requirements:
             parser.add_argument('--filtering_frequency', dest="filtering_frequency", help="frequency to filter with", action="store", type=float, default="0.05")
             parser.add_argument('--no-CDS', dest="CDS", help="Do not require a positive CoDing Sequence overlap", action="store_true")
             parser.add_argument('--ignore-pass-filter', dest="filter", help="Do not require calls to have a PASS filter", action="store_true")
+            parser.add_argument('--survivor-merged', dest="survivor", help="survivor merge filtering, drop the last filter step", action="store_true")
 
             args = parser.parse_args()
             input_file_name  = args.input
@@ -32,6 +33,7 @@ requirements:
             filtering_frequency = args.filtering_frequency
             no_cds = args.CDS
             ignore_pass_filter = args.filter
+            survivor_merged = args.survivor
 
             with open(input_file_name, 'r') as file_in, open(output_file_name, 'w') as file_out:
                 file_in = csv.DictReader(file_in, delimiter='\t')
@@ -47,8 +49,8 @@ requirements:
                         and float(row['IMH_AF']) < filtering_frequency
                         and float(row['1000g_max_AF']) < filtering_frequency
                         and not(float(row['DGV_LOSS_Frequency']) > filtering_frequency and 'DEL' in row['SV type']) 
-                        and not(float(row['DGV_GAIN_Frequency']) < filtering_frequency and ('DUP' in row['SV type'] or 'INS' in row['SV type']))
-                        and not(('Manta' in row['ID'] and 'IMPRECISE' in row['INFO']) or (row['QUAL'] != '.' and 'IMPRECISE' in row['INFO'])) ):
+                        and not(float(row['DGV_GAIN_Frequency']) > filtering_frequency and ('DUP' in row['SV type'] or 'INS' in row['SV type']))
+                        and (survivor_merged or not(('Manta' in row['ID'] and 'IMPRECISE' in row['INFO']) or (row['QUAL'] != '.' and 'IMPRECISE' in row['INFO'])))):
                         file_out.writerow(row)
                         pass_sv_count += 1
                 print("total sv count:",total_sv_count)
@@ -75,11 +77,17 @@ inputs:
         inputBinding:
             position: 4
             prefix: "--ignore-pass-filter"
+    survivor_merged:
+        type: boolean
+        default: false
+        inputBinding:
+            position: 5
+            prefix: "--survivor-merged"
     output_tsv_name:
         type: string?
         default: "filtered-bcftools-merged-AnnotSV.tsv"
         inputBinding:
-            position: 5
+            position: 6
             prefix: "--output"
 
 outputs:

From 47c368aa9861c24c52b79482bff920997ce1ebd1 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:34:55 -0500
Subject: [PATCH 05/35] update annotsv to version 2.3

version 2.3 requires the annotation directory to be passed as an input.
Also capture the unannotated event tsv as an output.
---
 definitions/pipelines/germline_wgs.cwl        |  5 +++
 definitions/subworkflows/merge_svs.cwl        | 25 +++++++++++----
 .../subworkflows/single_sample_sv_callers.cwl |  6 ++++
 definitions/tools/annotsv.cwl                 | 32 ++++++++++++-------
 4 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/definitions/pipelines/germline_wgs.cwl b/definitions/pipelines/germline_wgs.cwl
index a6650249..a29bc142 100644
--- a/definitions/pipelines/germline_wgs.cwl
+++ b/definitions/pipelines/germline_wgs.cwl
@@ -152,6 +152,11 @@ inputs:
     disclaimer_text:
         type: string?
         default: 'Workflow source can be found at https://github.com/genome/analysis-workflows'
+    annotsv_annotations:
+        type:
+            - string
+            - Directory
+        doc: "directory/path of the annotsv annotations directory"
 outputs:
     cram:
         type: File
diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl
index 464529e6..3e53b8ac 100644
--- a/definitions/subworkflows/merge_svs.cwl
+++ b/definitions/subworkflows/merge_svs.cwl
@@ -37,13 +37,21 @@ inputs:
         default: "0.05"
     filter_no_CDS:
         type: boolean?
+    annotsv_annotations:
+        type:
+            - string
+            - Directory
+        doc: "directory/path of the annotsv annotations directory"
 outputs:
     bcftools_merged_sv_vcf:
         type: File
         outputSource: filter_blocklist_bcftools/filtered_sv_vcf
     bcftools_merged_annotated_tsv:
         type: File
-        outputSource: bcftools_annotate_variants/sv_variants_tsv
+        outputSource: bcftools_annotate_variants/annotated_tsv
+    bcftools_merged_unannotated_tsv:
+        type: File
+        outputSource: bcftools_annotate_variants/unannotated_tsv
     bcftools_merged_filtered_annotated_tsv:
        type: File
        outputSource: bcftools_annotsv_filter/filtered_tsv
@@ -52,7 +60,10 @@ outputs:
         outputSource: filter_blocklist_survivor/filtered_sv_vcf
     survivor_merged_annotated_tsv:
         type: File
-        outputSource: survivor_annotate_variants/sv_variants_tsv
+        outputSource: survivor_annotate_variants/annotated_tsv
+    survivor_merged_unannotated_tsv:
+        type: File
+        outputSource: survivor_annotate_variants/unannotated_tsv
     survivor_merged_filtered_annotated_tsv:
         type: File
         outputSource: survivor_annotsv_filter/filtered_tsv
@@ -90,8 +101,9 @@ steps:
             snps_vcf:
                 source: [snps_vcf]
                 valueFrom: ${ return [ self ]; }
+            annotations: annotsv_annotations
         out:
-            [sv_variants_tsv]
+            [annotated_tsv, unannotated_tsv]
     survivor_annotsv_filter:
         run: ../tools/annotsv_filter.cwl
         in:
@@ -130,13 +142,14 @@ steps:
         in:
             genome_build: genome_build
             input_vcf: filter_blocklist_bcftools/filtered_sv_vcf
-            output_tsv_name:
-                default: "bcftools-merged-AnnotSV.tsv"
+            output_base:
+                default: "bcftools-merged-AnnotSV"
             snps_vcf:
                 source: [snps_vcf]
                 valueFrom: ${ return [ self ]; }
+            annotations: annotsv_annotations
         out:
-            [sv_variants_tsv]
+            [annotated_tsv, unannotated_tsv]
     bcftools_annotsv_filter:
         run: ../tools/annotsv_filter.cwl
         in:
diff --git a/definitions/subworkflows/single_sample_sv_callers.cwl b/definitions/subworkflows/single_sample_sv_callers.cwl
index 01449b20..2275c156 100644
--- a/definitions/subworkflows/single_sample_sv_callers.cwl
+++ b/definitions/subworkflows/single_sample_sv_callers.cwl
@@ -77,6 +77,11 @@ inputs:
         type: int?
     blocklist_bedpe:
         type: File?
+    annotsv_annotations:
+        type:
+            - string
+            - Directory
+        doc: "directory/path of the annotsv annotations directory"
 outputs:
     cn_diagram:
         type: File?
@@ -300,5 +305,6 @@ steps:
             sv_vcfs:
                 source: [run_cnvkit_filter/filtered_vcf, run_cnvnator_filter/filtered_vcf, run_manta_filter/filtered_vcf, run_smoove_filter/filtered_vcf]
                 linkMerge: merge_flattened
+            annotsv_annotations: annotsv_annotations
         out:
             [bcftools_merged_sv_vcf, bcftools_merged_annotated_tsv, bcftools_merged_filtered_annotated_tsv, survivor_merged_sv_vcf, survivor_merged_annotated_tsv]
diff --git a/definitions/tools/annotsv.cwl b/definitions/tools/annotsv.cwl
index 35c6e153..63f5f4b0 100644
--- a/definitions/tools/annotsv.cwl
+++ b/definitions/tools/annotsv.cwl
@@ -3,12 +3,12 @@
 cwlVersion: v1.0
 class: CommandLineTool
 
-arguments: ["/opt/AnnotSV_2.1/bin/AnnotSV", "-bedtools", "/usr/bin/bedtools", "-outputDir", "$(runtime.outdir)"]
+arguments: ["/opt/AnnotSV_2.3/bin/AnnotSV", "-bedtools", "/usr/bin/bedtools", "-outputDir", "$(runtime.outdir)",  "-outputFile", "$(inputs.output_base).tsv"]
 requirements:
     - class: ResourceRequirement
       ramMin: 8000
     - class: DockerRequirement
-      dockerPull: "mgibio/annotsv-cwl:2.1"
+      dockerPull: "mgibio/annotsv-cwl:2.3"
 
 inputs:
     genome_build:
@@ -16,29 +16,39 @@ inputs:
         inputBinding:
             position: 2
             prefix: "-genomeBuild"
+        doc: "genome build used, GRCh37(tool default), GRCh38, mm9, or mm10"
     input_vcf:
         type: File
         inputBinding:
             position: 3
             prefix: "-SVinputFile"
         doc: "vcf file to filter"
-    output_tsv_name:
+    output_base:
         type: string?
-        default: "AnnotSV.tsv"
+        default: "AnnotSV"
         inputBinding:
-            position: 4
-            prefix: "-outputFile"
-        doc: "output file name"
+        doc: "base for output file name"
     snps_vcf:
         type: File[]?
         inputBinding:
             position: 5
-            prefix: "-vcfFiles"
+            prefix: "-snvIndelFiles"
             itemSeparator: ","
         doc: "snps vcf(s) for adding hom/het snp counts found within svs"
-
+    annotations:
+        type:
+            - string
+            - Directory
+        inputBinding:
+            position: 6
+            prefix: "-annotationsDir"
+        doc: "directory/path of the annotsv annotations directory"
 outputs:
-    sv_variants_tsv:
+    annotated_tsv:
+        type: File
+        outputBinding:
+            glob: "$(inputs.output_base).tsv"
+    unannotated_tsv:
         type: File
         outputBinding:
-            glob: $(inputs.output_tsv_name)
+            glob: "$(inputs.output_base).unannotated.tsv"

From 70ab64ad67a0fd90ff9ae713495c8715432a9401 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:38:16 -0500
Subject: [PATCH 06/35] s/SURVIVOR/survivor/ and s/CNVnator/cnvnator/

changing output names for consistency
---
 definitions/subworkflows/merge_svs.cwl | 8 ++++----
 definitions/tools/cnvnator.cwl         | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl
index 3e53b8ac..06f12044 100644
--- a/definitions/subworkflows/merge_svs.cwl
+++ b/definitions/subworkflows/merge_svs.cwl
@@ -79,7 +79,7 @@ steps:
             estimate_sv_distance: estimate_sv_distance
             minimum_sv_size: minimum_sv_size
             cohort_name:
-                default: "SURVIVOR-sv-merged.vcf"
+                default: "survivor-sv-merged.vcf"
         out:
             [merged_vcf]
     filter_blocklist_survivor:
@@ -88,7 +88,7 @@ steps:
             input_vcf: survivor_merge_sv_vcfs/merged_vcf
             blocklist_bedpe: blocklist_bedpe
             output_vcf_basename:
-                default: "SURVIVOR-sv-merged"
+                default: "survivor-sv-merged"
         out:
             [filtered_sv_vcf]
     survivor_annotate_variants:
@@ -96,8 +96,8 @@ steps:
         in:
             genome_build: genome_build
             input_vcf: filter_blocklist_survivor/filtered_sv_vcf
-            output_tsv_name:
-                default: "SURVIVOR-merged-AnnotSV.tsv"
+            output_base:
+                default: "survivor-merged-AnnotSV"
             snps_vcf:
                 source: [snps_vcf]
                 valueFrom: ${ return [ self ]; }
diff --git a/definitions/tools/cnvnator.cwl b/definitions/tools/cnvnator.cwl
index 0d2f6062..546ba4f2 100644
--- a/definitions/tools/cnvnator.cwl
+++ b/definitions/tools/cnvnator.cwl
@@ -45,10 +45,10 @@ requirements:
           # read depth signal partitioning
           cnvnator -root "$SAMPLE.root" -partition "$BIN_SIZE" -chrom $CHROMOSOMES
           # cnv calling
-          cnvnator -root "$SAMPLE.root" -call "$BIN_SIZE" -chrom $CHROMOSOMES > "$SAMPLE.CNVnator.cn"
+          cnvnator -root "$SAMPLE.root" -call "$BIN_SIZE" -chrom $CHROMOSOMES > "$SAMPLE.cnvnator.cn"
 
           # convert to vcf
-          cnvnator2VCF.pl -reference "$REFERENCE" "$SAMPLE.CNVnator.cn" FASTA_CHRS/ >  "$SAMPLE.CNVnator.vcf"
+          cnvnator2VCF.pl -reference "$REFERENCE" "$SAMPLE.cnvnator.cn" FASTA_CHRS/ >  "$SAMPLE.cnvnator.vcf"
           exit 0
 inputs:
     bam:
@@ -87,7 +87,7 @@ outputs:
     vcf:
         type: File
         outputBinding:
-            glob: "$(inputs.sample_name).CNVnator.vcf"
+            glob: "$(inputs.sample_name).cnvnator.vcf"
     root_file:
         type: File
         outputBinding:
@@ -95,4 +95,4 @@ outputs:
     cn_file:
         type: File
         outputBinding:
-            glob: "$(inputs.sample_name).CNVnator.cn"
+            glob: "$(inputs.sample_name).cnvnator.cn"

From 43b7c2c9b6bf9a2e1c0e65114495a7e60ed128ae Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:39:51 -0500
Subject: [PATCH 07/35] outputbinding change s/merged_sv_vcf/merged_vcf/

---
 definitions/subworkflows/merge_svs.cwl | 4 ++--
 definitions/tools/bcftools_merge.cwl   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/definitions/subworkflows/merge_svs.cwl b/definitions/subworkflows/merge_svs.cwl
index 06f12044..97a78bd2 100644
--- a/definitions/subworkflows/merge_svs.cwl
+++ b/definitions/subworkflows/merge_svs.cwl
@@ -127,11 +127,11 @@ steps:
                 default: "bcftools-sv-merged.vcf"
             vcfs: sv_vcfs
         out:
-            [merged_sv_vcf]
+            [merged_vcf]
     filter_blocklist_bcftools:
         run: ../tools/filter_sv_vcf_blocklist_bedpe.cwl
         in:
-            input_vcf: bcftools_merge_sv_vcfs/merged_sv_vcf
+            input_vcf: bcftools_merge_sv_vcfs/merged_vcf
             blocklist_bedpe: blocklist_bedpe
             output_vcf_basename:
                 default: "bcftools-sv-merged"
diff --git a/definitions/tools/bcftools_merge.cwl b/definitions/tools/bcftools_merge.cwl
index 4c45df4b..57daeaec 100644
--- a/definitions/tools/bcftools_merge.cwl
+++ b/definitions/tools/bcftools_merge.cwl
@@ -58,7 +58,7 @@ inputs:
         doc: "input bgzipped tabix indexed vcfs to merge"
 
 outputs:
-    merged_sv_vcf:
+    merged_vcf:
         type: File
         outputBinding:
             glob: $(inputs.output_vcf_name)

From 0622bd92cb9191ce4c808854e7ec2739268c19e9 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:40:51 -0500
Subject: [PATCH 08/35] stage secondary files in gather_to_sub_directory

Added javascript to pass in any secondary files when staging output
files.
Added --recursive so directories are copied along with their contents.
Added --preserve to keep timestamps (cromwell does not stage files for
this to matter...).
Added --no-clobber so that existing files are not overwritten (note that
older GNU cp skips them silently rather than erroring out).
Added an optional directory input so a single directory can be staged
alongside the files.
---
 definitions/tools/gather_to_sub_directory.cwl | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl
index cffb6a9f..1980cb47 100644
--- a/definitions/tools/gather_to_sub_directory.cwl
+++ b/definitions/tools/gather_to_sub_directory.cwl
@@ -19,7 +19,7 @@ requirements:
             files="${@:2}"
             mkdir $outdir
             chmod -R 777 $outdir
-            cp -t $outdir $files
+            cp --recursive --preserve --no-clobber --target-directory $outdir $files
 
             exit 0
 
@@ -32,6 +32,23 @@ inputs:
         type: File[]
         inputBinding:
             position: 2
+            valueFrom: |
+              ${
+                var results = []
+                for(var i=0; i<self.length; i++){
+                  results.push(self[i])
+                  if(self[i].hasOwnProperty('secondaryFiles')){
+                    for(var j=0; j<self[i].secondaryFiles.length; j++){
+                      results.push(self[i].secondaryFiles[j])
+                    }
+                  }
+                }
+                return results
+              }
+    directory:
+         type: Directory?
+         inputBinding:
+            position: 3
 outputs:
     gathered_directory:
         type: Directory

From 68f43a30dacecb74cbac9abae93ad289cf04d80f Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:46:11 -0500
Subject: [PATCH 09/35] added min confidence input to genotype_gvcf step

---
 definitions/subworkflows/joint_genotype.cwl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index 3de95bd4..d3e35d84 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -64,9 +64,11 @@ inputs:
     final_tsv_prefix:
         type: string?
         default: 'variants'
-    filter_gnomAD_maximum_population_allele_frequency:
+    gnomad_max_pop_af:
         type: float
         default: 0.05
+    min_conf_call:
+        type: float?
 outputs:
     raw_vcf:
         type: File
@@ -106,6 +108,7 @@ steps:
                 source: [combine_gvcfs/gvcf]
                 linkMerge: merge_flattened
             intervals: intervals
+            min_conf_call: min_conf_call
         out:
             [genotype_vcf]
     merge_vcfs:
@@ -140,7 +143,7 @@ steps:
         run: germline_filter_vcf.cwl
         in:
             annotated_vcf: annotate_variants/annotated_vcf
-            filter_gnomAD_maximum_population_allele_frequency: filter_gnomAD_maximum_population_allele_frequency
+            filter_gnomAD_maximum_population_allele_frequency: gnomad_max_pop_af
             gnomad_field_name:
                source: vep_custom_annotations
                valueFrom: |

From 498236bc09c4073b035908a960fffe585dfc1666 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:47:57 -0500
Subject: [PATCH 10/35] add annotated vcf as output

---
 definitions/subworkflows/joint_genotype.cwl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index d3e35d84..2a15cc94 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -74,6 +74,10 @@ outputs:
         type: File
         outputSource: merge_vcfs/merged_vcf
         secondaryFiles: [.tbi]
+    annotated_vcf:
+        type: File
+        outputSource: bgzip_index_annotated_vcf/indexed_vcf
+        secondaryFiles: [.tbi]
     final_vcf:
         type: File
         outputSource: filter_vcf/final_vcf

From 3b4329e68580ea0ec9f05c00a68f7fcb6bf2ba74 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:48:44 -0500
Subject: [PATCH 11/35] add decompose and normalize step to joint genotype

---
 definitions/subworkflows/joint_genotype.cwl | 30 ++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index 2a15cc94..5cc2faca 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -72,7 +72,7 @@ inputs:
 outputs:
     raw_vcf:
         type: File
-        outputSource: merge_vcfs/merged_vcf
+        outputSource: normalize_index/indexed_vcf
         secondaryFiles: [.tbi]
     annotated_vcf:
         type: File
@@ -121,11 +121,35 @@ steps:
             vcfs: genotype_gvcf/genotype_vcf
         out:
             [merged_vcf]
-
+    decompose:
+        run: ../tools/vt_decompose.cwl
+        in:
+            vcf: merge_vcfs/merged_vcf
+        out:
+            [decomposed_vcf]
+    decompose_index:
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: decompose/decomposed_vcf
+        out:
+            [indexed_vcf]
+    normalize:
+        run: ../tools/vt_normalize.cwl
+        in:
+            vcf: decompose_index/indexed_vcf
+            reference: reference
+        out:
+            [normalized_vcf]
+    normalize_index:
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: normalize/normalized_vcf
+        out:
+            [indexed_vcf]
     annotate_variants:
         run: ../tools/vep.cwl
         in:
-            vcf: merge_vcfs/merged_vcf
+            vcf: normalize_index/indexed_vcf
             cache_dir: vep_cache_dir
             ensembl_assembly: vep_ensembl_assembly
             ensembl_version: vep_ensembl_version

From da8d329d14132fb7b070e96bb74051070122fcce Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 09:59:37 -0500
Subject: [PATCH 12/35] add gatk soft filtering

gatk soft filtering using hard filtering parameters. Based on
https://gatk.broadinstitute.org/hc/en-us/articles/360035531112--How-to-Filter-variants-either-with-VQSR-or-by-hard-filtering#2
---
 definitions/subworkflows/gatk_soft_filter.cwl | 84 +++++++++++++++++++
 definitions/subworkflows/joint_genotype.cwl   |  9 +-
 definitions/tools/variant_filtration.cwl      | 55 ++++++++++++
 3 files changed, 147 insertions(+), 1 deletion(-)
 create mode 100644 definitions/subworkflows/gatk_soft_filter.cwl
 create mode 100644 definitions/tools/variant_filtration.cwl

diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl
new file mode 100644
index 00000000..61780268
--- /dev/null
+++ b/definitions/subworkflows/gatk_soft_filter.cwl
@@ -0,0 +1,84 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "apply soft filtering to a gatk called vcf using hard filter parameters"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: StepInputExpressionRequirement
+    - class: MultipleInputFeatureRequirement
+inputs:
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    vcf:
+        type: File
+        secondaryFiles: [.tbi]
+outputs:
+    filtered_vcf:
+        type: File
+        secondaryFiles: [.tbi]
+        outputSource: index_merged/indexed_vcf
+steps:
+    split_snps:
+        run: ../tools/select_variants.cwl
+        in:
+            reference: reference
+            vcf: vcf
+            output_vcf_basename:
+                default: "SNPS"
+            select_type:
+                default: "SNP"
+        out:
+            [filtered_vcf]
+    split_indels:
+        run: ../tools/select_variants.cwl
+        in:
+            reference: reference
+            vcf: vcf
+            output_vcf_basename:
+                default: "INDELS"
+            select_type:
+                default: "INDEL"
+        out:
+            [filtered_vcf]
+    filter_snps:
+        run: ../tools/variant_filtration.cwl
+        in:
+            reference: reference
+            vcf: split_snps/filtered_vcf
+            filters:
+                default: ["QD<2.0;QD2", "QUAL<30.0;QUAL30", "SOR>3.0;SOR3", "FS>60.0;FS60", "MQ<40.0;MQ40", "MQRankSum<-12.5;MQRankSum-12.5", "ReadPosRankSum<-8.0;ReadPosRankSum-8"]
+            output_vcf_basename:
+                default: "SNPS.filtered"
+        out:
+            [filtered_vcf]
+    filter_indels:
+        run: ../tools/variant_filtration.cwl
+        in:
+            reference: reference
+            vcf: split_indels/filtered_vcf
+            filters:
+                default: ["QD<2.0;QD2", "QUAL<30.0;QUAL30", "FS>200.0;FS200", "ReadPosRankSum<-20.0;ReadPosRankSum-20"]
+            output_vcf_basename:
+                default: "INDELS.filtered"
+        out:
+            [filtered_vcf]
+    merge:
+        run: ../tools/merge_vcf.cwl
+        in:
+            merged_vcf_basename:
+                default: "soft_filtered"
+            vcfs:
+                source: [filter_snps/filtered_vcf, filter_indels/filtered_vcf]
+                linkMerge: merge_flattened
+        out:
+            [merged_vcf]
+    index_merged:
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: merge/merged_vcf
+        out:
+            [indexed_vcf]
diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index 5cc2faca..76c6aa4c 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -167,10 +167,17 @@ steps:
             vcf: annotate_variants/annotated_vcf
         out:
             [indexed_vcf]
+    soft_filter:  # runs gatk_soft_filter.cwl on the indexed_vcf output of bgzip_index_annotated_vcf
+        run: gatk_soft_filter.cwl
+        in:
+            reference: reference
+            vcf: bgzip_index_annotated_vcf/indexed_vcf
+        out:
+            [filtered_vcf]
     filter_vcf:
         run: germline_filter_vcf.cwl
         in:
-            annotated_vcf: annotate_variants/annotated_vcf
+            annotated_vcf: soft_filter/filtered_vcf
             filter_gnomAD_maximum_population_allele_frequency: gnomad_max_pop_af
             gnomad_field_name:
                source: vep_custom_annotations
diff --git a/definitions/tools/variant_filtration.cwl b/definitions/tools/variant_filtration.cwl
new file mode 100644
index 00000000..1901bf13
--- /dev/null
+++ b/definitions/tools/variant_filtration.cwl
@@ -0,0 +1,63 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: "VariantFiltration (GATK 4.1.8.1)"
+# Runs GATK VariantFiltration with one expression/name pair per entry of
+# `filters` and writes <output_vcf_basename>.vcf.gz into the output directory.
+baseCommand: ["/gatk/gatk", "--java-options", "-Xmx4g", "VariantFiltration"]
+requirements:
+    # Required because the `filters` valueFrom below is a ${...} JavaScript body.
+    - class: InlineJavascriptRequirement
+    - class: ResourceRequirement
+      ramMin: 6000
+      tmpdirMin: 25000
+    - class: DockerRequirement
+      dockerPull: "broadinstitute/gatk:4.1.8.1"
+arguments:
+    ["-O", { valueFrom: $(runtime.outdir)/$(inputs.output_vcf_basename).vcf.gz }]
+inputs:
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+        inputBinding:
+            prefix: "-R"
+            position: 1
+    vcf:
+        type: File
+        inputBinding:
+            prefix: "--variant"
+            position: 2
+        secondaryFiles: [.tbi]
+    filters:
+        type: string[]
+        inputBinding:
+            position: 3
+            # Expands every "expression;name" entry into
+            #   -filter <expression> --filter-name <name>
+            # Plain array indexing is used instead of destructuring because
+            # CWL v1.0 expressions are specified as ECMAScript 5.1.
+            valueFrom: |
+              ${
+                var results = [];
+                for (var i = 0; i < self.length; i++) {
+                  var parts = self[i].split(";");
+                  results.push("-filter");
+                  results.push(parts[0]);
+                  results.push("--filter-name");
+                  results.push(parts[1]);
+                }
+                return results;
+              }
+        doc: "input array of strings with filter expression and filter name, split by a ';', Examples: 'QD<2.0;QD2', 'QUAL<30.0;QUAL30', 'SOR>3.0;SOR3'"
+    output_vcf_basename:
+        type: string?
+        default: select_variants  # NOTE(review): default name looks carried over from select_variants.cwl - confirm it is intended
+outputs:
+    filtered_vcf:
+        type: File
+        secondaryFiles: [.tbi]
+        outputBinding:
+            glob: $(inputs.output_vcf_basename).vcf.gz

From 4ccb8d3d58885842415de3bb09c7ac8f67a2e015 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:02:48 -0500
Subject: [PATCH 13/35] add new normalize tool

This uses VT to normalize a VCF. This is an alternative to GATK4
LeftAlignTrimVariants
---
 definitions/tools/vt_normalize.cwl | 32 ++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 definitions/tools/vt_normalize.cwl

diff --git a/definitions/tools/vt_normalize.cwl b/definitions/tools/vt_normalize.cwl
new file mode 100644
index 00000000..0e19086b
--- /dev/null
+++ b/definitions/tools/vt_normalize.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: "run vt normalize"
+# Command: vt normalize -o <outdir>/normalized.vcf.gz <vcf> -r <reference>
+baseCommand: [vt, normalize]
+requirements:
+    - class: ResourceRequirement
+      ramMin: 4000
+    - class: DockerRequirement
+      dockerPull: "quay.io/biocontainers/vt:0.57721--hf74b74d_1"
+arguments:
+    - "-o"
+    - valueFrom: "$(runtime.outdir)/normalized.vcf.gz"
+inputs:
+    reference:
+        type: [string, File]
+        inputBinding:
+            position: 2
+            prefix: "-r"
+        secondaryFiles: [.fai]
+    vcf:
+        type: File
+        inputBinding:
+            position: 1
+        secondaryFiles: [.tbi]
+outputs:
+    normalized_vcf:
+        type: File
+        outputBinding:
+            glob: normalized.vcf.gz

From 8a983732300069c61421762037b36046a2cff900 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:05:46 -0500
Subject: [PATCH 14/35] add gather to subdirectory tool for directories

---
 .../tools/gather_to_sub_directory_dirs.cwl    | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 definitions/tools/gather_to_sub_directory_dirs.cwl

diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl
new file mode 100644
index 00000000..a83b45ad
--- /dev/null
+++ b/definitions/tools/gather_to_sub_directory_dirs.cwl
@@ -0,0 +1,44 @@
+#! /usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+# Copies every input directory into a newly created sub-directory named `outdir`.
+baseCommand: ["/bin/bash","directory_gatherer.sh"]
+
+requirements:
+    - class: DockerRequirement
+      dockerPull: "ubuntu:xenial"
+    - class: ResourceRequirement
+      ramMin: 1000
+    - class: InitialWorkDirRequirement
+      listing:
+      - entryname: 'directory_gatherer.sh'
+        entry: |
+            set -eou pipefail
+
+            # The first argument is the target directory; after the shift,
+            # "$@" holds the directories to copy. All expansions are quoted
+            # so that names containing whitespace are not word-split.
+            outdir="$1"
+            shift
+            mkdir "$outdir"
+            chmod -R 777 "$outdir"
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" "$@"
+
+            exit 0
+
+inputs:
+    outdir:
+        type: string
+        inputBinding:
+            position: 1
+    directories:
+         type: Directory[]
+         inputBinding:
+            position: 2
+outputs:
+    gathered_directory:
+        type: Directory
+        outputBinding:
+            glob: "$(inputs.outdir)"
+

From fb99760251557057ab74580ecc35d9d333d1d409 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:07:24 -0500
Subject: [PATCH 15/35] add bcftools view tool

This allows vcfs to be split by samples
---
 definitions/tools/bcftools_view.cwl | 53 +++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 definitions/tools/bcftools_view.cwl

diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl
new file mode 100644
index 00000000..e43b67e4
--- /dev/null
+++ b/definitions/tools/bcftools_view.cwl
@@ -0,0 +1,53 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+
+baseCommand: [/opt/bcftools/bin/bcftools, view]  # optional --samples / --types selection, output written via --output-file
+
+requirements:
+    - class: DockerRequirement
+      dockerPull: 'mgibio/bcftools-cwl:1.12'
+    - class: ResourceRequirement
+      ramMin: 4000
+
+inputs:
+    in_vcf:
+        doc: "input bgzipped tabix indexed vcf to view"
+        type: File
+        inputBinding:
+            position: 7
+    sample_name:
+        doc: "comma separated list of samples to include (or exclude with '^' prefix)"
+        type: string?
+        inputBinding:
+            prefix: '--samples'
+            position: 1
+    variant_type:
+        doc: "select comma-separated list of variant types: snps,indels,mnps,ref,bnd,other"
+        type: string?
+        inputBinding:
+            prefix: '--types'
+            position: 6
+    output_type:
+        doc: "output file format"
+        type:
+            type: enum
+            symbols: ['b', 'u', 'z', 'v']
+        default: 'z'
+        inputBinding:
+            prefix: '--output-type'
+            position: 4
+    output_vcf_name:
+        doc: "output vcf file name"
+        type: string?
+        default: 'bcftools_split.vcf.gz'
+        inputBinding:
+            prefix: '--output-file'
+            position: 5
+
+outputs:
+    vcf:
+        type: File
+        outputBinding:
+            glob: '$(inputs.output_vcf_name)'

From f8947469866bfc25b7df8da2b24b8eb4b7ae6a15 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:08:23 -0500
Subject: [PATCH 16/35] add manta_germline tool

This allows manta to be run with multiple samples in a joint-calling fashion
---
 definitions/tools/manta_germline.cwl | 78 ++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 definitions/tools/manta_germline.cwl

diff --git a/definitions/tools/manta_germline.cwl b/definitions/tools/manta_germline.cwl
new file mode 100644
index 00000000..3c446ce8
--- /dev/null
+++ b/definitions/tools/manta_germline.cwl
@@ -0,0 +1,78 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: "Set up and execute manta over multiple samples"
+
+requirements:
+    - class: DockerRequirement
+      dockerPull: mgibio/manta_somatic-cwl:1.6.0
+    - class: InlineJavascriptRequirement
+    - class: ShellCommandRequirement  # needed for the unquoted "&&" in arguments
+    - class: ResourceRequirement
+      coresMin: 12
+      ramMin: 24000
+      tmpdirMin: 10000
+baseCommand: ["/usr/bin/python", "/usr/bin/manta/bin/configManta.py"]  # first command; runWorkflow.py is run after the "&&"
+arguments: [
+    { position: -1, valueFrom: $(runtime.outdir), prefix: "--runDir" },
+    { shellQuote: false, valueFrom: "&&" },
+    "/usr/bin/python", "runWorkflow.py", "-m", "local",
+    { position: 1, valueFrom: $(runtime.cores), prefix: "-j" }
+]
+inputs:
+    bams:  # NOTE(review): no secondaryFiles (BAM index) declared here - confirm the index gets staged next to each BAM
+        type:
+           type: array
+           items: File
+           inputBinding:
+               prefix: "--bam"  # prefix is emitted once per BAM
+        inputBinding:
+            position: -2  # negative positions sort ahead of the "&&" (default position 0), so these options go to configManta.py
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+        inputBinding:
+            position: -4
+            prefix: "--referenceFasta"
+    call_regions:
+        type: File?
+        inputBinding:
+            position: -5
+            prefix: "--callRegions"
+        secondaryFiles: [.tbi]
+        doc: bgzip-compressed, tabix-indexed BED file specifying regions to which variant analysis will be restricted
+    non_wgs:
+        type: boolean?
+        inputBinding:
+            position: -6
+            prefix: "--exome"
+        doc: toggles on settings for WES
+    output_contigs:
+        type: boolean?
+        inputBinding:
+            position: -7
+            prefix: "--outputContig"
+        doc: if true, outputs assembled contig sequences in final VCF files, in the INFO field CONTIG
+outputs:  # all outputs are collected from the results/ tree under the run directory
+    diploid_variants:
+        type: File
+        outputBinding:
+            glob: results/variants/diploidSV.vcf.gz
+        secondaryFiles: [.tbi]
+    all_candidates:
+        type: File
+        outputBinding:
+            glob: results/variants/candidateSV.vcf.gz
+        secondaryFiles: [.tbi]
+    small_candidates:
+        type: File
+        outputBinding:
+            glob: results/variants/candidateSmallIndels.vcf.gz
+        secondaryFiles: [.tbi]
+    stats:
+        type: Directory
+        outputBinding:
+            glob: results/stats/

From 1c526f8e734e0f13fac77a6199e3915d5c5faa56 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:11:24 -0500
Subject: [PATCH 17/35] add joint cnvnator subworkflow

This runs cnvnator in single sample mode over multiple samples. The sample
rename step is required because the sample name in the output vcf can differ
from the input name. Examples:
  input name -> output name
  sample.1 -> sample
  sample.1.2 -> sample
  sample_1 -> sample_1
It also stages the output vcf name to follow the $SAMPLE.cnvnator.vcf.gz format.
---
 definitions/subworkflows/joint_cnvnator.cwl | 79 +++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 definitions/subworkflows/joint_cnvnator.cwl

diff --git a/definitions/subworkflows/joint_cnvnator.cwl b/definitions/subworkflows/joint_cnvnator.cwl
new file mode 100644
index 00000000..377caca1
--- /dev/null
+++ b/definitions/subworkflows/joint_cnvnator.cwl
@@ -0,0 +1,81 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "run cnvnator for multiple samples"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: ScatterFeatureRequirement
+    - class: StepInputExpressionRequirement
+    # The ${...} bodies in sample_rename are JavaScript, not parameter references.
+    - class: InlineJavascriptRequirement
+inputs:
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    sample_names:  # paired with bams by position (dotproduct scatter below)
+        type: string[]
+    bams:
+        type: File[]
+        secondaryFiles: [^.bai]
+    bin_size:
+        type: int?
+outputs:
+    vcfs:  # renamed and indexed per-sample vcfs
+        type: File[]
+        outputSource: index_cnvnator/indexed_vcf
+        secondaryFiles: [.tbi]
+    root_files:
+        type: File[]
+        outputSource: cnvnator/root_file
+    cn_files:
+        type: File[]
+        outputSource: cnvnator/cn_file
+steps:
+    cnvnator:  # one cnvnator.cwl run per (bam, sample name) pair
+        scatter: [bam, sample_name]
+        scatterMethod: dotproduct
+        run: ../tools/cnvnator.cwl
+        in:
+            bam: bams
+            reference: reference
+            sample_name: sample_names
+            bin_size: bin_size
+        out:
+            [vcf, root_file, cn_file]
+    bgzip_index:
+        scatter: [vcf]
+        run: bgzip_and_index.cwl
+        in:
+            vcf: cnvnator/vcf
+        out:
+            [indexed_vcf]
+    sample_rename:  # restores the full sample name and names the file <sample>.cnvnator.vcf.gz
+        scatter: [input_vcf, new_sample_name]
+        scatterMethod: dotproduct
+        run: ../tools/replace_vcf_sample_name.cwl
+        in:
+            input_vcf: bgzip_index/indexed_vcf
+            new_sample_name: sample_names
+            sample_to_replace:  # name to replace = requested name up to its first "."
+                valueFrom: '${
+                    var old_name = inputs.new_sample_name.split(".")[0];
+                    return old_name;
+                }'
+            output_name:
+                valueFrom: '${
+                    var sample = inputs.new_sample_name;
+                    var name = sample + ".cnvnator.vcf.gz";
+                    return name;
+                }'
+        out:
+            [renamed_vcf]
+    index_cnvnator:
+        scatter: [vcf]
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: sample_rename/renamed_vcf
+        out:
+            [indexed_vcf]

From e6e621af695fbeac0ff6f1c57f5cce9a9ef120d3 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:15:19 -0500
Subject: [PATCH 18/35] add joint cnvkit subworkflow

This runs cnvkit in single sample mode for multiple samples.
The sample rename step is required because the output sample name in the
vcf is based on the input filename, which is currently hardcoded to
`adjusted.tumor`.
It also stages the output file name to follow the $SAMPLE.cnvkit.vcf.gz format.
---
 definitions/subworkflows/joint_cnvkit.cwl | 92 +++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 definitions/subworkflows/joint_cnvkit.cwl

diff --git a/definitions/subworkflows/joint_cnvkit.cwl b/definitions/subworkflows/joint_cnvkit.cwl
new file mode 100644
index 00000000..de8ff4f1
--- /dev/null
+++ b/definitions/subworkflows/joint_cnvkit.cwl
@@ -0,0 +1,92 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow  # scatters cnvkit_single_sample.cwl over the bams, then renames and indexes each vcf
+label: "jointly run cnvkit for sv calls"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: StepInputExpressionRequirement
+    - class: InlineJavascriptRequirement
+    - class: ScatterFeatureRequirement
+inputs:
+    sample_names:  # paired with bams by position (dotproduct scatter below)
+        type: string[]
+    bams:
+        type: File[]
+        secondaryFiles: [^.bai]
+    reference_fasta:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai]
+    reference_cnn:
+        type: File?
+        doc: "can be a flat reference or reference based on a panel of normals"
+    method:
+        type:
+          - "null"
+          - type: enum
+            symbols: ["hybrid", "amplicon", "wgs"]
+    segment_filter:
+        type:
+          - "null"
+          - type: enum
+            symbols: ["ampdel", "ci", "cn", "sem"]
+outputs:
+    vcfs:  # renamed and indexed per-sample vcfs
+        type: File[]
+        outputSource: index_cnvkit/indexed_vcf
+        secondaryFiles: [.tbi]
+    cnr:
+        type: File[]
+        outputSource: cnvkit/tumor_bin_level_ratios
+    cns:
+        type: File[]
+        outputSource: cnvkit/tumor_segmented_ratios
+steps:
+    cnvkit:  # one run per (bam, vcf name) pair
+        scatter: [tumor_bam, cnvkit_vcf_name]
+        scatterMethod: dotproduct
+        run: cnvkit_single_sample.cwl
+        in:
+            method: method
+            reference_cnn: reference_cnn
+            tumor_bam: bams
+            cnvkit_vcf_name:  # per-sample vcf name: <sample>.cnvkit.vcf
+                source: [sample_names]
+                valueFrom: "$(self).cnvkit.vcf"
+            segment_filter: segment_filter
+            fasta_reference: reference_fasta
+        out:
+            [tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf]
+    bgzip_and_index:
+        scatter: [vcf]
+        run: bgzip_and_index.cwl
+        in:
+            vcf: cnvkit/cnvkit_vcf
+        out:
+            [indexed_vcf]
+    sample_rename:  # replaces the fixed name 'adjusted.tumor' with the real sample name
+        scatter: [input_vcf, new_sample_name]
+        scatterMethod: dotproduct
+        run: ../tools/replace_vcf_sample_name.cwl
+        in:
+            input_vcf: bgzip_and_index/indexed_vcf
+            new_sample_name: sample_names
+            sample_to_replace:
+                valueFrom: 'adjusted.tumor'
+            output_name:  # output file name: <sample>.cnvkit.vcf.gz
+                valueFrom: '${
+                    var sample = inputs.new_sample_name;
+                    var name = sample + ".cnvkit.vcf.gz";
+                    return name;
+                }'
+        out:
+            [renamed_vcf]
+    index_cnvkit:
+        scatter: [vcf]
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: sample_rename/renamed_vcf
+        out:
+            [indexed_vcf]

From 66ac5892328476ca20437423da3854d35ac0dc72 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:18:32 -0500
Subject: [PATCH 19/35] add joint sv read caller filtering

This subworkflow runs sv filtering for manta/smoove calls. Final sample
names follow the $SAMPLE-$CALLER format. This allows easy tracking for
the source of calls in output merged vcfs.
---
 .../sv_joint_read_caller_filter.cwl           | 157 ++++++++++++++++++
 1 file changed, 157 insertions(+)
 create mode 100644 definitions/subworkflows/sv_joint_read_caller_filter.cwl

diff --git a/definitions/subworkflows/sv_joint_read_caller_filter.cwl b/definitions/subworkflows/sv_joint_read_caller_filter.cwl
new file mode 100644
index 00000000..21b71515
--- /dev/null
+++ b/definitions/subworkflows/sv_joint_read_caller_filter.cwl
@@ -0,0 +1,157 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow  # read-support filter -> per-sample split -> duphold -> merge -> depth filter -> per-sample split, rename, index
+label: "filter jointly called vcfs from read based callers"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: StepInputExpressionRequirement
+    - class: InlineJavascriptRequirement
+    - class: ScatterFeatureRequirement
+inputs:
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    sample_names:  # paired with bams by position in the dotproduct scatters below
+        type: string[]
+    bams:
+        type: File[]
+        secondaryFiles: [^.bai]
+    filter_del_depth:
+        type: double?
+    filter_dup_depth:
+        type: double?
+    filter_paired_count:
+        type: int?
+    filter_split_count:
+        type: int?
+    filter_alt_abundance_percentage:
+        type: double?
+    sv_vcf:  # single multi-sample vcf; split per sample by the split_vcf step
+        type: File
+        secondaryFiles: [.tbi]
+    vcf_source:  # caller name; also used as the suffix in <sample>-<caller>
+        type:
+          - type: enum
+            symbols: ["manta", "smoove"]
+outputs:
+    vcfs:  # one indexed vcf per sample
+        type: File[]
+        outputSource: final_index/indexed_vcf
+        secondaryFiles: [.tbi]
+steps:
+    read_support_filter:
+        run: ../tools/filter_sv_vcf_read_support.cwl
+        in:
+            abundance_percentage: filter_alt_abundance_percentage
+            input_vcf: sv_vcf
+            paired_count: filter_paired_count
+            split_count: filter_split_count
+            vcf_source: vcf_source
+        out:
+            [filtered_sv_vcf]
+    bgzip_index:
+        run: bgzip_and_index.cwl
+        in:
+            vcf: read_support_filter/filtered_sv_vcf
+        out:
+            [indexed_vcf]
+    split_vcf:  # one bcftools view --samples run per sample name
+        scatter: [sample_name]
+        run: ../tools/bcftools_view.cwl
+        in:
+            sample_name: sample_names
+            in_vcf: bgzip_index/indexed_vcf
+        out:
+            [vcf]
+    duphold:  # pairs each bam with the single-sample vcf at the same index
+        scatter: [bam, sv_vcf]
+        scatterMethod: dotproduct
+        run: ../tools/duphold.cwl
+        in:
+            bam: bams
+            reference: reference
+            sv_vcf: split_vcf/vcf
+        out:
+            [annotated_sv_vcf]
+    bgzip_index_duphold:
+        scatter: [vcf]
+        scatterMethod: dotproduct
+        run: bgzip_and_index.cwl
+        in:
+            vcf: duphold/annotated_sv_vcf
+        out:
+            [indexed_vcf]
+    merge_vcfs:  # recombines the per-sample vcfs into one file
+        run: ../tools/bcftools_merge.cwl
+        in:
+            vcfs: bgzip_index_duphold/indexed_vcf
+        out:
+            [merged_vcf]
+    depth_filter:
+        run: ../tools/filter_sv_vcf_depth.cwl
+        in:
+            input_vcf: merge_vcfs/merged_vcf
+            deletion_depth: filter_del_depth
+            duplication_depth: filter_dup_depth
+            vcf_source:  # fixed to "duphold" here, independent of the workflow-level vcf_source
+                default: "duphold"
+        out:
+            [filtered_sv_vcf]
+    final_split_vcf:  # splits the depth-filtered vcf per sample, writing <sample>-<caller>.vcf.gz
+        scatter: [sample_name, output_vcf_name]
+        scatterMethod: dotproduct
+        run: ../tools/bcftools_view.cwl
+        in:
+            sample_name: sample_names
+            in_vcf: depth_filter/filtered_sv_vcf
+            vcf_source: vcf_source  # not an input of bcftools_view.cwl; only read as inputs.vcf_source in the expression below
+            output_vcf_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var result = sample + "-" + caller + ".vcf.gz";
+                      return result;
+                    }
+        out:
+            [vcf]
+    rename:  # changes the sample name inside each vcf from <sample> to <sample>-<caller>
+        scatter: [input_vcf, sample_to_replace, new_sample_name, output_name]
+        scatterMethod: dotproduct
+        run: ../tools/replace_vcf_sample_name.cwl
+        in:
+            input_vcf: final_split_vcf/vcf
+            sample_to_replace: sample_names
+            vcf_source: vcf_source  # only read as inputs.vcf_source in the expressions below
+            new_sample_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var result = sample + "-" + caller;
+                      return result;
+                    }
+            output_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var result = sample + "-" + caller + ".vcf.gz";
+                      return result;
+                    }
+        out:
+            [renamed_vcf]
+    final_index:
+        scatter: [vcf]
+        scatterMethod: dotproduct
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: rename/renamed_vcf
+        out:
+            [indexed_vcf]

From 571bab75d7fa2549c4ac13b9c898c7b8eee86ff8 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:22:23 -0500
Subject: [PATCH 20/35] add joint sv filtering for depth callers

This runs the depth filters for events called by cnvkit/cnvnator. Final
sample names follow the $SAMPLE-$CALLER format. This allows easy
tracking of the source of calls in the final merged vcfs.
Also adds a custom merge-sv-records tool. It allows calls to be merged
together if they are of the same type and within a bp window. It does not
remove calls; it only adds a new record to the output vcf.
---
 .../sv_joint_depth_caller_filter.cwl          | 124 ++++++++++++++++++
 definitions/tools/custom_merge_sv_records.cwl |  49 +++++++
 2 files changed, 173 insertions(+)
 create mode 100644 definitions/subworkflows/sv_joint_depth_caller_filter.cwl
 create mode 100644 definitions/tools/custom_merge_sv_records.cwl

diff --git a/definitions/subworkflows/sv_joint_depth_caller_filter.cwl b/definitions/subworkflows/sv_joint_depth_caller_filter.cwl
new file mode 100644
index 00000000..4cd676ea
--- /dev/null
+++ b/definitions/subworkflows/sv_joint_depth_caller_filter.cwl
@@ -0,0 +1,124 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow  # per-sample chain: merge records -> size filter -> duphold -> depth filter -> rename -> bgzip/index
+label: "Filter multiple sv vcfs from depth callers(cnvkit/cnvnator), returns single sample vcfs with the sample name as $SAMPLE-$CALLER"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: StepInputExpressionRequirement
+    - class: InlineJavascriptRequirement
+    - class: ScatterFeatureRequirement
+inputs:
+    bams:
+        type: File[]
+        secondaryFiles: [^.bai]
+    sample_names:  # paired with bams and sv_vcfs by position in the dotproduct scatters below
+        type: string[]
+    filter_del_depth:
+        type: double?
+    filter_dup_depth:
+        type: double?
+    min_sv_size:
+        type: int?
+    reference:
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    sv_vcfs:  # one vcf per sample
+        type: File[]
+    vcf_source:  # caller name; also used as the suffix in <sample>-<caller>
+        type:
+          - type: enum
+            symbols: ["cnvkit", "cnvnator"]
+    merge_distance:
+        type: int?
+outputs:
+    vcfs:  # one indexed vcf per sample
+        type: File[]
+        outputSource: bgzip_and_index/indexed_vcf
+        secondaryFiles: [.tbi]
+steps:
+    merge_calls:  # custom_merge_sv_records.cwl run on every input vcf
+        scatter: [input_vcf]
+        run: ../tools/custom_merge_sv_records.cwl
+        in:
+            input_vcf: sv_vcfs
+            distance: merge_distance
+        out:
+            [vcf]
+    size_filter:
+        scatter: [input_vcf]
+        run: ../tools/filter_sv_vcf_size.cwl
+        in:
+            input_vcf: merge_calls/vcf
+            size_method:
+                default: "min_len"
+            sv_size: min_sv_size
+        out:
+            [filtered_sv_vcf]
+    duphold:  # pairs each bam with the vcf at the same index
+        scatter: [bam, sv_vcf]
+        scatterMethod: dotproduct
+        run: ../tools/duphold.cwl
+        in:
+            bam: bams
+            reference: reference
+            sv_vcf: size_filter/filtered_sv_vcf
+        out:
+            [annotated_sv_vcf]
+    depth_filter:
+        scatter: [input_vcf, output_vcf_name]
+        scatterMethod: dotproduct
+        run: ../tools/filter_sv_vcf_depth.cwl
+        in:
+            input_vcf: duphold/annotated_sv_vcf
+            deletion_depth: filter_del_depth
+            duplication_depth: filter_dup_depth
+            output_vcf_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var vcf_name = sample + "-" + caller  + ".vcf";
+                      return vcf_name;
+                    }
+            vcf_source:  # NOTE(review): this step-level vcf_source defaults to "duphold", so inputs.vcf_source in the expression above presumably yields "duphold" rather than the workflow-level caller - confirm
+                default: "duphold"
+        out:
+            [filtered_sv_vcf]
+    rename:  # changes the sample name inside each vcf from <sample> to <sample>-<caller>
+        scatter: [input_vcf, new_sample_name, sample_to_replace, output_name]
+        scatterMethod: dotproduct
+        run: ../tools/replace_vcf_sample_name.cwl
+        in:
+            input_vcf: depth_filter/filtered_sv_vcf
+            sample_to_replace: sample_names
+            vcf_source: vcf_source  # only read as inputs.vcf_source in the expressions below
+            new_sample_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var result = sample + "-" + caller;
+                      return result;
+                    }
+            output_name:
+                source: [sample_names]
+                valueFrom: |
+                    ${
+                      var sample = self;
+                      var caller = inputs.vcf_source;
+                      var result = sample + "-" + caller + ".vcf.gz";
+                      return result;
+                    }
+        out:
+            [renamed_vcf]
+    bgzip_and_index:
+        scatter: [vcf]
+        run: bgzip_and_index.cwl
+        in:
+            vcf: rename/renamed_vcf
+        out: [indexed_vcf]
diff --git a/definitions/tools/custom_merge_sv_records.cwl b/definitions/tools/custom_merge_sv_records.cwl
new file mode 100644
index 00000000..df6d267d
--- /dev/null
+++ b/definitions/tools/custom_merge_sv_records.cwl
@@ -0,0 +1,52 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+label: "merges nearby DEL/DUP records within a certain window distance"
+
+baseCommand: ["/bin/bash", "run_merge.sh"]
+requirements:
+    - class: ResourceRequirement
+      ramMin: 4000
+    - class: DockerRequirement
+      dockerPull: "apaul7/analysis:1.0.0"
+    - class: InitialWorkDirRequirement
+      listing:
+      - entryname: "run_merge.sh"
+        entry: |
+          #!/bin/bash
+          set -eou pipefail
+          # Positional arguments: input vcf, output vcf name, merge window.
+          INPUT="$1"
+          OUTPUT="$2"
+          DISTANCE="$3"
+          # Expansions are quoted so a path with whitespace stays one argument.
+          /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE"
+
+          # Compress and index the result; the outputs glob expects "$OUTPUT.gz".
+          /usr/local/bin/bgzip "$OUTPUT"
+          /usr/local/bin/tabix -p vcf "$OUTPUT.gz"
+
+
+inputs:
+    input_vcf:
+        type: File
+        inputBinding:
+            position: 1
+    output_vcf_name:  # uncompressed name; the tool output is this name plus ".gz"
+        type: string?
+        default: "record_merged.vcf"
+        inputBinding:
+            position: 2
+    distance:
+        type: int?
+        default: 1000
+        inputBinding:
+            position: 3
+
+outputs:
+    vcf:
+        type: File
+        outputBinding:
+            glob: "$(inputs.output_vcf_name).gz"
+        secondaryFiles: [.tbi]

From c8214e3d5d3a0e51ca9b4fdf11ff6ac50dbfd163 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:25:49 -0500
Subject: [PATCH 21/35] add joint detect svs subworkflows

This runs the sv callers in joint mode, merges, annotates, filters, and
stages the results in a directory structure
---
 definitions/subworkflows/joint_detect_svs.cwl | 300 ++++++++++++++++++
 1 file changed, 300 insertions(+)
 create mode 100644 definitions/subworkflows/joint_detect_svs.cwl

diff --git a/definitions/subworkflows/joint_detect_svs.cwl b/definitions/subworkflows/joint_detect_svs.cwl
new file mode 100644
index 00000000..9bbe2bce
--- /dev/null
+++ b/definitions/subworkflows/joint_detect_svs.cwl
@@ -0,0 +1,300 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "joint detect svs"
+# Runs smoove, manta, cnvnator and cnvkit over the cohort bams, filters each
+# caller's output, merges/annotates the filtered calls, and stages the raw,
+# filtered and merged results under one "SV_pipeline" directory.
+requirements:
+    # steps below list their sources as arrays with linkMerge: merge_flattened
+    - class: MultipleInputFeatureRequirement
+    - class: SubworkflowFeatureRequirement
+    # the staging steps set `outdir` through a step-input valueFrom
+    - class: StepInputExpressionRequirement
+    - class: ScatterFeatureRequirement
+inputs:
+    reference:  # passed to all four callers and all four filter steps
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    bams:  # index is expected beside each bam as <name-without-extension>.bai
+        type: File[]
+        secondaryFiles: [^.bai]
+    sample_names:  # presumably parallel to bams -- confirm in joint_cnvnator.cwl / joint_cnvkit.cwl
+        type: string[]
+    cohort_name:  # used by smoove and merge_svs
+        type: string
+    exclude_regions:  # optional; smoove only
+        type: File?
+    manta_call_regions:  # optional; manta call_regions
+        type: File?
+    manta_output_contigs:  # optional; manta output_contigs
+        type: boolean?
+    cnvnator_bin_size:  # optional; cnvnator bin_size
+        type: int?
+    cnvkit_method:  # optional; cnvkit method
+        type:
+          - "null"
+          - type: enum
+            symbols: ["hybrid", "amplicon", "wgs"]
+    cnvkit_reference_cnn:  # optional; cnvkit reference_cnn
+        type: File?
+    cnvkit_segment_filter:  # optional; cnvkit segment_filter
+        type:
+          - "null"
+          - type: enum
+            symbols: ["ampdel", "ci", "cn", "sem"]
+    filter_del_depth:  # optional; shared by all four filter steps
+        type: double?
+    filter_dup_depth:  # optional; shared by all four filter steps
+        type: double?
+    filter_paired_count:  # optional; read-caller filters (smoove, manta) only
+        type: int?
+    filter_split_count:  # optional; read-caller filters (smoove, manta) only
+        type: int?
+    filter_alt_abundance_percentage:  # optional; read-caller filters (smoove, manta) only
+        type: double?
+    filter_depth_caller_min_size:  # optional; min_sv_size of the depth-caller filters (cnvnator, cnvkit)
+        type: int?
+    survivor_estimate_sv_distance:  # every survivor_* input is forwarded to merge_svs
+        type: boolean
+    genome_build:  # forwarded to merge_svs
+        type: string
+    survivor_max_distance_to_merge:
+        type: int
+    survivor_minimum_sv_calls:
+        type: int
+    survivor_minimum_sv_size:
+        type: int
+    survivor_same_strand:
+        type: boolean
+    survivor_same_type:
+        type: boolean
+    snps_vcf:  # optional; forwarded to merge_svs
+        type: File?
+    filter_blocklist_bedpe:  # optional; merge_svs blocklist_bedpe
+        type: File?
+    annotsv_filter_pop_af:  # optional; merge_svs filter_pop_af
+        type: double?
+    annotsv_filter_no_CDS:  # optional; merge_svs filter_no_CDS
+        type: boolean?
+    annotsv_annotations:  # path string or Directory; forwarded to merge_svs
+        type:
+            - string
+            - Directory
+outputs:
+    all_staged:  # the "SV_pipeline" directory assembled by stage_all
+        type: Directory
+        outputSource: stage_all/gathered_directory
+steps:
+# stage 1, variant calling: run each caller, then stage its raw output
+    smoove:  # single (unscattered) run over the whole bams array
+        run: ../tools/smoove.cwl
+        in:
+            bams: bams
+            cohort_name: cohort_name
+            reference: reference
+            exclude_regions: exclude_regions
+        out:
+            [output_vcf]
+    index_smoove:  # indexed copy is what both staging and filter_smoove consume
+        run: ../tools/index_vcf.cwl
+        in:
+            vcf: smoove/output_vcf
+        out:
+            [indexed_vcf]
+    stage_raw_smoove:  # gathers into a "smoove" sub-directory
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "smoove"
+            files:
+                source: [index_smoove/indexed_vcf]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+    manta:  # single (unscattered) run over the whole bams array
+        run: ../tools/manta_germline.cwl
+        in:
+            bams: bams
+            reference: reference
+            call_regions: manta_call_regions
+            output_contigs: manta_output_contigs
+        out:
+            [diploid_variants, all_candidates, small_candidates, stats]
+    stage_raw_manta:  # gathers the three vcfs plus the stats directory into "manta"
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "manta"
+            files:
+                source: [manta/diploid_variants, manta/all_candidates, manta/small_candidates]
+                linkMerge: merge_flattened
+            directory: manta/stats
+        out:
+            [gathered_directory]
+    cnvnator:  # subworkflow; receives sample_names alongside bams
+        run: joint_cnvnator.cwl
+        in:
+            reference: reference
+            sample_names: sample_names
+            bams: bams
+            bin_size: cnvnator_bin_size
+        out:
+            [vcfs, root_files, cn_files]
+    stage_raw_cnvnator:  # flattens all cnvnator outputs into "cnvnator"
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "cnvnator"
+            files:
+                source: [cnvnator/vcfs, cnvnator/root_files, cnvnator/cn_files]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+    cnvkit:  # subworkflow; receives sample_names alongside bams
+        run: joint_cnvkit.cwl
+        in:
+            sample_names: sample_names
+            bams: bams
+            reference_fasta: reference
+            reference_cnn: cnvkit_reference_cnn
+            method: cnvkit_method
+            segment_filter: cnvkit_segment_filter
+        out:
+            [vcfs, cnr, cns]
+    stage_raw_cnvkit:  # flattens all cnvkit outputs into "cnvkit"
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "cnvkit"
+            files:
+                source: [cnvkit/vcfs, cnvkit/cnr, cnvkit/cns]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+    stage_raw:  # nests the four per-caller directories under "raw"
+        run: ../tools/gather_to_sub_directory_dirs.cwl
+        in:
+             outdir:
+                 valueFrom: "raw"
+             directories:
+                 source: [stage_raw_smoove/gathered_directory, stage_raw_manta/gathered_directory, stage_raw_cnvnator/gathered_directory, stage_raw_cnvkit/gathered_directory]
+                 linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+# stage 2, filtering: read-based callers and depth-based callers use different filter subworkflows
+    filter_smoove:  # read-caller filter on the single indexed smoove vcf
+        run: sv_joint_read_caller_filter.cwl
+        in:
+            reference: reference
+            sample_names: sample_names
+            bams: bams
+            filter_del_depth: filter_del_depth
+            filter_dup_depth: filter_dup_depth
+            filter_paired_count: filter_paired_count
+            filter_split_count: filter_split_count
+            filter_alt_abundance_percentage: filter_alt_abundance_percentage
+            sv_vcf: index_smoove/indexed_vcf
+            vcf_source:
+                default: "smoove"
+        out:
+            [vcfs]
+    filter_manta:  # read-caller filter on manta's diploid_variants vcf
+        run: sv_joint_read_caller_filter.cwl
+        in:
+            reference: reference
+            sample_names: sample_names
+            bams: bams
+            filter_del_depth: filter_del_depth
+            filter_dup_depth: filter_dup_depth
+            filter_paired_count: filter_paired_count
+            filter_split_count: filter_split_count
+            filter_alt_abundance_percentage: filter_alt_abundance_percentage
+            sv_vcf: manta/diploid_variants
+            vcf_source:
+                default: "manta"
+        out:
+            [vcfs]
+    filter_cnvnator:  # depth-caller filter; takes the cnvnator vcfs output as sv_vcfs
+        run: sv_joint_depth_caller_filter.cwl
+        in:
+            reference: reference
+            sample_names: sample_names
+            bams: bams
+            filter_del_depth: filter_del_depth
+            filter_dup_depth: filter_dup_depth
+            sv_vcfs: cnvnator/vcfs
+            vcf_source:
+                default: "cnvnator"
+            min_sv_size: filter_depth_caller_min_size
+        out:
+            [vcfs]
+    filter_cnvkit:  # depth-caller filter; takes the cnvkit vcfs output as sv_vcfs
+        run: sv_joint_depth_caller_filter.cwl
+        in:
+            reference: reference
+            sample_names: sample_names
+            bams: bams
+            filter_del_depth: filter_del_depth
+            filter_dup_depth: filter_dup_depth
+            sv_vcfs: cnvkit/vcfs
+            vcf_source:
+                default: "cnvkit"
+            min_sv_size: filter_depth_caller_min_size
+        out:
+            [vcfs]
+    stage_filtered:  # flattens every filtered vcf into one "filtered" directory
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "filtered"
+            files:
+                source: [filter_smoove/vcfs, filter_manta/vcfs, filter_cnvnator/vcfs, filter_cnvkit/vcfs]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+# stage 3, merge + annotate + filter, then assemble the final directory tree
+    merge_svs:  # consumes the same flattened vcf list that stage_filtered stages
+        run: merge_svs.cwl
+        in:
+            cohort_name: cohort_name
+            estimate_sv_distance: survivor_estimate_sv_distance
+            genome_build: genome_build
+            max_distance_to_merge: survivor_max_distance_to_merge
+            minimum_sv_calls: survivor_minimum_sv_calls
+            minimum_sv_size: survivor_minimum_sv_size
+            same_strand: survivor_same_strand
+            same_type: survivor_same_type
+            snps_vcf: snps_vcf
+            sv_vcfs:
+                source: [filter_smoove/vcfs, filter_manta/vcfs, filter_cnvnator/vcfs, filter_cnvkit/vcfs]
+                linkMerge: merge_flattened
+            blocklist_bedpe: filter_blocklist_bedpe
+            filter_pop_af: annotsv_filter_pop_af
+            filter_no_CDS: annotsv_filter_no_CDS
+            annotsv_annotations: annotsv_annotations
+        out:
+            [bcftools_merged_sv_vcf, bcftools_merged_annotated_tsv, bcftools_merged_unannotated_tsv, bcftools_merged_filtered_annotated_tsv, survivor_merged_sv_vcf, survivor_merged_annotated_tsv, survivor_merged_unannotated_tsv, survivor_merged_filtered_annotated_tsv]
+    stage_merged:  # all eight merge_svs outputs go into "merged"
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "merged"
+            files:
+                source: [merge_svs/bcftools_merged_sv_vcf, merge_svs/bcftools_merged_annotated_tsv, merge_svs/bcftools_merged_unannotated_tsv, merge_svs/bcftools_merged_filtered_annotated_tsv, merge_svs/survivor_merged_sv_vcf, merge_svs/survivor_merged_annotated_tsv, merge_svs/survivor_merged_unannotated_tsv, merge_svs/survivor_merged_filtered_annotated_tsv]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+    stage_all:  # top-level "SV_pipeline" directory holding raw, filtered and merged
+        run: ../tools/gather_to_sub_directory_dirs.cwl
+        in:
+             outdir:
+                 valueFrom: "SV_pipeline"
+             directories:
+                 source: [stage_raw/gathered_directory, stage_filtered/gathered_directory, stage_merged/gathered_directory]
+                 linkMerge: merge_flattened
+        out:
+            [gathered_directory]

From 76237792509dbc7a8b969f38d21949ebfd739395 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:29:09 -0500
Subject: [PATCH 22/35] add joint detect snps subworkflow

This generates per sample gvcf files, jointly calls variants with gatk,
annotates, filters, and stages the outputs.
---
 .../subworkflows/joint_detect_snps.cwl        | 190 ++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 definitions/subworkflows/joint_detect_snps.cwl

diff --git a/definitions/subworkflows/joint_detect_snps.cwl b/definitions/subworkflows/joint_detect_snps.cwl
new file mode 100644
index 00000000..a7ec4721
--- /dev/null
+++ b/definitions/subworkflows/joint_detect_snps.cwl
@@ -0,0 +1,190 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "joint germline snp variant detection"
+requirements:
+    - class: MultipleInputFeatureRequirement  # steps list sources as arrays with linkMerge
+    - class: SubworkflowFeatureRequirement  # per_sample_make_gvcfs and genotype run subworkflows
+    - class: SchemaDefRequirement  # brings in the custom type used by vep_custom_annotations
+      types:
+          - $import: ../types/vep_custom_annotation.yml
+    - class: StepInputExpressionRequirement  # valueFrom on output_file_name and the staging outdir inputs
+    - class: InlineJavascriptRequirement
+    - class: ScatterFeatureRequirement  # both per_sample_* steps scatter
+inputs:
+    reference:  # path string or File; used by both per_sample_* steps and genotype
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    bams:  # scattered pairwise with contamination_fraction
+        type: File[]
+        secondaryFiles: [^.bai]
+    sample_names:  # scattered pairwise with the per-sample gvcfs to name the merged files
+        type: string[]
+    gvcf_gq_bands:
+        type: string[]
+    intervals:  # array of string arrays; passed to the haplotypecaller iterator and to genotype
+        type:
+            type: array
+            items:
+                type: array
+                items: string
+    contamination_fraction:  # dotproduct scatter with bams, so lengths must match
+        type: string[]
+    ploidy:
+        type: int?
+    vep_cache_dir:
+        type:
+            - string
+            - Directory
+    vep_ensembl_assembly:
+        type: string
+        doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
+    vep_ensembl_version:
+        type: string
+        doc: "ensembl version - Must be present in the cache directory. Example: 95"
+    vep_ensembl_species:
+        type: string
+        doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
+    vep_plugins:
+        type: string[]
+        default: [Frameshift, Wildtype]
+    synonyms_file:
+        type: File?
+    annotate_coding_only:
+        type: boolean?
+    vep_custom_annotations:
+        type: ../types/vep_custom_annotation.yml#vep_custom_annotation[]
+        doc: "custom type, check types directory for input format"
+    limit_variant_intervals:  # handed to genotype as roi_intervals
+        type: File
+    variants_to_table_fields:
+        type: string[]
+        default: ['CHROM','POS','ID','REF','ALT']
+    variants_to_table_genotype_fields:
+        type: string[]
+    vep_to_table_fields:
+        type: string[]
+    final_tsv_prefix:
+        type: string?
+        default: 'variants'
+    gnomad_max_pop_af:
+        type: float
+        default: 0.05
+    min_conf_call:  # optional; forwarded to genotype
+        type: float?
+outputs:
+    sample_gvcfs:  # one merged gvcf per scattered per_sample_merge_gvcfs job
+        type: File[]
+        outputSource: per_sample_merge_gvcfs/gvcf
+    raw_vcf:  # every remaining file output comes straight from the genotype subworkflow
+        type: File
+        outputSource: genotype/raw_vcf
+        secondaryFiles: [.tbi]
+    final_vcf:
+        type: File
+        outputSource: genotype/final_vcf
+        secondaryFiles: [.tbi]
+    filtered_vcf:
+        type: File
+        outputSource: genotype/filtered_vcf
+        secondaryFiles: [.tbi]
+    vep_summary:
+        type: File
+        outputSource: genotype/vep_summary
+    final_tsv:
+        type: File
+        outputSource: genotype/final_tsv
+    filtered_tsv:
+        type: File
+        outputSource: genotype/filtered_tsv
+    all_staged:  # the "SNP_pipeline" directory assembled by stage_all
+        type: Directory
+        outputSource: stage_all/gathered_directory
+steps:
+    per_sample_make_gvcfs:  # one job per (bam, contamination_fraction) pair
+        scatter: [bam, contamination_fraction]
+        scatterMethod: dotproduct
+        run: gatk_haplotypecaller_iterator.cwl
+        in:
+            reference: reference
+            bam: bams
+            emit_reference_confidence:
+                 default: 'GVCF'
+            gvcf_gq_bands: gvcf_gq_bands
+            intervals: intervals
+            contamination_fraction: contamination_fraction
+            ploidy: ploidy
+        out:
+            [gvcf]
+    per_sample_merge_gvcfs:  # one job per (gvcfs entry, sample name) pair
+        scatter: [gvcfs, output_file_name]
+        scatterMethod: dotproduct
+        run: ../tools/combine_gvcfs.cwl
+        in:
+            reference: reference
+            gvcfs: per_sample_make_gvcfs/gvcf
+            output_file_name:  # yields "<sample_name>.merged.g.vcf.gz"
+                source: [sample_names]
+                valueFrom: "$(self).merged.g.vcf.gz"
+        out:
+            [gvcf]
+    genotype:  # joint genotyping subworkflow over all merged per-sample gvcfs
+        run: joint_genotype.cwl
+        in:
+            reference: reference
+            gvcfs:
+                source: [per_sample_merge_gvcfs/gvcf]
+                linkMerge: merge_flattened
+            intervals: intervals
+            vep_cache_dir: vep_cache_dir
+            vep_ensembl_assembly: vep_ensembl_assembly
+            vep_ensembl_version: vep_ensembl_version
+            vep_ensembl_species: vep_ensembl_species
+            vep_plugins: vep_plugins
+            synonyms_file: synonyms_file
+            annotate_coding_only: annotate_coding_only
+            vep_custom_annotations: vep_custom_annotations
+            roi_intervals: limit_variant_intervals
+            variants_to_table_fields: variants_to_table_fields
+            variants_to_table_genotype_fields: variants_to_table_genotype_fields
+            vep_to_table_fields: vep_to_table_fields
+            final_tsv_prefix: final_tsv_prefix
+            gnomad_max_pop_af: gnomad_max_pop_af
+            min_conf_call: min_conf_call
+        out:
+            [raw_vcf, annotated_vcf, final_vcf, filtered_vcf, vep_summary, final_tsv, filtered_tsv]
+    stage_gvcf:  # gathers the per-sample gvcfs into a "gvcfs" directory
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "gvcfs"
+            files:
+                source: [per_sample_merge_gvcfs/gvcf]
+                linkMerge: merge_flattened
+        out:
+            [gathered_directory]
+
+    stage_gatk:  # gathers all genotype outputs plus the "gvcfs" directory into "gatk"
+        run: ../tools/gather_to_sub_directory.cwl
+        in:
+            outdir:
+                valueFrom: "gatk"
+            files:
+                source: [genotype/raw_vcf, genotype/annotated_vcf, genotype/final_vcf, genotype/filtered_vcf, genotype/vep_summary, genotype/final_tsv, genotype/filtered_tsv]
+                linkMerge: merge_flattened
+            directory: stage_gvcf/gathered_directory
+        out:
+            [gathered_directory]
+    stage_all:  # wraps the "gatk" directory in the top-level "SNP_pipeline" directory
+        run: ../tools/gather_to_sub_directory_dirs.cwl
+        in:
+             outdir:
+                 valueFrom: "SNP_pipeline"
+             directories:
+                 source: [stage_gatk/gathered_directory]
+                 linkMerge: merge_flattened
+        out:
+            [gathered_directory]

From 065f8b33a49415719cd565b2f5a587155f142200 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:30:56 -0500
Subject: [PATCH 23/35] add joint detect variants

This subworkflow calls the joint detect snps and joint detect svs
subworkflows, outputting the staged results
---
 .../subworkflows/joint_detect_variants.cwl    | 203 ++++++++++++++++++
 1 file changed, 203 insertions(+)
 create mode 100644 definitions/subworkflows/joint_detect_variants.cwl

diff --git a/definitions/subworkflows/joint_detect_variants.cwl b/definitions/subworkflows/joint_detect_variants.cwl
new file mode 100644
index 00000000..e1cf579b
--- /dev/null
+++ b/definitions/subworkflows/joint_detect_variants.cwl
@@ -0,0 +1,203 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "joint variant detection(snps,svs)"
+requirements:
+    - class: MultipleInputFeatureRequirement
+    - class: SubworkflowFeatureRequirement  # both steps run subworkflows
+    - class: SchemaDefRequirement  # brings in the custom type used by vep_custom_annotations
+      types:
+          - $import: ../types/vep_custom_annotation.yml
+    - class: StepInputExpressionRequirement
+    - class: InlineJavascriptRequirement
+    - class: ScatterFeatureRequirement
+inputs:
+    reference:  # shared by detect_snps and detect_svs
+        type:
+            - string
+            - File
+        secondaryFiles: [.fai, ^.dict]
+    bams:  # shared by detect_snps and detect_svs
+        type: File[]
+        secondaryFiles: [^.bai]
+    sample_names:  # shared by detect_snps and detect_svs
+        type: string[]
+    cohort_name:  # detect_svs only
+        type: string
+    gvcf_gq_bands:  # from here to gatk_min_conf_call: detect_snps inputs
+        type: string[]
+    intervals:
+        type:
+            type: array
+            items:
+                type: array
+                items: string
+    contamination_fraction:
+        type: string[]
+    ploidy:
+        type: int?
+    vep_cache_dir:
+        type:
+            - string
+            - Directory
+    vep_ensembl_assembly:  # also passed to detect_svs as genome_build
+        type: string
+        doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
+    vep_ensembl_version:
+        type: string
+        doc: "ensembl version - Must be present in the cache directory. Example: 95"
+    vep_ensembl_species:
+        type: string
+        doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
+    vep_plugins:
+        type: string[]
+        default: [Frameshift, Wildtype]
+    synonyms_file:
+        type: File?
+    annotate_coding_only:
+        type: boolean?
+    vep_custom_annotations:
+        type: ../types/vep_custom_annotation.yml#vep_custom_annotation[]
+        doc: "custom type, check types directory for input format"
+    limit_variant_intervals:
+        type: File
+    snp_to_table_fields:  # detect_snps variants_to_table_fields
+        type: string[]
+        default: ['CHROM','POS','ID','REF','ALT']
+    snp_to_table_genotype_fields:  # detect_snps variants_to_table_genotype_fields
+        type: string[]
+    vep_to_table_fields:
+        type: string[]
+    snp_final_tsv_prefix:  # detect_snps final_tsv_prefix
+        type: string?
+        default: 'variants'
+    snp_gnomad_max_pop_af:  # detect_snps gnomad_max_pop_af
+        type: float
+        default: 0.05
+    gatk_min_conf_call:  # detect_snps min_conf_call
+        type: float?
+
+
+    sv_exclude_regions:  # from here down: detect_svs inputs; this one maps to exclude_regions
+        type: File?
+    manta_call_regions:
+        type: File?
+    manta_output_contigs:
+        type: boolean?
+    cnvnator_bin_size:
+        type: int?
+    cnvkit_method:
+        type:
+          - "null"
+          - type: enum
+            symbols: ["hybrid", "amplicon", "wgs"]
+    cnvkit_reference_cnn:
+        type: File?
+    cnvkit_segment_filter:
+        type:
+          - "null"
+          - type: enum
+            symbols: ["ampdel", "ci", "cn", "sem"]
+    sv_filter_del_depth:  # sv_filter_* inputs map to detect_svs filter_* inputs
+        type: double?
+    sv_filter_dup_depth:
+        type: double?
+    sv_filter_paired_count:
+        type: int?
+    sv_filter_split_count:
+        type: int?
+    sv_filter_alt_abundance_percentage:
+        type: double?
+    sv_filter_depth_caller_min_size:
+        type: int?
+    survivor_estimate_sv_distance:
+        type: boolean
+    survivor_max_distance_to_merge:
+        type: int
+    survivor_minimum_sv_calls:
+        type: int
+    survivor_minimum_sv_size:
+        type: int
+    survivor_same_strand:
+        type: boolean
+    survivor_same_type:
+        type: boolean
+    sv_filter_blocklist_bedpe:  # detect_svs filter_blocklist_bedpe
+        type: File?
+    annotsv_filter_pop_af:
+        type: double?
+    annotsv_filter_no_CDS:
+        type: boolean?
+    annotsv_annotations:
+        type:
+            - string
+            - Directory
+outputs:
+    snps_staged:  # staged directory produced by the joint_detect_snps subworkflow
+        type: Directory
+        outputSource: detect_snps/all_staged
+    svs_staged:  # staged directory produced by the joint_detect_svs subworkflow
+        type: Directory
+        outputSource: detect_svs/all_staged
+steps:
+    detect_snps:  # snp subworkflow; its raw_vcf also feeds detect_svs as snps_vcf
+        run: joint_detect_snps.cwl
+        in:
+            reference: reference
+            bams: bams
+            sample_names: sample_names
+            gvcf_gq_bands: gvcf_gq_bands
+            intervals: intervals
+            contamination_fraction: contamination_fraction
+            ploidy: ploidy
+            vep_cache_dir: vep_cache_dir
+            vep_ensembl_assembly: vep_ensembl_assembly
+            vep_ensembl_version: vep_ensembl_version
+            vep_ensembl_species: vep_ensembl_species
+            vep_plugins: vep_plugins
+            synonyms_file: synonyms_file
+            annotate_coding_only: annotate_coding_only
+            vep_custom_annotations: vep_custom_annotations
+            limit_variant_intervals: limit_variant_intervals
+            variants_to_table_fields: snp_to_table_fields
+            variants_to_table_genotype_fields: snp_to_table_genotype_fields
+            vep_to_table_fields: vep_to_table_fields
+            final_tsv_prefix: snp_final_tsv_prefix
+            gnomad_max_pop_af: snp_gnomad_max_pop_af
+            min_conf_call: gatk_min_conf_call
+        out:
+            [raw_vcf, all_staged]  # only these two of the subworkflow's outputs are used here
+    detect_svs:  # sv subworkflow; waits on detect_snps because it consumes its raw_vcf
+        run: joint_detect_svs.cwl
+        in:
+            reference: reference
+            bams: bams
+            sample_names: sample_names
+            cohort_name: cohort_name
+            genome_build: vep_ensembl_assembly  # the vep assembly name doubles as the sv genome build
+            exclude_regions: sv_exclude_regions
+            manta_call_regions: manta_call_regions
+            manta_output_contigs: manta_output_contigs
+            cnvnator_bin_size: cnvnator_bin_size
+            cnvkit_method: cnvkit_method
+            cnvkit_reference_cnn: cnvkit_reference_cnn
+            cnvkit_segment_filter: cnvkit_segment_filter
+            filter_del_depth: sv_filter_del_depth
+            filter_dup_depth: sv_filter_dup_depth
+            filter_paired_count: sv_filter_paired_count
+            filter_split_count: sv_filter_split_count
+            filter_alt_abundance_percentage: sv_filter_alt_abundance_percentage
+            filter_depth_caller_min_size: sv_filter_depth_caller_min_size
+            survivor_estimate_sv_distance: survivor_estimate_sv_distance
+            survivor_max_distance_to_merge: survivor_max_distance_to_merge
+            survivor_minimum_sv_calls: survivor_minimum_sv_calls
+            survivor_minimum_sv_size: survivor_minimum_sv_size
+            survivor_same_strand: survivor_same_strand
+            survivor_same_type: survivor_same_type
+            snps_vcf: detect_snps/raw_vcf
+            filter_blocklist_bedpe: sv_filter_blocklist_bedpe
+            annotsv_filter_pop_af: annotsv_filter_pop_af
+            annotsv_filter_no_CDS: annotsv_filter_no_CDS  # without this mapping the declared workflow input is silently ignored
+            annotsv_annotations: annotsv_annotations
+        out: [all_staged]

From 70005be012b410c8ab66ee323b351966e26f3d65 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 10:41:30 -0500
Subject: [PATCH 24/35] pass annotsv_annotations input to subworkflow

---
 definitions/pipelines/germline_wgs.cwl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/definitions/pipelines/germline_wgs.cwl b/definitions/pipelines/germline_wgs.cwl
index a29bc142..ca61187a 100644
--- a/definitions/pipelines/germline_wgs.cwl
+++ b/definitions/pipelines/germline_wgs.cwl
@@ -473,6 +473,7 @@ steps:
             sv_split_count: sv_filter_split_count
             genome_build: vep_ensembl_assembly
             blocklist_bedpe: blocklist_bedpe
+            annotsv_annotations: annotsv_annotations
         out: 
            [cn_diagram, cn_scatter_plot, tumor_antitarget_coverage, tumor_target_coverage, tumor_bin_level_ratios, tumor_segmented_ratios, cnvkit_vcf, cnvnator_cn_file, cnvnator_root, cnvnator_vcf, manta_diploid_variants, manta_somatic_variants, manta_all_candidates, manta_small_candidates, manta_tumor_only_variants, smoove_output_variants, cnvkit_filtered_vcf, cnvnator_filtered_vcf, manta_filtered_vcf, smoove_filtered_vcf, survivor_merged_vcf, survivor_merged_annotated_tsv, bcftools_merged_vcf, bcftools_merged_annotated_tsv, bcftools_merged_filtered_annotated_tsv]
     add_disclaimer_survivor_sv_vcf:

From 0885e03b9381c1e5c84f8a2f45f6e8f52c4e6f35 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 15 Jul 2021 12:49:30 -0500
Subject: [PATCH 25/35] pass soft filtered annotated vcf as output

---
 definitions/subworkflows/joint_genotype.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index 76c6aa4c..d4bb1edb 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -76,7 +76,7 @@ outputs:
         secondaryFiles: [.tbi]
     annotated_vcf:
         type: File
-        outputSource: bgzip_index_annotated_vcf/indexed_vcf
+        outputSource: soft_filter/filtered_vcf
         secondaryFiles: [.tbi]
     final_vcf:
         type: File

From e0449dce23bf7444902b3f9dd6e273e6d44fe9ef Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Wed, 17 Nov 2021 10:30:56 -0600
Subject: [PATCH 26/35] remove doc line for easy to understand input

---
 definitions/tools/bcftools_view.cwl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl
index e43b67e4..b43bcb03 100644
--- a/definitions/tools/bcftools_view.cwl
+++ b/definitions/tools/bcftools_view.cwl
@@ -26,7 +26,6 @@ inputs:
         inputBinding:
             position: 4
             prefix: "--output-type"
-        doc: "output file format"
     output_vcf_name:
         type: string?
         default: "bcftools_split.vcf.gz"

From 20f96d842ae2adc64f8dbfe511589abca22bc4a5 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Wed, 17 Nov 2021 10:31:52 -0600
Subject: [PATCH 27/35] ubuntu:xenial -> ubuntu:focal docker image

---
 definitions/tools/gather_to_sub_directory.cwl      | 2 +-
 definitions/tools/gather_to_sub_directory_dirs.cwl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl
index 1980cb47..8a885d14 100644
--- a/definitions/tools/gather_to_sub_directory.cwl
+++ b/definitions/tools/gather_to_sub_directory.cwl
@@ -6,7 +6,7 @@ baseCommand: ["/bin/bash","directory_gatherer.sh"]
 
 requirements:
     - class: DockerRequirement
-      dockerPull: "ubuntu:xenial"
+      dockerPull: "ubuntu:focal"
     - class: ResourceRequirement
       ramMin: 1000
     - class: InitialWorkDirRequirement
diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl
index a83b45ad..226cc6a8 100644
--- a/definitions/tools/gather_to_sub_directory_dirs.cwl
+++ b/definitions/tools/gather_to_sub_directory_dirs.cwl
@@ -6,7 +6,7 @@ baseCommand: ["/bin/bash","directory_gatherer.sh"]
 
 requirements:
     - class: DockerRequirement
-      dockerPull: "ubuntu:xenial"
+      dockerPull: "ubuntu:focal"
     - class: ResourceRequirement
       ramMin: 1000
     - class: InitialWorkDirRequirement

From 91b0e5fbd0e9ca4f2bc0467239aed40b9d087627 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Wed, 17 Nov 2021 10:37:14 -0600
Subject: [PATCH 28/35] quote parameters in script

---
 definitions/tools/custom_merge_sv_records.cwl      | 6 +++---
 definitions/tools/gather_to_sub_directory.cwl      | 2 +-
 definitions/tools/gather_to_sub_directory_dirs.cwl | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/definitions/tools/custom_merge_sv_records.cwl b/definitions/tools/custom_merge_sv_records.cwl
index df6d267d..e0770013 100644
--- a/definitions/tools/custom_merge_sv_records.cwl
+++ b/definitions/tools/custom_merge_sv_records.cwl
@@ -19,10 +19,10 @@ requirements:
           INPUT="$1"
           OUTPUT="$2"
           DISTANCE="$3"
-          /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i $INPUT -o $OUTPUT -w $DISTANCE
+          /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE"
 
-          /usr/local/bin/bgzip $OUTPUT
-          /usr/local/bin/tabix -p vcf $OUTPUT.gz
+          /usr/local/bin/bgzip "$OUTPUT"
+          /usr/local/bin/tabix -p vcf "$OUTPUT".gz
 
 
 inputs:
diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl
index 8a885d14..16a759a5 100644
--- a/definitions/tools/gather_to_sub_directory.cwl
+++ b/definitions/tools/gather_to_sub_directory.cwl
@@ -19,7 +19,7 @@ requirements:
             files="${@:2}"
             mkdir $outdir
             chmod -R 777 $outdir
-            cp --recursive --preserve --no-clobber --target-directory $outdir $files
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files"
 
             exit 0
 
diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl
index 226cc6a8..7fc56595 100644
--- a/definitions/tools/gather_to_sub_directory_dirs.cwl
+++ b/definitions/tools/gather_to_sub_directory_dirs.cwl
@@ -19,7 +19,7 @@ requirements:
             files="${@:2}"
             mkdir $outdir
             chmod -R 777 $outdir
-            cp --recursive --preserve --no-clobber --target-directory $outdir $files
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files"
 
             exit 0
 

From 8079ab457780c2d9f7e5f5488529e6a87dfd0163 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Fri, 3 Dec 2021 15:47:16 -0600
Subject: [PATCH 29/35] fix quotes

---
 definitions/tools/gather_to_sub_directory.cwl      | 6 +++---
 definitions/tools/gather_to_sub_directory_dirs.cwl | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl
index 16a759a5..10b9df49 100644
--- a/definitions/tools/gather_to_sub_directory.cwl
+++ b/definitions/tools/gather_to_sub_directory.cwl
@@ -17,9 +17,9 @@ requirements:
 
             outdir="$1"
             files="${@:2}"
-            mkdir $outdir
-            chmod -R 777 $outdir
-            cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files"
+            mkdir "$outdir"
+            chmod -R 777 "$outdir"
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" $files
 
             exit 0
 
diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl
index 7fc56595..88c6c0c8 100644
--- a/definitions/tools/gather_to_sub_directory_dirs.cwl
+++ b/definitions/tools/gather_to_sub_directory_dirs.cwl
@@ -17,9 +17,9 @@ requirements:
 
             outdir="$1"
             files="${@:2}"
-            mkdir $outdir
-            chmod -R 777 $outdir
-            cp --recursive --preserve --no-clobber --target-directory "$outdir" "$files"
+            mkdir "$outdir"
+            chmod -R 777 "$outdir"
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" $files
 
             exit 0
 

From 9b6a9cb81e9208a8dc8e99acfbde866c57efe6cf Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Fri, 3 Dec 2021 15:49:40 -0600
Subject: [PATCH 30/35] move script inline into cwl file

---
 definitions/tools/custom_merge_sv_records.cwl | 69 +++++++++++++++----
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/definitions/tools/custom_merge_sv_records.cwl b/definitions/tools/custom_merge_sv_records.cwl
index e0770013..1f134761 100644
--- a/definitions/tools/custom_merge_sv_records.cwl
+++ b/definitions/tools/custom_merge_sv_records.cwl
@@ -4,46 +4,89 @@ cwlVersion: v1.0
 class: CommandLineTool
 label: "merges nearby DEL/DUP records within a certain window distance"
 
-baseCommand: ["/bin/bash", "run_merge.sh"]
+baseCommand: ["python3", "merge.py"]
 requirements:
     - class: ResourceRequirement
       ramMin: 4000
     - class: DockerRequirement
-      dockerPull: "apaul7/analysis:1.0.0"
+      dockerPull: "griffithlab/vatools:4.1.0"
     - class: InitialWorkDirRequirement
       listing:
-      - entryname: "run_merge.sh"
+      - entryname: "merge.py"
         entry: |
-          #!/bin/bash
-          set -eou pipefail
-          INPUT="$1"
-          OUTPUT="$2"
-          DISTANCE="$3"
-          /usr/local/bin/python3 /opt/git/merge-sv-records/merge.py -i "$INPUT" -o "$OUTPUT" -w "$DISTANCE"
+          import argparse
+          import vcfpy
+          from collections import OrderedDict
 
-          /usr/local/bin/bgzip "$OUTPUT"
-          /usr/local/bin/tabix -p vcf "$OUTPUT".gz
+          parser = argparse.ArgumentParser()
+          parser.add_argument('--input', '-i', dest="input", help='input vcf file', required=True, action="store")
+          parser.add_argument('--output', '-o', dest="output", help='output vcf file', required=False, default="out.vcf", action="store")
+          parser.add_argument('--window', '-w', dest="window", help='max merge window size', required=False, default=1000, type=int, action="store")
 
+          args = parser.parse_args()
+          in_vcf_name  = args.input
+          out_vcf_name = args.output
+          window_size = args.window
+
+          reader = vcfpy.Reader.from_path(in_vcf_name)
+          new_header = reader.header
+          new_header.add_filter_line(vcfpy.OrderedDict([('ID', 'MERGED_CALL'), ('Description', 'Record merged from 2 or more individual records')]))
+
+          writer = vcfpy.Writer.from_path(out_vcf_name, new_header)
+          new_record_count = 0
+          merge_records = []
+          def write_merged(group):
+              """Write one MERGED_CALL record spanning group[0].POS through group[-1].INFO['END']."""
+              start, end, svtype = group[0].POS, group[-1].INFO['END'], group[0].INFO['SVTYPE']
+              info = OrderedDict({"SVTYPE": svtype, "END": end, "SVLEN": end - start})
+              # genotypes are copied from the first record of the group
+              sample_calls = []
+              for sample in group[0].calls:
+                  gt = OrderedDict({"GT": "/".join(map(str, sample.gt_alleles)).replace("None",".")})
+                  sample_calls.append(vcfpy.Call(sample.sample, gt))
+              alt = vcfpy.SymbolicAllele(svtype)
+              writer.write_record(vcfpy.Record(group[0].CHROM, start, [], "N", [alt], ".", ["MERGED_CALL"], info, ["GT"], sample_calls))
+
+          for record in reader:
+              # a group ends when the contig or SVTYPE changes, or the gap to the previous END exceeds the window
+              if((len(merge_records) == 0) or (merge_records[-1].CHROM != record.CHROM) or (merge_records[-1].INFO['SVTYPE'] != record.INFO['SVTYPE']) or (abs(merge_records[-1].INFO['END'] - record.POS) > window_size)):
+                  if(len(merge_records) > 1):
+                      new_record_count = new_record_count + 1
+                      write_merged(merge_records)
+                  merge_records = [record]
+                  writer.write_record(record)
+                  # 'continue', not the no-op expression 'next': the record that opens a group must not be appended to it again
+                  continue
+              # gap <= window_size: extend the group. NOTE(review): later members are emitted only via the merged record - confirm
+              merge_records.append(record)
+          # the last group is never followed by a boundary record, so flush it here
+          if(len(merge_records) > 1):
+              new_record_count = new_record_count + 1
+              write_merged(merge_records)
+          writer.close()
+          print(f"Found {new_record_count} records that can be merged based on the input {window_size} distance")
 
 inputs:
     input_vcf:
         type: File
         inputBinding:
+            prefix: "-i"
             position: 1
     output_vcf_name:
         type: string?
         default: "record_merged.vcf"
         inputBinding:
+            prefix: "-o"
             position: 2
     distance:
         type: int?
         default: 1000
         inputBinding:
+            prefix: "-w"
             position: 3
 
 outputs:
     vcf:
         type: File
         outputBinding:
-            glob: "$(inputs.output_vcf_name).gz"
-        secondaryFiles: [.tbi]
+            glob: "$(inputs.output_vcf_name)"

From 6e5cb2304fba57d37374c9fbc449ad1b080ef258 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Fri, 3 Dec 2021 15:52:04 -0600
Subject: [PATCH 31/35] add input option for output file basename

---
 definitions/subworkflows/gatk_soft_filter.cwl | 6 ++++--
 definitions/subworkflows/joint_genotype.cwl   | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl
index 61780268..4f3b76af 100644
--- a/definitions/subworkflows/gatk_soft_filter.cwl
+++ b/definitions/subworkflows/gatk_soft_filter.cwl
@@ -16,6 +16,9 @@ inputs:
     vcf:
         type: File
         secondaryFiles: [.tbi]
+    output_basename:
+        type: string?
+        default: "soft_filtered"
 outputs:
     filtered_vcf:
         type: File
@@ -69,8 +72,7 @@ steps:
     merge:
         run: ../tools/merge_vcf.cwl
         in:
-            merged_vcf_basename:
-                default: "soft_filtered"
+            merged_vcf_basename: output_basename
             vcfs:
                 source: [filter_snps/filtered_vcf, filter_indels/filtered_vcf]
                 linkMerge: merge_flattened
diff --git a/definitions/subworkflows/joint_genotype.cwl b/definitions/subworkflows/joint_genotype.cwl
index d4bb1edb..80e8bff6 100644
--- a/definitions/subworkflows/joint_genotype.cwl
+++ b/definitions/subworkflows/joint_genotype.cwl
@@ -172,6 +172,8 @@ steps:
         in:
             reference: reference
             vcf: bgzip_index_annotated_vcf/indexed_vcf
+            output_basename:
+                default: "annotated"
         out:
             [filtered_vcf]
     filter_vcf:

From ac09653fb1931113cca2585d5888934077624d17 Mon Sep 17 00:00:00 2001
From: Alex Paul <alex.paul@wustl.edu>
Date: Mon, 6 Dec 2021 10:07:29 -0600
Subject: [PATCH 32/35] Update definitions/subworkflows/gatk_soft_filter.cwl

Co-authored-by: Thomas B. Mooney <mooney@wustl.edu>
---
 definitions/subworkflows/gatk_soft_filter.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/definitions/subworkflows/gatk_soft_filter.cwl b/definitions/subworkflows/gatk_soft_filter.cwl
index 4f3b76af..eaf6845b 100644
--- a/definitions/subworkflows/gatk_soft_filter.cwl
+++ b/definitions/subworkflows/gatk_soft_filter.cwl
@@ -2,7 +2,7 @@
 
 cwlVersion: v1.0
 class: Workflow
-label: "apply soft filtering to a gatk called vcf using hard filter paramaters"
+label: "apply soft filtering to a gatk called vcf using hard filter parameters"
 requirements:
     - class: SubworkflowFeatureRequirement
     - class: StepInputExpressionRequirement

From 1cb02546fca81fa997503c1035478e7527cdb60f Mon Sep 17 00:00:00 2001
From: Alex Paul <alex.paul@wustl.edu>
Date: Mon, 6 Dec 2021 10:07:35 -0600
Subject: [PATCH 33/35] Update definitions/tools/bcftools_view.cwl

Co-authored-by: Thomas B. Mooney <mooney@wustl.edu>
---
 definitions/tools/bcftools_view.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl
index b43bcb03..4efeb9a0 100644
--- a/definitions/tools/bcftools_view.cwl
+++ b/definitions/tools/bcftools_view.cwl
@@ -17,7 +17,7 @@ inputs:
         inputBinding:
             position: 1
             prefix: "--samples"
-        doc: "comma separated list of samples to include (or exclude with '^' prefix)"
+        doc: "comma-separated list of samples to include (or exclude with '^' prefix)"
     output_type:
         type:
             type: enum

From c253e6f8513462c43612d859d11f1a59bcb88405 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Tue, 7 Dec 2021 08:36:36 -0600
Subject: [PATCH 34/35] add doc for output type

---
 definitions/tools/bcftools_merge.cwl | 2 +-
 definitions/tools/bcftools_view.cwl  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/definitions/tools/bcftools_merge.cwl b/definitions/tools/bcftools_merge.cwl
index 57daeaec..17909417 100644
--- a/definitions/tools/bcftools_merge.cwl
+++ b/definitions/tools/bcftools_merge.cwl
@@ -43,7 +43,7 @@ inputs:
         inputBinding:
             position: 4
             prefix: "--output-type"
-        doc: "output file format"
+        doc: "output file format, b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF"
     output_vcf_name:
         type: string?
         default: "bcftools_merged.vcf.gz"
diff --git a/definitions/tools/bcftools_view.cwl b/definitions/tools/bcftools_view.cwl
index 4efeb9a0..fa2e18e6 100644
--- a/definitions/tools/bcftools_view.cwl
+++ b/definitions/tools/bcftools_view.cwl
@@ -26,6 +26,7 @@ inputs:
         inputBinding:
             position: 4
             prefix: "--output-type"
+        doc: "output file format, b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF"
     output_vcf_name:
         type: string?
         default: "bcftools_split.vcf.gz"

From 140e6ebbcdbe95f917d5f71728d1fa4cf19b2bb7 Mon Sep 17 00:00:00 2001
From: apaul7 <alex.paul@wustl.edu>
Date: Thu, 9 Dec 2021 09:37:16 -0600
Subject: [PATCH 35/35] use bash arrays to quote multiple vars

---
 definitions/tools/gather_to_sub_directory.cwl      | 4 ++--
 definitions/tools/gather_to_sub_directory_dirs.cwl | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/definitions/tools/gather_to_sub_directory.cwl b/definitions/tools/gather_to_sub_directory.cwl
index 10b9df49..d4ad5fad 100644
--- a/definitions/tools/gather_to_sub_directory.cwl
+++ b/definitions/tools/gather_to_sub_directory.cwl
@@ -16,10 +16,10 @@ requirements:
             set -eou pipefail
 
             outdir="$1"
-            files="${@:2}"
+            files=("${@:2}")
             mkdir "$outdir"
             chmod -R 777 "$outdir"
-            cp --recursive --preserve --no-clobber --target-directory "$outdir" $files
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" "${files[@]}"
 
             exit 0
 
diff --git a/definitions/tools/gather_to_sub_directory_dirs.cwl b/definitions/tools/gather_to_sub_directory_dirs.cwl
index 88c6c0c8..efc14d63 100644
--- a/definitions/tools/gather_to_sub_directory_dirs.cwl
+++ b/definitions/tools/gather_to_sub_directory_dirs.cwl
@@ -16,10 +16,10 @@ requirements:
             set -eou pipefail
 
             outdir="$1"
-            files="${@:2}"
+            files=("${@:2}")
             mkdir "$outdir"
             chmod -R 777 "$outdir"
-            cp --recursive --preserve --no-clobber --target-directory "$outdir" $files
+            cp --recursive --preserve --no-clobber --target-directory "$outdir" "${files[@]}"
 
             exit 0