Skip to content

Commit a6c4ce5

Browse files
authored
Merge pull request #168 from icgc-argo-workflows/payload-gen-variant-calling@0.7.0
[release]
2 parents 1c8ae39 + 42aa379 commit a6c4ce5

7 files changed

+481
-6
lines changed

payload-gen-variant-calling/main.nf

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
/* this block is auto-generated based on info from pkg.json where */
2626
/* changes can be made if needed, do NOT modify this block manually */
2727
nextflow.enable.dsl = 2
28-
version = '0.6.0.1'
28+
version = '0.7.0'
2929

3030
container = [
3131
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-variant-calling'

payload-gen-variant-calling/main.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,11 +170,13 @@ def get_files_info(file_to_upload, wf_short_name, wf_version, somatic_or_germli
170170
else:
171171
sys.exit('Error: unknown file type "%s"' % file_to_upload)
172172

173+
## Disable the population of qc_metrics into payload to avoid the exposure of sensitive info
173174
tar = tarfile.open(file_to_upload)
174175
for member in tar.getmembers():
175176
if member.name.endswith('.extra_info.json'):
176177
f = tar.extractfile(member)
177178
extra_info = json.load(f)
179+
extra_info.pop('metrics', None)
178180
break
179181
else:
180182
sys.exit('Error: unknown file type "%s"' % file_to_upload)

payload-gen-variant-calling/pkg.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "payload-gen-variant-calling",
3-
"version": "0.6.0.1",
3+
"version": "0.7.0",
44
"description": "A tool to generate SONG payloads for variant calling workflows",
55
"main": "main.nf",
66
"deprecated": false,

payload-gen-variant-calling/tests/checker.nf

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
/* this block is auto-generated based on info from pkg.json where */
3535
/* changes can be made if needed, do NOT modify this block manually */
3636
nextflow.enable.dsl = 2
37-
version = '0.6.0.1'
37+
version = '0.7.0'
3838

3939
container = [
4040
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-variant-calling'

payload-gen-variant-calling/tests/expected/66f84b1e-dad1-4981-916e-07e62ff53410.variant_calling_supplement.payload.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 145 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,145 @@
1+
{
2+
"analysisType": {
3+
"name": "variant_calling_supplement"
4+
},
5+
"info": {
6+
"origin": "ICGC-25K"
7+
},
8+
"studyId": "TEST-PR",
9+
"experiment": {
10+
"experimental_strategy": "WGS",
11+
"platform": "ILLUMINA"
12+
},
13+
"samples": [
14+
{
15+
"submitterSampleId": "HCC1143_FASTQ_INPUT",
16+
"matchedNormalSubmitterSampleId": "HCC1143_BAM_INPUT",
17+
"sampleType": "Total DNA",
18+
"specimen": {
19+
"submitterSpecimenId": "HCC1143_FASTQ_INPUT",
20+
"tumourNormalDesignation": "Tumour",
21+
"specimenTissueSource": "Solid tissue",
22+
"specimenType": "Primary tumour"
23+
},
24+
"donor": {
25+
"gender": "Female",
26+
"submitterDonorId": "HCC1143"
27+
}
28+
}
29+
],
30+
"files": [
31+
{
32+
"fileType": "TGZ",
33+
"fileSize": 91669,
34+
"fileMd5sum": "36de1a4788a5ca24eddaaf0bac7c42c2",
35+
"fileAccess": "controlled",
36+
"info": {
37+
"data_category": "Quality Control Metrics",
38+
"data_subtypes": [
39+
"Runtime Stats"
40+
],
41+
"analysis_tools": null,
42+
"description": "Files contain timing information for different processing steps",
43+
"files": [
44+
"WGS_SA610228.time.verify_WT",
45+
"WGS_SA610229.time.verify_MT",
46+
"WGS_SA610229_vs_SA610228.time.BRASS",
47+
"WGS_SA610229_vs_SA610228.time.BRASS_cover",
48+
"WGS_SA610229_vs_SA610228.time.BRASS_input",
49+
"WGS_SA610229_vs_SA610228.time.CaVEMan",
50+
"WGS_SA610229_vs_SA610228.time.CaVEMan_annot",
51+
"WGS_SA610229_vs_SA610228.time.CaVEMan_flag",
52+
"WGS_SA610229_vs_SA610228.time.CaVEMan_setup",
53+
"WGS_SA610229_vs_SA610228.time.CaVEMan_split",
54+
"WGS_SA610229_vs_SA610228.time.ascat",
55+
"WGS_SA610229_vs_SA610228.time.cache_POP",
56+
"WGS_SA610229_vs_SA610228.time.cgpPindel",
57+
"WGS_SA610229_vs_SA610228.time.cgpPindel_annot",
58+
"WGS_SA610229_vs_SA610228.time.geno"
59+
]
60+
},
61+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.sanger-wxs.somatic.timings-supplement.tgz",
62+
"dataType": "Analysis QC"
63+
},
64+
{
65+
"fileType": "TGZ",
66+
"fileSize": 1267621,
67+
"fileMd5sum": "de3f395473ebddcf821a02af3d6ab846",
68+
"fileAccess": "controlled",
69+
"info": {
70+
"data_category": "Simple Nucleotide Variation",
71+
"data_subtypes": null,
72+
"analysis_tools": [
73+
"CaVEMan"
74+
],
75+
"description": "Files provided by CaVEMan tool",
76+
"files": [
77+
"SA610229_vs_SA610228.annot.muts.vcf.gz",
78+
"SA610229_vs_SA610228.annot.muts.vcf.gz.tbi",
79+
"SA610229_vs_SA610228.flagged.muts.vcf.gz",
80+
"SA610229_vs_SA610228.flagged.muts.vcf.gz.tbi",
81+
"SA610229_vs_SA610228.muts.ids.vcf.gz",
82+
"SA610229_vs_SA610228.muts.ids.vcf.gz.tbi",
83+
"SA610229_vs_SA610228.no_analysis.bed",
84+
"SA610229_vs_SA610228.snps.ids.vcf.gz",
85+
"SA610229_vs_SA610228.snps.ids.vcf.gz.tbi",
86+
"alg_bean",
87+
"caveman.cfg.ini",
88+
"cov_arr",
89+
"prob_arr",
90+
"splitList"
91+
]
92+
},
93+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.sanger-wxs.somatic.caveman-supplement.tgz",
94+
"dataType": "SNV Supplement"
95+
},
96+
{
97+
"fileType": "TGZ",
98+
"fileSize": 1533090,
99+
"fileMd5sum": "ea61f46ceb1178ad12bd93f58fd7f401",
100+
"fileAccess": "controlled",
101+
"info": {
102+
"data_category": "Simple Nucleotide Variation",
103+
"data_subtypes": null,
104+
"analysis_tools": [
105+
"Pindel"
106+
],
107+
"description": "Files provided by Pindel tool",
108+
"files": [
109+
"SA610229_vs_SA610228.annot.vcf.gz",
110+
"SA610229_vs_SA610228.annot.vcf.gz.tbi",
111+
"SA610229_vs_SA610228.flagged.vcf.gz",
112+
"SA610229_vs_SA610228.flagged.vcf.gz.tbi",
113+
"SA610229_vs_SA610228.germline.bed",
114+
"SA610229_vs_SA610228_mt.cram",
115+
"SA610229_vs_SA610228_mt.cram.crai",
116+
"SA610229_vs_SA610228_mt.cram.md5",
117+
"SA610229_vs_SA610228_wt.cram",
118+
"SA610229_vs_SA610228_wt.cram.crai",
119+
"SA610229_vs_SA610228_wt.cram.md5"
120+
]
121+
},
122+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.sanger-wxs.somatic.pindel-supplement.tgz",
123+
"dataType": "InDel Supplement"
124+
}
125+
],
126+
"workflow": {
127+
"workflow_name": "Sanger WXS Variant Calling",
128+
"workflow_short_name": "sanger-wxs",
129+
"workflow_version": "0.1.0",
130+
"run_id": "magical_davinci",
131+
"session_id": "ba1dc8df-8c69-4de7-a885-4e1a71ffc420",
132+
"inputs": [
133+
{
134+
"tumour_analysis_id": "f64ae545-11c1-46f7-8ae5-4511c156f7be",
135+
"analysis_type": "sequencing_alignment"
136+
},
137+
{
138+
"normal_analysis_id": "7ad90309-21df-4345-9903-0921df73456b",
139+
"analysis_type": "sequencing_alignment"
140+
}
141+
],
142+
"genome_build": "GRCh38_hla_decoy_ebv"
143+
},
144+
"variant_class": "Somatic"
145+
}

payload-gen-variant-calling/tests/expected/735a4c34-c928-4f03-957b-fe808df68f63.qc_metrics.payload.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,140 @@
1+
{
2+
"analysisType": {
3+
"name": "qc_metrics"
4+
},
5+
"info": {
6+
"origin": "ICGC-25K"
7+
},
8+
"studyId": "TEST-PR",
9+
"experiment": {
10+
"experimental_strategy": "WGS",
11+
"platform": "ILLUMINA"
12+
},
13+
"samples": [
14+
{
15+
"submitterSampleId": "HCC1143_FASTQ_INPUT",
16+
"matchedNormalSubmitterSampleId": "HCC1143_BAM_INPUT",
17+
"sampleType": "Total DNA",
18+
"specimen": {
19+
"submitterSpecimenId": "HCC1143_FASTQ_INPUT",
20+
"tumourNormalDesignation": "Tumour",
21+
"specimenTissueSource": "Solid tissue",
22+
"specimenType": "Primary tumour"
23+
},
24+
"donor": {
25+
"gender": "Female",
26+
"submitterDonorId": "HCC1143"
27+
}
28+
}
29+
],
30+
"files": [
31+
{
32+
"fileType": "TGZ",
33+
"fileSize": 1067,
34+
"fileMd5sum": "529a150d8ffcc5a9393f829765b26158",
35+
"fileAccess": "controlled",
36+
"info": {
37+
"data_category": "Quality Control Metrics",
38+
"data_subtypes": [
39+
"Cross Sample Contamination"
40+
],
41+
"analysis_tools": [
42+
"GATK:CalculateContamination"
43+
],
44+
"description": "Cross sample contamination estimated by GATK CalculateContamination tool",
45+
"files_in_tgz": [
46+
"74fda0ca6f60168dbb34c00f0d93de07.normal.segmentation_metrics",
47+
"74fda0ca6f60168dbb34c00f0d93de07.normal.contamination_metrics",
48+
"normal_contamination.extra_info.json"
49+
]
50+
},
51+
"fileName": "TEST-PR.DO250122.SA610149.wgs.20230601.gatk-mutect2.somatic.contamination_metrics.tgz",
52+
"dataType": "Sample QC"
53+
},
54+
{
55+
"fileType": "TGZ",
56+
"fileSize": 1019,
57+
"fileMd5sum": "651d7d0e409b1d49261dd99160e703ac",
58+
"fileAccess": "controlled",
59+
"info": {
60+
"data_category": "Quality Control Metrics",
61+
"data_subtypes": [
62+
"Cross Sample Contamination"
63+
],
64+
"analysis_tools": [
65+
"GATK:CalculateContamination"
66+
],
67+
"description": "Cross sample contamination estimated by GATK CalculateContamination tool",
68+
"files_in_tgz": [
69+
"bcdc11ebe93861a0d447158a84d845ba.tumour.contamination_metrics",
70+
"bcdc11ebe93861a0d447158a84d845ba.tumour.segmentation_metrics",
71+
"tumour_contamination.extra_info.json"
72+
]
73+
},
74+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.gatk-mutect2.somatic.contamination_metrics.tgz",
75+
"dataType": "Sample QC"
76+
},
77+
{
78+
"fileType": "TGZ",
79+
"fileSize": 1003,
80+
"fileMd5sum": "3d818dcf8e01f637e593f33482f745bd",
81+
"fileAccess": "controlled",
82+
"info": {
83+
"data_category": "Quality Control Metrics",
84+
"data_subtypes": [
85+
"Variant Filtering Stats"
86+
],
87+
"analysis_tools": [
88+
"GATK:FilterMutectCalls"
89+
],
90+
"description": "Information on the probability threshold chosen to optimize the F score and the number of false positives and false negatives from each filter to be expected from this choice.",
91+
"files_in_tgz": [
92+
"filter-mutect-calls.filtering-stats",
93+
"filtering_stats.extra_info.json"
94+
]
95+
},
96+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.gatk-mutect2.somatic.mutect_filtering_metrics.tgz",
97+
"dataType": "Analysis QC"
98+
},
99+
{
100+
"fileType": "TGZ",
101+
"fileSize": 377,
102+
"fileMd5sum": "4018f6b7c711f4f4417085816f28f4d9",
103+
"fileAccess": "controlled",
104+
"info": {
105+
"data_category": "Quality Control Metrics",
106+
"data_subtypes": [
107+
"Variant Callable Stats"
108+
],
109+
"analysis_tools": [
110+
"GATK:Mutect2"
111+
],
112+
"description": "Number of sites that are considered callable for Mutect stats with read depth equals or is higher than callable-depth which we set to default 10",
113+
"files_in_tgz": [
114+
"merged-mutect-stats.stats",
115+
"callable_stats.extra_info.json"
116+
]
117+
},
118+
"fileName": "TEST-PR.DO250122.SA610148.wgs.20230601.gatk-mutect2.somatic.mutect_callable_metrics.tgz",
119+
"dataType": "Analysis QC"
120+
}
121+
],
122+
"workflow": {
123+
"workflow_name": "GATK Mutect2 Variant Calling",
124+
"workflow_short_name": "gatk-mutect2",
125+
"workflow_version": "0.1.0",
126+
"run_id": "lethal_hamilton",
127+
"session_id": "6a1496cc-b1fc-487d-a3e3-02e4939ecbcf",
128+
"inputs": [
129+
{
130+
"tumour_analysis_id": "f64ae545-11c1-46f7-8ae5-4511c156f7be",
131+
"analysis_type": "sequencing_alignment"
132+
},
133+
{
134+
"normal_analysis_id": "7ad90309-21df-4345-9903-0921df73456b",
135+
"analysis_type": "sequencing_alignment"
136+
}
137+
],
138+
"genome_build": "GRCh38_hla_decoy_ebv"
139+
}
140+
}

payload-gen-variant-calling/tests/expected/d354c4b2-db32-41f0-973f-db1306a99a84.qc_metrics.payload.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)