Skip to content

Commit 5439258

Browse files
authored
Merge pull request #165 from icgc-argo-workflows/payload-gen-seq-experiment@0.8.1
[release]
2 parents d23e5d2 + 9241e18 commit 5439258

7 files changed

Lines changed: 123 additions & 9 deletions

File tree

payload-gen-seq-experiment/main.nf

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
/* this block is auto-generated based on info from pkg.json where */
2727
/* changes can be made if needed, do NOT modify this block manually */
2828
nextflow.enable.dsl = 2
29-
version = '0.8.0'
29+
version = '0.8.1'
3030

3131
container = [
3232
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-seq-experiment'
@@ -54,6 +54,7 @@ params.schema_url="NO_FILE5"
5454
params.metadata_payload_json="NO_FILE6"
5555
params.converted_files=["NO_FILE7"]
5656
params.cram_reference="NO_FILE8"
57+
params.recalculate_size_and_md5_files=["NO_FILE9"]
5758

5859
process payloadGenSeqExperiment {
5960
container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
@@ -71,6 +72,7 @@ process payloadGenSeqExperiment {
7172
val schema_url
7273
path converted_files
7374
path cram_reference
75+
path recalculate_size_and_md5_files
7476

7577
output:
7678
path "*.sequencing_experiment.payload.json", emit: payload
@@ -83,6 +85,7 @@ process payloadGenSeqExperiment {
8385
args_metadata_payload_json= !metadata_payload_json.name.startsWith("NO_FILE") ? "-m ${metadata_payload_json}" : ""
8486
args_schema_url = !schema_url.startsWith("NO_FILE") ? "-s ${schema_url}" : ""
8587
args_converted_file_args = !cram_reference.startsWith("NO_FILE") ? "-br ${cram_reference} -b ${converted_files}" : ""
88+
args_recalculate_size_and_md5_files = recalculate_size_and_md5_files.empty() ? "" : "-z ${recalculate_size_and_md5_files}"
8689
"""
8790
main.py \
8891
${args_experiment_info_tsv} \
@@ -91,7 +94,8 @@ process payloadGenSeqExperiment {
9194
${args_extra_info_tsv} \
9295
${args_metadata_payload_json} \
9396
${args_schema_url} \
94-
${args_converted_file_args}
97+
${args_converted_file_args} \
98+
-z ${recalculate_size_and_md5_files}
9599
"""
96100
}
97101

@@ -106,6 +110,7 @@ workflow {
106110
file(params.metadata_payload_json),
107111
params.schema_url,
108112
Channel.fromPath(params.converted_files).collect(),
109-
file(params.cram_reference)
113+
file(params.cram_reference),
114+
Channel.fromPath(params.recalculate_size_and_md5_files).collect(),
110115
)
111116
}

payload-gen-seq-experiment/main.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def replace_cram_with_bam(payload,bam_from_cram,bam_from_cram_reference):
234234
rg['file_r2']=bam
235235
return(payload)
236236

237-
def main(metadata,url,bam_from_cram,bam_from_cram_reference,extra_info=dict()):
237+
def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and_md5_files,extra_info=dict()):
238238
empty_str_to_null(metadata)
239239

240240
payload = {
@@ -362,6 +362,13 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,extra_info=dict()):
362362
if len(bam_from_cram)>0:
363363
payload=replace_cram_with_bam(payload,bam_from_cram,bam_from_cram_reference)
364364

365+
if len(recalculate_size_and_md5_files)>=1:
366+
for recalculate in recalculate_size_and_md5_files:
367+
for file in payload['files']:
368+
if file['fileName']==recalculate:
369+
file['fileMd5sum']=calculate_md5(recalculate)
370+
file['fileSize']=calculate_size(recalculate)
371+
365372
validatePayload(payload,url)
366373
with open("%s.sequencing_experiment.payload.json" % str(uuid.uuid4()), 'w') as f:
367374
f.write(json.dumps(payload, indent=2))
@@ -385,6 +392,8 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,extra_info=dict()):
385392
help="BAM files that have been converted from CRAM")
386393
parser.add_argument("-br", "--bam-from-cram-reference",default=None,
387394
help="Name of reference file used in cram2bam conversion")
395+
parser.add_argument("-z", "--recalculate-size-and-md5-files",default=[],nargs="+",
396+
help="Supplied files here will have their md5sum and size recalculated")
388397
args = parser.parse_args()
389398

390399
validate_args(args)
@@ -440,4 +449,4 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,extra_info=dict()):
440449
extra_info[row_type][row_id][row_field]=row_val
441450

442451

443-
main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,extra_info)
452+
main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,args.recalculate_size_and_md5_files,extra_info)

payload-gen-seq-experiment/pkg.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "payload-gen-seq-experiment",
3-
"version": "0.8.0",
3+
"version": "0.8.1",
44
"description": "SONG payload generation for sequencing experiment",
55
"main": "main.nf",
66
"deprecated": false,

payload-gen-seq-experiment/tests/checker.nf

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
/* this block is auto-generated based on info from pkg.json where */
3232
/* changes can be made if needed, do NOT modify this block manually */
3333
nextflow.enable.dsl = 2
34-
version = '0.8.0'
34+
version = '0.8.1'
3535

3636
container = [
3737
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-seq-experiment'
@@ -53,6 +53,7 @@ params.schema_url = "NO_FILE5"
5353
params.metadata_payload_json = "NO_FILE6"
5454
params.converted_files=["NO_FILE7"]
5555
params.cram_reference="NO_FILE8"
56+
params.recalculate_size_and_md5_files=["NO_FILE9"]
5657

5758
params.expected_output = ""
5859

@@ -92,6 +93,7 @@ workflow checker {
9293
schema_url
9394
converted_files
9495
cram_reference
96+
recalculate_size_and_md5_files
9597

9698
main:
9799
payloadGenSeqExperiment(
@@ -102,7 +104,8 @@ workflow checker {
102104
metadata_payload_json,
103105
schema_url,
104106
converted_files,
105-
cram_reference
107+
cram_reference,
108+
recalculate_size_and_md5_files
106109
)
107110

108111
file_smart_diff(
@@ -122,6 +125,7 @@ workflow {
122125
file(params.metadata_payload_json),
123126
params.schema_url,
124127
Channel.fromPath(params.converted_files).collect(),
125-
file(params.cram_reference)
128+
file(params.cram_reference),
129+
Channel.fromPath(params.recalculate_size_and_md5_files).collect()
126130
)
127131
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
{
2+
"analysisType": {
3+
"name": "sequencing_experiment"
4+
},
5+
"studyId": "TEST-PRO",
6+
"experiment": {
7+
"submitter_sequencing_experiment_id": "TEST_EXP",
8+
"sequencing_center": "EXT",
9+
"platform": "ILLUMINA",
10+
"platform_model": "HiSeq 2000",
11+
"experimental_strategy": "WGS",
12+
"sequencing_date": "2014-12-12"
13+
},
14+
"read_group_count": 3,
15+
"read_groups": [
16+
{
17+
"submitter_read_group_id": "C0HVY.2",
18+
"read_group_id_in_bam": null,
19+
"platform_unit": "74_8a",
20+
"is_paired_end": true,
21+
"file_r1": "test_rg_3.bam",
22+
"file_r2": "test_rg_3.bam",
23+
"read_length_r1": 150,
24+
"read_length_r2": 150,
25+
"insert_size": 298,
26+
"sample_barcode": null,
27+
"library_name": "Pond-147580"
28+
},
29+
{
30+
"submitter_read_group_id": "D0RE2.1",
31+
"read_group_id_in_bam": null,
32+
"platform_unit": "74_8b",
33+
"is_paired_end": true,
34+
"file_r1": "test_rg_3.bam",
35+
"file_r2": "test_rg_3.bam",
36+
"read_length_r1": 150,
37+
"read_length_r2": 150,
38+
"insert_size": 298,
39+
"sample_barcode": null,
40+
"library_name": "Pond-147580"
41+
},
42+
{
43+
"submitter_read_group_id": "D0RH0.2",
44+
"read_group_id_in_bam": null,
45+
"platform_unit": "74_8c",
46+
"is_paired_end": true,
47+
"file_r1": "test_rg_3.bam",
48+
"file_r2": "test_rg_3.bam",
49+
"read_length_r1": 150,
50+
"read_length_r2": 150,
51+
"insert_size": 298,
52+
"sample_barcode": null,
53+
"library_name": "Pond-147580"
54+
}
55+
],
56+
"samples": [
57+
{
58+
"submitterSampleId": "HCC1143_BAM_INPUT",
59+
"matchedNormalSubmitterSampleId": null,
60+
"sampleType": "Total DNA",
61+
"specimen": {
62+
"submitterSpecimenId": "HCC1143_BAM_INPUT",
63+
"tumourNormalDesignation": "Normal",
64+
"specimenTissueSource": "Blood derived",
65+
"specimenType": "Cell line - derived from normal"
66+
},
67+
"donor": {
68+
"submitterDonorId": "HCC1143",
69+
"gender": "Female"
70+
}
71+
}
72+
],
73+
"files": [
74+
{
75+
"fileName": "example1.bam",
76+
"fileSize": 10,
77+
"fileMd5sum": "e2bb33a7b2c6a45933a994e3e2747458",
78+
"fileType": "BAM",
79+
"fileAccess": "controlled",
80+
"dataType": "Submitted Reads",
81+
"info": {
82+
"data_category": "Sequencing Reads"
83+
}
84+
}
85+
]
86+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
type name format size md5sum path
2+
file example1.bam BAM 1 AAAA input/example1.bam
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"experiment_info_tsv": "input/experiment.v2.tsv",
3+
"read_group_info_tsv": "input/read_group.v2.tsv",
4+
"file_info_tsv": "input/file.replace.tsv",
5+
"expected_output": "input/1c1e4354-b224-4d69-afcb-5be749a183d5.sequencing_experiment.payload.json",
6+
"recalculate_size_and_md5_files": ["input/example1.bam"],
7+
"publish_dir": "outdir"
8+
}

0 commit comments

Comments
 (0)