Skip to content

Commit 6b5d12f

Browse files
authored
Merge pull request #169 from icgc-argo-workflows/payload-gen-seq-experiment@0.8.3
[release]
2 parents a6c4ce5 + 483c606 commit 6b5d12f

9 files changed

Lines changed: 135 additions & 13 deletions

File tree

payload-gen-seq-experiment/main.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
/* this block is auto-generated based on info from pkg.json where */
2727
/* changes can be made if needed, do NOT modify this block manually */
2828
nextflow.enable.dsl = 2
29-
version = '0.8.2'
29+
version = '0.8.3'
3030

3131
container = [
3232
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-seq-experiment'

payload-gen-seq-experiment/main.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,12 @@
4545
'submitter_sample_id','sample_type', 'submitter_matched_normal_sample_id', 'sequencing_center',
4646
'platform', 'platform_model','experimental_strategy', 'sequencing_date', 'read_group_count']
4747
TSV_FIELDS['experiment']["conditional"]=[
48-
"library_isolation_protocol","library_preparation_kit",
49-
"library_strandedness","rin","dv200","spike_ins_included",
50-
"spike_ins_fasta","spike_ins_concentration",
51-
"target_capture_kit"]
48+
"library_preparation_kit",
49+
"library_strandedness",
50+
"rin","dv200",
51+
"target_capture_kit","number_of_genes","gene_padding","coverage",
52+
"primary_target_regions","capture_target_regions"
53+
]
5254

5355
TSV_FIELDS['read_group']= {}
5456
TSV_FIELDS['read_group']["core"]=[
@@ -77,10 +79,10 @@
7779

7880
def empty_str_to_null(metadata):
7981
for k in metadata:
80-
if k in ['read_groups', 'files']:
82+
if k in ['read_groups', 'files','experiment']:
8183
for i in range(len(metadata[k])):
8284
empty_str_to_null(metadata[k][i])
83-
if isinstance(metadata[k], str) and metadata[k] in ["", "_NULL_"]:
85+
if isinstance(metadata[k], str) and metadata[k] in ["", "_NULL_","null","NULL","Null","None","NONE","none"]:
8486
metadata[k] = None
8587

8688

@@ -146,10 +148,20 @@ def load_all_tsvs(exp_tsv, rg_tsv, file_tsv):
146148
rg['is_paired_end'] = None
147149

148150
for field in ('read_length_r1', 'read_length_r2', 'insert_size'):
149-
if rg[field]:
151+
if isinstance(rg[field],str):
152+
if re.match("^[0-9]+$", rg[field]):
153+
rg[field] = int(rg[field])
154+
continue
155+
for empty_string in ["", "_NULL_",'null',"NULL","Null","None","NONE","none"]:
156+
if rg[field]==empty_string:
157+
rg[field] = None
158+
break
159+
elif isinstance(rg[field],int):
150160
rg[field] = int(rg[field])
151-
else:
161+
elif rg[field] is None:
152162
rg[field] = None
163+
else:
164+
sys.exit("Unrecognnized value '%s' in field %s for '%s'" % (str(rg[field]),field,rg['submitter_read_group_id']))
153165

154166
metadata_dict['read_groups'].append(rg)
155167

@@ -262,7 +274,7 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
262274
optional_experimental_fields.remove("rin")
263275

264276
for optional_experimental_field in optional_experimental_fields:
265-
if metadata.get(optional_experimental_field):
277+
if optional_experimental_field in metadata.keys():
266278
payload['experiment'][optional_experimental_field]=metadata.get(optional_experimental_field)
267279
# Int
268280
optional_experimental_fields=["rin"]
@@ -274,6 +286,12 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
274286
if metadata.get('experimental_strategy')=='RNA-Seq' and not metadata.get("library_strandedness"):
275287
sys.exit(f"'experimental_strategy' 'RNA-Seq' specified but 'library_strandedness' is missing. Resubmit with both values 'experimental_strategy' and 'library_strandedness'")
276288

289+
# Targeted Sequencing:
290+
if metadata.get('experimental_strategy')=="Targeted-Seq" or metadata.get('experimental_strategy')=="WXS":
291+
for field in ['target_capture_kit','primary_target_regions','capture_target_regions']:
292+
if field not in metadata.keys():
293+
sys.exit(f"'experimental_strategy' '%s' specified but '%s' is missing. Resubmit with both values 'experimental_strategy' and '%s'" % (metadata.get('experimental_strategy'),field,field))
294+
277295
# get sample of the payload
278296
sample = {
279297
'submitterSampleId': metadata.get('submitter_sample_id'),
@@ -449,4 +467,4 @@ def main(metadata,url,bam_from_cram,bam_from_cram_reference,recalculate_size_and
449467
extra_info[row_type][row_id][row_field]=row_val
450468

451469

452-
main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,args.recalculate_size_and_md5_files,extra_info)
470+
main(metadata,url,args.bam_from_cram,args.bam_from_cram_reference,args.recalculate_size_and_md5_files,extra_info)

payload-gen-seq-experiment/pkg.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "payload-gen-seq-experiment",
3-
"version": "0.8.2",
3+
"version": "0.8.3",
44
"description": "SONG payload generation for sequencing experiment",
55
"main": "main.nf",
66
"deprecated": false,

payload-gen-seq-experiment/tests/checker.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
/* this block is auto-generated based on info from pkg.json where */
3232
/* changes can be made if needed, do NOT modify this block manually */
3333
nextflow.enable.dsl = 2
34-
version = '0.8.2'
34+
version = '0.8.3'
3535

3636
container = [
3737
'ghcr.io': 'ghcr.io/icgc-argo-workflows/data-processing-utility-tools.payload-gen-seq-experiment'
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
{
2+
"analysisType": {
3+
"name": "sequencing_experiment"
4+
},
5+
"studyId": "TEST-PRO",
6+
"experiment": {
7+
"submitter_sequencing_experiment_id": "TEST_EXP",
8+
"sequencing_center": "EXT",
9+
"platform": "ILLUMINA",
10+
"platform_model": "HiSeq 2000",
11+
"experimental_strategy": "WXS",
12+
"sequencing_date": "2014-12-12",
13+
"target_capture_kit": "DUMMY_VAL",
14+
"primary_target_regions": "DUMMY_VAL",
15+
"capture_target_regions": "DUMMY_VAL"
16+
},
17+
"read_group_count": 3,
18+
"read_groups": [
19+
{
20+
"submitter_read_group_id": "C0HVY.2",
21+
"read_group_id_in_bam": null,
22+
"platform_unit": "74_8a",
23+
"is_paired_end": true,
24+
"file_r1": "test_rg_3.bam",
25+
"file_r2": "test_rg_3.bam",
26+
"read_length_r1": null,
27+
"read_length_r2": null,
28+
"insert_size": 298,
29+
"sample_barcode": null,
30+
"library_name": "Pond-147580"
31+
},
32+
{
33+
"submitter_read_group_id": "D0RE2.1",
34+
"read_group_id_in_bam": null,
35+
"platform_unit": "74_8b",
36+
"is_paired_end": true,
37+
"file_r1": "test_rg_3.bam",
38+
"file_r2": "test_rg_3.bam",
39+
"read_length_r1": null,
40+
"read_length_r2": null,
41+
"insert_size": 298,
42+
"sample_barcode": null,
43+
"library_name": "Pond-147580"
44+
},
45+
{
46+
"submitter_read_group_id": "D0RH0.2",
47+
"read_group_id_in_bam": null,
48+
"platform_unit": "74_8c",
49+
"is_paired_end": true,
50+
"file_r1": "test_rg_3.bam",
51+
"file_r2": "test_rg_3.bam",
52+
"read_length_r1": null,
53+
"read_length_r2": null,
54+
"insert_size": 298,
55+
"sample_barcode": null,
56+
"library_name": "Pond-147580"
57+
}
58+
],
59+
"samples": [
60+
{
61+
"submitterSampleId": "HCC1143_BAM_INPUT",
62+
"matchedNormalSubmitterSampleId": null,
63+
"sampleType": "Total DNA",
64+
"specimen": {
65+
"submitterSpecimenId": "HCC1143_BAM_INPUT",
66+
"tumourNormalDesignation": "Normal",
67+
"specimenTissueSource": "Blood derived",
68+
"specimenType": "Cell line - derived from normal"
69+
},
70+
"donor": {
71+
"submitterDonorId": "HCC1143",
72+
"gender": "Female"
73+
}
74+
}
75+
],
76+
"files": [
77+
{
78+
"fileName": "test_rg_3.bam",
79+
"fileSize": 14911,
80+
"fileMd5sum": "178f97f7b1ca8bfc28fd5586bdd56799",
81+
"fileType": "BAM",
82+
"fileAccess": "controlled",
83+
"dataType": "Submitted Reads",
84+
"info": {
85+
"data_category": "Sequencing Reads"
86+
}
87+
}
88+
]
89+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
type program_id submitter_sequencing_experiment_id submitter_donor_id gender submitter_specimen_id tumour_normal_designation specimen_type specimen_tissue_source submitter_sample_id sample_type submitter_matched_normal_sample_id sequencing_center platform platform_model experimental_strategy sequencing_date read_group_count target_capture_kit primary_target_regions capture_target_regions
2+
sequencing_experiment TEST-PRO TEST_EXP HCC1143 Female HCC1143_BAM_INPUT Normal Cell line - derived from normal Blood derived HCC1143_BAM_INPUT Total DNA EXT ILLUMINA HiSeq 2000 WXS 2014-12-12 3 DUMMY_VAL DUMMY_VAL DUMMY_VAL
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
type name format size md5sum path
2+
file test_rg_3.bam BAM 14911 178f97f7b1ca8bfc28fd5586bdd56799 test_rg_3.bam
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
type submitter_read_group_id read_group_id_in_bam submitter_sequencing_experiment_id platform_unit is_paired_end file_r1 file_r2 read_length_r1 read_length_r2 insert_size sample_barcode library_name
2+
read_group C0HVY.2 TEST_EXP 74_8a true test_rg_3.bam test_rg_3.bam null 298 Pond-147580
3+
read_group D0RE2.1 TEST_EXP 74_8b true test_rg_3.bam test_rg_3.bam NULL Null 298 Pond-147580
4+
read_group D0RH0.2 TEST_EXP 74_8c true test_rg_3.bam test_rg_3.bam NONE None 298 Pond-147580
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"experiment_info_tsv": "input/experiment.WXS.tsv",
3+
"read_group_info_tsv": "input/read_group.WXS.tsv",
4+
"file_info_tsv": "input/file.WXS.tsv",
5+
"expected_output": "expected/b9167a75-83ea-4c43-be30-e87faf3557dd.sequencing_experiment.payload.json",
6+
"publish_dir": "outdir"
7+
}

0 commit comments

Comments
 (0)