@@ -20,6 +20,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
2020 general_stats .update ({"mean_base_quality_sample" : sample_data [s_name ]["QualityScoreMean" ]})
2121 general_stats .update ({"percent_q30_sample" : sample_data [s_name ]["PercentQ30" ]})
2222 general_stats .update ({"percent_q40_sample" : sample_data [s_name ]["PercentQ40" ]})
23+ general_stats .update ({"percent_mismatch" : sample_data [s_name ]["PercentMismatch" ]})
2324 plot_content .update ({s_name : general_stats })
2425
2526 headers = {}
@@ -37,11 +38,10 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
3738 "scale" : False ,
3839 }
3940 headers ["num_polonies_sample" ] = {
40- "title" : f "# Polonies ( { config . base_count_prefix } ) " ,
41- "description" : f "The total number of polonies that are calculated for the run. ( { config . base_count_desc } ) " ,
41+ "title" : "# Polonies" ,
42+ "description" : "The total number of polonies that are calculated for the run." ,
4243 "min" : 0 ,
4344 "scale" : "Blues" ,
44- "shared_key" : "base_count" ,
4545 }
4646 headers ["yield_sample" ] = {
4747 "title" : "Yield (Gb)" ,
@@ -70,6 +70,14 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
7070 "scale" : "RdYlGn" ,
7171 "suffix" : "%" ,
7272 }
73+ headers ["percent_mismatch" ] = {
74+ "title" : "Percent Mismatch" ,
75+ "description" : "The percentage of mismatching reads for the sample." ,
76+ "max" : 100 ,
77+ "min" : 0 ,
78+ "scale" : "RdYlGn" ,
79+ "suffix" : "%" ,
80+ }
7381
7482 pconfig = {"id" : "sample_qc_metric_table" , "title" : "Sample QC Metrics Table" , "no_violin" : True }
7583
@@ -96,18 +104,20 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
96104 """Create the epic HTML for the FastQC sequence content heatmap"""
97105
98106 # Prep the data
99- data = dict ()
107+ all_data = dict ()
108+ plot_content = [all_data ]
100109
101110 r1r2_split = 0
102111 for s_name in sorted (sample_data .keys ()):
103- paired_end = True if len (sample_data [s_name ]["Reads" ]) > 1 else False
104112 for base in "ACTG" :
105113 base_s_name = "__" .join ([s_name , base ])
106- data [base_s_name ] = {}
114+ all_data [base_s_name ] = {}
107115 R1 = sample_data [s_name ]["Reads" ][0 ]["Cycles" ]
108116 r1r2_split = max (r1r2_split , len (R1 ))
109117
110118 for s_name in sorted (sample_data .keys ()):
119+ paired_end = True if len (sample_data [s_name ]["Reads" ]) > 1 else False
120+
111121 R1 = sample_data [s_name ]["Reads" ][0 ]["Cycles" ]
112122 for cycle in range (len (R1 )):
113123 base_no = cycle + 1
@@ -116,7 +126,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
116126
117127 for base in "ACTG" :
118128 base_s_name = "__" .join ([s_name , base ])
119- data [base_s_name ].update (
129+ all_data [base_s_name ].update (
120130 {base_no : float (R1 [cycle ]["BaseComposition" ][base ] / float (tot )) * 100.0 if tot > 0 else None }
121131 )
122132
@@ -128,15 +138,32 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
128138
129139 for base in "ACTG" :
130140 base_s_name = "__" .join ([s_name , base ])
131- data [base_s_name ].update (
141+ all_data [base_s_name ].update (
132142 {base_no : float (R2 [cycle ]["BaseComposition" ][base ] / float (tot )) * 100.0 if tot > 0 else None }
133143 )
134144
135- plot_content = data
145+ default_label = {
146+ "name" : "All" ,
147+ "xlab" : "Cycle" ,
148+ "ylab" : "Percentage of total reads" ,
149+ }
150+ data_labels = [
151+ default_label ,
152+ ]
153+ for s_name in sorted (sample_data .keys ()):
154+ sample_plot_data = dict ()
155+ for base in "ACTG" :
156+ base_s_name = "__" .join ([s_name , base ])
157+ sample_plot_data [base_s_name ] = all_data [base_s_name ]
158+ plot_content .append (sample_plot_data )
159+ data_labels .append ({
160+ "name" : s_name ,
161+ "xlab" : default_label ["xlab" ],
162+ "ylab" : default_label ["ylab" ],
163+ })
136164
137165 pconfig = {
138- "xlab" : "cycle" ,
139- "ylab" : "Percentage" ,
166+ "data_labels" : data_labels ,
140167 "x_lines" : [{"color" : "#FF0000" , "width" : 2 , "value" : r1r2_split , "dashStyle" : "dash" }],
141168 "colors" : color_dict ,
142169 "ymin" : 0 ,
@@ -147,8 +174,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
147174 plot_name = "Per Cycle Base Content"
148175 anchor = "base_content"
149176 description = """
150- Percentage of unidentified bases ("N" bases) by each sequencing cycle.
151- Read 1 and Read 2 are separated by a red dashed line
177+ Base composition per sample per cycle.
178+ Read 1 and Read 2 are separated by a red dashed line.
152179 """
153180 helptext = """
154181 If a sequencer is unable to make a base call with sufficient confidence then it will
@@ -236,6 +263,17 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
236263 Plot GC Histogram per Sample
237264 """
238265 gc_hist_dict = dict ()
266+ plot_content = [
267+ gc_hist_dict ,
268+ ]
269+ default_label = {
270+ "name" : "All" ,
271+ "xlab" : "% GC" ,
272+ "ylab" : "Percentage of total reads" ,
273+ }
274+ data_labels = [
275+ default_label ,
276+ ]
239277 for s_name in sample_data .keys ():
240278 R1_gc_counts = sample_data [s_name ]["Reads" ][0 ]["PerReadGCCountHistogram" ]
241279 R2_gc_counts = [0 ] * len (R1_gc_counts )
@@ -249,11 +287,18 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
249287 gc_hist_dict [s_name ].update ({gc / RLen * 100 : R1R2_gc_counts [gc ] / totalReads * 100 })
250288
251289 # perReadQualityHistogram
252- plot_content = gc_hist_dict
290+
291+ for s_name in gc_hist_dict .keys ():
292+ plot_content .append ({s_name : gc_hist_dict [s_name ]})
293+ data_labels .append ({
294+ "name" : s_name ,
295+ "xlab" : default_label ["xlab" ],
296+ "ylab" : default_label ["ylab" ],
297+ })
298+
253299
254300 pconfig = {
255- "xlab" : "% GC" ,
256- "ylab" : "Percentage" ,
301+ "data_labels" : data_labels ,
257302 "colors" : sample_color ,
258303 "id" : "gc_hist" ,
259304 "title" : "bases2fastq: Per Sample GC Content Histogram" ,
@@ -323,7 +368,10 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
323368 pconfig .update ({"colors" : sample_color })
324369 plot_html = linegraph .plot (plot_content , pconfig = pconfig )
325370 anchor = "adapter_content"
326- description = "Adapter content per cycle"
371+ description = """
372+ Adapter content per cycle.
373+ Read 1 and Read 2 are separated by a red dashed line.
374+ """
327375 helptext = """
328376 The plot shows a cumulative percentage count of the proportion
329377 of your library which has seen each of the adapter sequences at each cycle.
0 commit comments