Skip to content

Commit f4c7a0d

Browse files
author
Mo Samman
committed
update bases2fastq MultiQC module
- Sequencing Run QC Metric Table
  - Display "# Polonies" as full number
- Sequencing Run Yield
  - Change x-axis label
- Run Base Quality Histogram
  - Add x-axis label
- Quality Metrics by cycle
  - Update description
- (Project) Sequencing QC metrics table
  - Display "# Polonies" as full number
- Sample QC Metrics Table
  - Display "# Polonies" as full number
  - Add "Percentage Mismatch"
- Per Cycle Base Content
  - Add per sample buttons
- Per Cycle Adapter Content
  - Update description
- Per Sample GC Histogram
  - Update x-axis label
  - Update y-axis label
  - Add per sample buttons
1 parent 42e4b61 commit f4c7a0d

3 files changed

Lines changed: 73 additions & 27 deletions

File tree

multiqc/modules/bases2fastq/plot_project_runs.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ def tabulate_project_run_stats(run_data, color_dict):
2222

2323
headers = {}
2424
headers["num_polonies_run"] = {
25-
"title": f"# Polonies ({config.base_count_prefix})",
26-
"description": f"The total number of polonies that are calculated for the run ({config.base_count_desc})",
25+
"title": "# Polonies",
26+
"description": "The total number of polonies that are calculated for the run",
2727
"min": 0,
2828
"scale": "RdYlGn",
29-
"shared_key": "base_count",
3029
}
3130
headers["percent_assigned_run"] = {
3231
"title": "% Assigned Reads",

multiqc/modules/bases2fastq/plot_runs.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def plot_run_stats(run_data, color_dict):
5454
pconfig = {
5555
"data_labels": [
5656
{"name": "Number of Polonies", "ylab": "Number of Polonies", "format": "{d}"},
57-
{"name": "Yield (Gb)", "ylab": "Gb"},
57+
{"name": "Yield (Gb)", "ylab": "Yield"},
5858
],
5959
"cpswitch": True,
6060
"stacking": "normal",
@@ -99,11 +99,10 @@ def tabulate_run_stats(run_data, color_dict):
9999

100100
headers = {}
101101
headers["num_polonies_run"] = {
102-
"title": f"# Polonies ({config.base_count_prefix})",
103-
"description": f"The total number of polonies that are calculated for the run. ({config.base_count_desc})",
102+
"title": "# Polonies",
103+
"description": "The total number of polonies that are calculated for the run.",
104104
"min": 0,
105105
"scale": "RdYlGn",
106-
"shared_key": "base_count",
107106
}
108107
headers["percent_assigned_run"] = {
109108
"title": "% Assigned Reads",
@@ -221,6 +220,7 @@ def plot_base_quality_hist(run_data, color_dict):
221220
"id": "per_run_bq_hist",
222221
"title": "bases2fastq: Quality Histograms",
223222
"ylab": "Percentage",
223+
"xlab": "Q score",
224224
}
225225
plot_html = linegraph.plot(plot_content, pconfig=pconfig)
226226
plot_name = "Run Base Quality Histogram"
@@ -347,10 +347,9 @@ def plot_base_quality_by_cycle(run_data, color_dict):
347347
plot_html = linegraph.plot(plot_content, pconfig=pconfig)
348348
plot_name = "Quality Metrics By Cycle"
349349
anchor = "per_cycle_quality"
350-
description = "Per run base qualities by cycle"
350+
description = "Per run base qualities by cycle. Read 1 and Read 2 are separated by a red dashed line."
351351
helptext = """
352352
This section plots the base qualities by each instrument cycle.\n
353-
Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle.\n
354-
Read 1 and Read 2 are separated by a red dashed line.
353+
Choose between Median Quality, Mean Quality, Percent Q30 or Percentage Q40 per cycle.
355354
"""
356355
return plot_html, plot_name, anchor, description, helptext, plot_content

multiqc/modules/bases2fastq/plot_samples.py

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
2020
general_stats.update({"mean_base_quality_sample": sample_data[s_name]["QualityScoreMean"]})
2121
general_stats.update({"percent_q30_sample": sample_data[s_name]["PercentQ30"]})
2222
general_stats.update({"percent_q40_sample": sample_data[s_name]["PercentQ40"]})
23+
general_stats.update({"percent_mismatch": sample_data[s_name]["PercentMismatch"]})
2324
plot_content.update({s_name: general_stats})
2425

2526
headers = {}
@@ -37,11 +38,10 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
3738
"scale": False,
3839
}
3940
headers["num_polonies_sample"] = {
40-
"title": f"# Polonies ({config.base_count_prefix})",
41-
"description": f"The total number of polonies that are calculated for the run. ({config.base_count_desc})",
41+
"title": "# Polonies",
42+
"description": "The total number of polonies that are calculated for the run.",
4243
"min": 0,
4344
"scale": "Blues",
44-
"shared_key": "base_count",
4545
}
4646
headers["yield_sample"] = {
4747
"title": "Yield (Gb)",
@@ -70,6 +70,14 @@ def tabulate_sample_stats(sample_data, group_lookup_dict, project_lookup_dict, s
7070
"scale": "RdYlGn",
7171
"suffix": "%",
7272
}
73+
headers["percent_mismatch"] = {
74+
"title": "Percent Mismatch",
75+
"description": "The percentage of mismatching reads for the sample.",
76+
"max": 100,
77+
"min": 0,
78+
"scale": "RdYlGn",
79+
"suffix": "%",
80+
}
7381

7482
pconfig = {"id": "sample_qc_metric_table", "title": "Sample QC Metrics Table", "no_violin": True}
7583

@@ -96,18 +104,20 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
96104
"""Create the epic HTML for the FastQC sequence content heatmap"""
97105

98106
# Prep the data
99-
data = dict()
107+
all_data = dict()
108+
plot_content = [all_data]
100109

101110
r1r2_split = 0
102111
for s_name in sorted(sample_data.keys()):
103-
paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
104112
for base in "ACTG":
105113
base_s_name = "__".join([s_name, base])
106-
data[base_s_name] = {}
114+
all_data[base_s_name] = {}
107115
R1 = sample_data[s_name]["Reads"][0]["Cycles"]
108116
r1r2_split = max(r1r2_split, len(R1))
109117

110118
for s_name in sorted(sample_data.keys()):
119+
paired_end = True if len(sample_data[s_name]["Reads"]) > 1 else False
120+
111121
R1 = sample_data[s_name]["Reads"][0]["Cycles"]
112122
for cycle in range(len(R1)):
113123
base_no = cycle + 1
@@ -116,7 +126,7 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
116126

117127
for base in "ACTG":
118128
base_s_name = "__".join([s_name, base])
119-
data[base_s_name].update(
129+
all_data[base_s_name].update(
120130
{base_no: float(R1[cycle]["BaseComposition"][base] / float(tot)) * 100.0 if tot > 0 else None}
121131
)
122132

@@ -128,15 +138,32 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
128138

129139
for base in "ACTG":
130140
base_s_name = "__".join([s_name, base])
131-
data[base_s_name].update(
141+
all_data[base_s_name].update(
132142
{base_no: float(R2[cycle]["BaseComposition"][base] / float(tot)) * 100.0 if tot > 0 else None}
133143
)
134144

135-
plot_content = data
145+
default_label = {
146+
"name": "All",
147+
"xlab": "Cycle",
148+
"ylab": "Percentage of total reads",
149+
}
150+
data_labels = [
151+
default_label,
152+
]
153+
for s_name in sorted(sample_data.keys()):
154+
sample_plot_data = dict()
155+
for base in "ACTG":
156+
base_s_name = "__".join([s_name, base])
157+
sample_plot_data[base_s_name] = all_data[base_s_name]
158+
plot_content.append(sample_plot_data)
159+
data_labels.append({
160+
"name": s_name,
161+
"xlab": default_label["xlab"],
162+
"ylab": default_label["ylab"],
163+
})
136164

137165
pconfig = {
138-
"xlab": "cycle",
139-
"ylab": "Percentage",
166+
"data_labels": data_labels,
140167
"x_lines": [{"color": "#FF0000", "width": 2, "value": r1r2_split, "dashStyle": "dash"}],
141168
"colors": color_dict,
142169
"ymin": 0,
@@ -147,8 +174,8 @@ def sequence_content_plot(sample_data, group_lookup_dict, project_lookup_dict, c
147174
plot_name = "Per Cycle Base Content"
148175
anchor = "base_content"
149176
description = """
150-
Percentage of unidentified bases ("N" bases) by each sequencing cycle.
151-
Read 1 and Read 2 are separated by a red dashed line
177+
Base composition per sample per cycle.
178+
Read 1 and Read 2 are separated by a red dashed line.
152179
"""
153180
helptext = """
154181
If a sequencer is unable to make a base call with sufficient confidence then it will
@@ -236,6 +263,17 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
236263
Plot GC Histogram per Sample
237264
"""
238265
gc_hist_dict = dict()
266+
plot_content = [
267+
gc_hist_dict,
268+
]
269+
default_label = {
270+
"name": "All",
271+
"xlab": "Percentage of total reads",
272+
"ylab": "Percentage of reads that are GC",
273+
}
274+
data_labels = [
275+
default_label,
276+
]
239277
for s_name in sample_data.keys():
240278
R1_gc_counts = sample_data[s_name]["Reads"][0]["PerReadGCCountHistogram"]
241279
R2_gc_counts = [0] * len(R1_gc_counts)
@@ -249,11 +287,18 @@ def plot_per_read_gc_hist(sample_data, group_lookup_dict, project_lookup_dict, s
249287
gc_hist_dict[s_name].update({gc / RLen * 100: R1R2_gc_counts[gc] / totalReads * 100})
250288

251289
# perReadQualityHistogram
252-
plot_content = gc_hist_dict
290+
291+
for s_name in gc_hist_dict.keys():
292+
plot_content.append({s_name: gc_hist_dict[s_name]})
293+
data_labels.append({
294+
"name": s_name,
295+
"xlab": default_label["xlab"],
296+
"ylab": default_label["ylab"],
297+
})
298+
253299

254300
pconfig = {
255-
"xlab": "% GC",
256-
"ylab": "Percentage",
301+
"data_labels": data_labels,
257302
"colors": sample_color,
258303
"id": "gc_hist",
259304
"title": "bases2fastq: Per Sample GC Content Histogram",
@@ -323,7 +368,10 @@ def plot_adapter_content(sample_data, group_lookup_dict, project_lookup_dict, sa
323368
pconfig.update({"colors": sample_color})
324369
plot_html = linegraph.plot(plot_content, pconfig=pconfig)
325370
anchor = "adapter_content"
326-
description = "Adapter content per cycle"
371+
description = """
372+
Adapter content per cycle.
373+
Read 1 and Read 2 are separated by a red dashed line.
374+
"""
327375
helptext = """
328376
The plot shows a cumulative percentage count of the proportion
329377
of your library which has seen each of the adapter sequences at each cycle.

0 commit comments

Comments
 (0)