From da4c2d7cec65313f5617b08ce68ace63bd74c1aa Mon Sep 17 00:00:00 2001
From: Frank Sabates <73601477+FrankS-2024@users.noreply.github.com>
Date: Sat, 24 Jan 2026 18:00:28 -0800
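Subject: [PATCH] Use symmetric sine axis labels

Sine plots now scale the treatment and control sides against one shared
maximum and label the axis with +max / -max. --point_stat max is now
applied while binning data down to --max_points rather than in
draw_standard_spark, which always averages. Also adds three SVG images
under Images/.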
---
Images/spark_demo_output.svg | 11 ++++
Images/spark_pointstat_avg.svg | 9 +++
Images/spark_pointstat_max.svg | 9 +++
SparK.py | 113 +++++++++++++++++----------------
4 files changed, 89 insertions(+), 53 deletions(-)
create mode 100644 Images/spark_demo_output.svg
create mode 100644 Images/spark_pointstat_avg.svg
create mode 100644 Images/spark_pointstat_max.svg
diff --git a/Images/spark_demo_output.svg b/Images/spark_demo_output.svg
new file mode 100644
index 0000000..1023cb6
--- /dev/null
+++ b/Images/spark_demo_output.svg
@@ -0,0 +1,11 @@
+
diff --git a/Images/spark_pointstat_avg.svg b/Images/spark_pointstat_avg.svg
new file mode 100644
index 0000000..9493571
--- /dev/null
+++ b/Images/spark_pointstat_avg.svg
@@ -0,0 +1,9 @@
+
diff --git a/Images/spark_pointstat_max.svg b/Images/spark_pointstat_max.svg
new file mode 100644
index 0000000..cccb610
--- /dev/null
+++ b/Images/spark_pointstat_max.svg
@@ -0,0 +1,9 @@
+
diff --git a/SparK.py b/SparK.py
index 1c2b518..6155b87 100644
--- a/SparK.py
+++ b/SparK.py
@@ -134,33 +134,39 @@ def make_raw_data_filled(stretch, files, offset): # files[ctrl,treat]
except:
pass
- # shrink to max_datapoints if bigger
- max_datapoints = max_points
- if stretch[2] - stretch[1] > max_datapoints:
- binfactor_split = math.modf(float((float(stretch[2] - stretch[1]))/max_datapoints)) # get values after and before period
- binfactor = sum(binfactor_split)
- temp_data = [[] for u in range(len(files))] # new data list
- for workingfilenr in range(len(files)):
- for position in range(max_datapoints):
- start_postition_split = math.modf(position * binfactor) # after and before period
-
- # first add fraction of start position or entire value if no fraction
- temp_value = float(raw_data_filled[workingfilenr][(int(start_postition_split[1]))] * (1 - start_postition_split[0]))
- binfactor_left = binfactor - (1 - start_postition_split[0])
-
- # add all values with no fractions
- iteration = 0
- while binfactor_left > 1:
- temp_value += raw_data_filled[workingfilenr][int(start_postition_split[1]) + 1 + iteration]
- iteration += 1
- binfactor_left -= 1
-
- # add last fraction or value if no fraction
- if binfactor_left > 0:
- if float((start_postition_split[1]) + 1 + iteration) < len(raw_data_filled[0]):
- temp_value += raw_data_filled[workingfilenr][int(start_postition_split[1]) + 1 + iteration] * binfactor_left
- temp_data[workingfilenr].append(temp_value/sum(binfactor_split))
- raw_data_filled = copy.deepcopy(temp_data)
+ # bin the data down to max_datapoints values per file when the stretch is longer than that
+ max_datapoints = max_points
+ if stretch[2] - stretch[1] > max_datapoints:
+ binfactor_split = math.modf(float((float(stretch[2] - stretch[1])) / max_datapoints)) # math.modf gives (fractional part, integer part) of (stretch[2] - stretch[1]) / max_datapoints
+ binfactor = sum(binfactor_split) # both parts recombined: raw values per output bin (may be fractional)
+ temp_data = [[] for u in range(len(files))] # one output list per input file
+ for workingfilenr in range(len(files)):
+ for position in range(max_datapoints):
+ start_postition_split = math.modf(position * binfactor) # (fractional part, integer part) of the bin's start offset
+ start_idx = int(start_postition_split[1])
+ if point_stat == "max": # max mode: keep the largest raw value in the bin and skip the averaging below
+ end_pos = (position + 1) * binfactor
+ end_idx = min(len(raw_data_filled[workingfilenr]) - 1, int(math.ceil(end_pos)) - 1) # last raw index of this bin, clamped to the end of the data
+ temp_data[workingfilenr].append(max(raw_data_filled[workingfilenr][start_idx:end_idx + 1]))
+ continue
+
+ # average mode: the first raw value is weighted by the part of it inside the bin (all of it if the start has no fraction)
+ temp_value = float(raw_data_filled[workingfilenr][start_idx] * (1 - start_postition_split[0]))
+ binfactor_left = binfactor - (1 - start_postition_split[0])
+
+ # add every following raw value that lies completely inside the bin
+ iteration = 0
+ while binfactor_left > 1:
+ temp_value += raw_data_filled[workingfilenr][start_idx + 1 + iteration]
+ iteration += 1
+ binfactor_left -= 1
+
+ # add the remaining share (binfactor_left, at most 1) of the next raw value
+ if binfactor_left > 0:
+ if float(start_idx + 1 + iteration) < len(raw_data_filled[0]): # skipped when the index would run past the data; length is taken from file 0
+ temp_value += raw_data_filled[workingfilenr][start_idx + 1 + iteration] * binfactor_left
+ temp_data[workingfilenr].append(temp_value / sum(binfactor_split)) # divide the weighted sum by the bin width to get the bin average
+ raw_data_filled = copy.deepcopy(temp_data) # the binned values replace the raw data
if smoothen_tracks is not None:
raw_data_filled_smooth = [[0] * max_datapoints for r in range(len(files))]
@@ -236,22 +242,24 @@ def draw_axis_for_group(y_start_val, max_value_val, has_negative_axis):
write_to_file('''0''')
write_to_file('''''' + str(axis_label) + '''''')
-def draw_sine_axis_for_group(y_start_val, pos_max_value_val, neg_max_value_val):
- # Separate top/bottom labels derived from positive and negative datasets
- axis_label_top = round(pos_max_value_val * (1 + (1 - relative_track_hight_percentage)), 1)
- axis_label_bottom = round(neg_max_value_val * (1 + (1 - relative_track_hight_percentage)), 1)
- # Draw vertical axis
- write_to_file('''''')
- # 0 tick
- write_to_file('''''')
- # Top tick and label
- write_to_file('''''')
- write_to_file('''''' + str(axis_label_top) + '''''')
- # Bottom tick and label
- write_to_file('''''')
- write_to_file('''-''' + str(axis_label_bottom) + '''''')
-def draw_standard_spark():
- summary_func = np.max if point_stat == "max" else np.average
+def draw_sine_axis_for_group(y_start_val, shared_max_val):
+ # Write one y-axis for a sine group: a 0 tick plus ticks labelled +/- round(shared_max_val, 1)
+ if shared_max_val > 0:
+ axis_height = hight * relative_track_hight_percentage # hight and relative_track_hight_percentage are not parameters; they come from outside this function
+ pos_tick_y = y_start_val - axis_height # tick positions lie one axis_height either side of y_start_val
+ neg_tick_y = y_start_val + axis_height
+ # Draw full shared axis
+ write_to_file('''''')
+ # 0 tick
+ write_to_file('''''')
+ # Positive tick and label (value rounded to one decimal)
+ write_to_file('''''')
+ write_to_file('''''' + str(round(shared_max_val, 1)) + '''''')
+ # Negative tick and label (same magnitude, prefixed with '-')
+ write_to_file('''''')
+ write_to_file('''-''' + str(round(shared_max_val, 1)) + '''''')
+def draw_standard_spark():
+ summary_func = np.average
if len(control_data) > 1 and len(treat_data) > 1:
last_xpos = -1
coords = [] # y/x, spark color
@@ -356,7 +364,7 @@ def get_region_to_draw():
parser.add_argument('-w','--track_width', help='width of the track, default = 150, int', required=False, type=int, default=150)
parser.add_argument('-dg','--display_genes', help='genes to display from the gtf file', nargs='+', required=False, type=str)
parser.add_argument('--max_points', help='maximum datapoints per plot', required=False, type=int, default=2000)
-parser.add_argument('--point_stat', help='per-point statistic: average or max', required=False, type=str, default='average')
+parser.add_argument('--point_stat', help='per-bin statistic for downsampling: average or max', required=False, type=str, choices=['average', 'max'], default='average')  # choices: reject unknown values instead of silently averaging
parser.add_argument('--y_scale', help='scale factor for y-axis', required=False, type=float, default=1.0)
parser.add_argument('--x_scale', help='scale factor for x-axis', required=False, type=float, default=1.0)
parser.add_argument('-dt','--display_transcripts', help='display custom transcripts. By default, all transcripts annotated in the gtf file will be merged and displayed as one gene. Alternatively all can be plotted seperatelly by setting this to "all". Further, Transcript IDs can be listed to plot only certain transcripts', nargs='+', required=False, type=str, default=["mergeall"])
@@ -750,27 +758,26 @@ def get_region_to_draw():
print("Error: STD plots require at least 2 control and treatment files per plot")
elif plot_type == "sine": # treat points up, control points down #FIX combined with averages does not work
- # Compute separate positive/negative maxima for independent axis labels and scaling
+ # Compute shared max so positive/negative sides use the same y-scale
pos_max_value = 0
neg_max_value = 0
if treat_data:
pos_max_value = max(max(abs(v) for v in data) for data in treat_data)
if control_data:
neg_max_value = max(max(abs(v) for v in data) for data in control_data)
+ shared_max_value = max(pos_max_value, neg_max_value)
# Respect custom scales or group autoscale by overriding both sides equally
if custom_scales is not None and custom_scales[group] != "D":
- pos_max_value = float(custom_scales[group])
- neg_max_value = float(custom_scales[group])
+ shared_max_value = float(custom_scales[group])
elif group_autoscale == "yes" and ((group + 1) not in group_autoscale_excluded):
- pos_max_value = max_value
- neg_max_value = max_value
+ shared_max_value = max_value
if len(control_data) >= 1 and len(treat_data) >= 1:
for datafile in control_data:
coords = [] # y, x
for x, value in enumerate(datafile):
x_pos = x_start + (x * quantile)
- coords.append([-1 * get_relative_hight_custom(value, neg_max_value), x_pos])
+ coords.append([-1 * get_relative_hight_custom(value, shared_max_value), x_pos])
coords[-1][0] = 0
coords[0][0] = 0
write_to_file(draw_polygon(coords, opacity, fills[0], stroke_width))
@@ -778,7 +785,7 @@ def get_region_to_draw():
coords = [] # y, x
for x, value in enumerate(datafile):
x_pos = x_start + (x * quantile)
- coords.append([get_relative_hight_custom(value, pos_max_value), x_pos])
+ coords.append([get_relative_hight_custom(value, shared_max_value), x_pos])
coords[-1][0] = 0
coords[0][0] = 0
write_to_file(draw_polygon(coords, opacity, fills[1], stroke_width))
@@ -857,8 +864,8 @@ def get_region_to_draw():
write_to_file(draw_polygon(coords, 0.8, spark_color[1], stroke_width_spark))
else:
print("Error: no input files for treatment and/or control")
- # Draw y-axis for this group (sine) with independent labels
- draw_sine_axis_for_group(y_start, pos_max_value, neg_max_value)
+ # Draw y-axis for this group (sine) with shared scale and symmetric labels
+ draw_sine_axis_for_group(y_start, shared_max_value)
# Scalebar
if display_scalebar == "yes":