From da4c2d7cec65313f5617b08ce68ace63bd74c1aa Mon Sep 17 00:00:00 2001 From: Frank Sabates <73601477+FrankS-2024@users.noreply.github.com> Date: Sat, 24 Jan 2026 18:00:28 -0800 Subject: [PATCH] Use symmetric sine axis labels --- Images/spark_demo_output.svg | 11 ++++ Images/spark_pointstat_avg.svg | 9 +++ Images/spark_pointstat_max.svg | 9 +++ SparK.py | 113 +++++++++++++++++---------------- 4 files changed, 89 insertions(+), 53 deletions(-) create mode 100644 Images/spark_demo_output.svg create mode 100644 Images/spark_pointstat_avg.svg create mode 100644 Images/spark_pointstat_max.svg diff --git a/Images/spark_demo_output.svg b/Images/spark_demo_output.svg new file mode 100644 index 0000000..1023cb6 --- /dev/null +++ b/Images/spark_demo_output.svg @@ -0,0 +1,11 @@ + + + + + + +30.0 + +-30.0 +Chr1: 0-250 + diff --git a/Images/spark_pointstat_avg.svg b/Images/spark_pointstat_avg.svg new file mode 100644 index 0000000..9493571 --- /dev/null +++ b/Images/spark_pointstat_avg.svg @@ -0,0 +1,9 @@ + + + + + +0 +39.7 +Chr1: 0-1,000 + diff --git a/Images/spark_pointstat_max.svg b/Images/spark_pointstat_max.svg new file mode 100644 index 0000000..cccb610 --- /dev/null +++ b/Images/spark_pointstat_max.svg @@ -0,0 +1,9 @@ + + + + + +0 +96.8 +Chr1: 0-1,000 + diff --git a/SparK.py b/SparK.py index 1c2b518..6155b87 100644 --- a/SparK.py +++ b/SparK.py @@ -134,33 +134,39 @@ def make_raw_data_filled(stretch, files, offset): # files[ctrl,treat] except: pass - # shrink to max_datapoints if bigger - max_datapoints = max_points - if stretch[2] - stretch[1] > max_datapoints: - binfactor_split = math.modf(float((float(stretch[2] - stretch[1]))/max_datapoints)) # get values after and before period - binfactor = sum(binfactor_split) - temp_data = [[] for u in range(len(files))] # new data list - for workingfilenr in range(len(files)): - for position in range(max_datapoints): - start_postition_split = math.modf(position * binfactor) # after and before period - - # first add fraction of start position or entire value if no fraction - temp_value = float(raw_data_filled[workingfilenr][(int(start_postition_split[1]))] * (1 - start_postition_split[0])) - binfactor_left = binfactor - (1 - start_postition_split[0]) - - # add all values with no fractions - iteration = 0 - while binfactor_left > 1: - temp_value += raw_data_filled[workingfilenr][int(start_postition_split[1]) + 1 + iteration] - iteration += 1 - binfactor_left -= 1 - - # add last fraction or value if no fraction - if binfactor_left > 0: - if float((start_postition_split[1]) + 1 + iteration) < len(raw_data_filled[0]): - temp_value += raw_data_filled[workingfilenr][int(start_postition_split[1]) + 1 + iteration] * binfactor_left - temp_data[workingfilenr].append(temp_value/sum(binfactor_split)) - raw_data_filled = copy.deepcopy(temp_data) + # shrink to max_datapoints if bigger + max_datapoints = max_points + if stretch[2] - stretch[1] > max_datapoints: + binfactor_split = math.modf(float((float(stretch[2] - stretch[1])) / max_datapoints)) # get values after and before period + binfactor = sum(binfactor_split) + temp_data = [[] for u in range(len(files))] # new data list + for workingfilenr in range(len(files)): + for position in range(max_datapoints): + start_postition_split = math.modf(position * binfactor) # after and before period + start_idx = int(start_postition_split[1]) + if point_stat == "max": + end_pos = (position + 1) * binfactor + end_idx = min(len(raw_data_filled[workingfilenr]) - 1, int(math.ceil(end_pos)) - 1) + temp_data[workingfilenr].append(max(raw_data_filled[workingfilenr][start_idx:end_idx + 1])) + continue + + # first add fraction of start position or entire value if no fraction + temp_value = float(raw_data_filled[workingfilenr][start_idx] * (1 - start_postition_split[0])) + binfactor_left = binfactor - (1 - start_postition_split[0]) + + # add all values with no fractions + iteration = 0 + while binfactor_left > 1: + temp_value += raw_data_filled[workingfilenr][start_idx + 1 + iteration] + iteration += 1 + binfactor_left -= 1 + + # add last fraction or value if no fraction + if binfactor_left > 0: + if float(start_idx + 1 + iteration) < len(raw_data_filled[0]): + temp_value += raw_data_filled[workingfilenr][start_idx + 1 + iteration] * binfactor_left + temp_data[workingfilenr].append(temp_value / sum(binfactor_split)) + raw_data_filled = copy.deepcopy(temp_data) if smoothen_tracks is not None: raw_data_filled_smooth = [[0] * max_datapoints for r in range(len(files))] @@ -236,22 +242,24 @@ def draw_axis_for_group(y_start_val, max_value_val, has_negative_axis): write_to_file('''0''') write_to_file('''''' + str(axis_label) + '''''') -def draw_sine_axis_for_group(y_start_val, pos_max_value_val, neg_max_value_val): - # Separate top/bottom labels derived from positive and negative datasets - axis_label_top = round(pos_max_value_val * (1 + (1 - relative_track_hight_percentage)), 1) - axis_label_bottom = round(neg_max_value_val * (1 + (1 - relative_track_hight_percentage)), 1) - # Draw vertical axis - write_to_file('''''') - # 0 tick - write_to_file('''''') - # Top tick and label - write_to_file('''''') - write_to_file('''''' + str(axis_label_top) + '''''') - # Bottom tick and label - write_to_file('''''') - write_to_file('''-''' + str(axis_label_bottom) + '''''') -def draw_standard_spark(): - summary_func = np.max if point_stat == "max" else np.average +def draw_sine_axis_for_group(y_start_val, shared_max_val): + # Shared axis with symmetric labels for positive/negative magnitudes + if shared_max_val > 0: + axis_height = hight * relative_track_hight_percentage + pos_tick_y = y_start_val - axis_height + neg_tick_y = y_start_val + axis_height + # Draw full shared axis + write_to_file('''''') + # 0 tick + write_to_file('''''') + # Positive tick and label + write_to_file('''''') + write_to_file('''''' + str(round(shared_max_val, 1)) + '''''') + # Negative tick and label + write_to_file('''''') + write_to_file('''-''' + str(round(shared_max_val, 1)) + '''''') +def draw_standard_spark(): + summary_func = np.average if len(control_data) > 1 and len(treat_data) > 1: last_xpos = -1 coords = [] # y/x, spark color @@ -356,7 +364,7 @@ def get_region_to_draw(): parser.add_argument('-w','--track_width', help='width of the track, default = 150, int', required=False, type=int, default=150) parser.add_argument('-dg','--display_genes', help='genes to display from the gtf file', nargs='+', required=False, type=str) parser.add_argument('--max_points', help='maximum datapoints per plot', required=False, type=int, default=2000) -parser.add_argument('--point_stat', help='per-point statistic: average or max', required=False, type=str, default='average') +parser.add_argument('--point_stat', help='per-bin statistic for downsampling: average or max', required=False, type=str, default='average') parser.add_argument('--y_scale', help='scale factor for y-axis', required=False, type=float, default=1.0) parser.add_argument('--x_scale', help='scale factor for x-axis', required=False, type=float, default=1.0) parser.add_argument('-dt','--display_transcripts', help='display custom transcripts. By default, all transcripts annotated in the gtf file will be merged and displayed as one gene. Alternatively all can be plotted seperatelly by setting this to "all". Further, Transcript IDs can be listed to plot only certain transcripts', nargs='+', required=False, type=str, default=["mergeall"]) @@ -750,27 +758,26 @@ def get_region_to_draw(): print("Error: STD plots require at least 2 control and treatment files per plot") elif plot_type == "sine": # treat points up, control points down #FIX combined with averages does not work - # Compute separate positive/negative maxima for independent axis labels and scaling + # Compute shared max so positive/negative sides use the same y-scale pos_max_value = 0 neg_max_value = 0 if treat_data: pos_max_value = max(max(abs(v) for v in data) for data in treat_data) if control_data: neg_max_value = max(max(abs(v) for v in data) for data in control_data) + shared_max_value = max(pos_max_value, neg_max_value) # Respect custom scales or group autoscale by overriding both sides equally if custom_scales is not None and custom_scales[group] != "D": - pos_max_value = float(custom_scales[group]) - neg_max_value = float(custom_scales[group]) + shared_max_value = float(custom_scales[group]) elif group_autoscale == "yes" and ((group + 1) not in group_autoscale_excluded): - pos_max_value = max_value - neg_max_value = max_value + shared_max_value = max_value if len(control_data) >= 1 and len(treat_data) >= 1: for datafile in control_data: coords = [] # y, x for x, value in enumerate(datafile): x_pos = x_start + (x * quantile) - coords.append([-1 * get_relative_hight_custom(value, neg_max_value), x_pos]) + coords.append([-1 * get_relative_hight_custom(value, shared_max_value), x_pos]) coords[-1][0] = 0 coords[0][0] = 0 write_to_file(draw_polygon(coords, opacity, fills[0], stroke_width)) @@ -778,7 +785,7 @@ def get_region_to_draw(): coords = [] # y, x for x, value in enumerate(datafile): x_pos = x_start + (x * quantile) - coords.append([get_relative_hight_custom(value, pos_max_value), x_pos]) + coords.append([get_relative_hight_custom(value, shared_max_value), x_pos]) coords[-1][0] = 0 coords[0][0] = 0 write_to_file(draw_polygon(coords, opacity, fills[1], stroke_width)) @@ -857,8 +864,8 @@ def get_region_to_draw(): write_to_file(draw_polygon(coords, 0.8, spark_color[1], stroke_width_spark)) else: print("Error: no input files for treatment and/or control") - # Draw y-axis for this group (sine) with independent labels - draw_sine_axis_for_group(y_start, pos_max_value, neg_max_value) + # Draw y-axis for this group (sine) with shared scale and symmetric labels + draw_sine_axis_for_group(y_start, shared_max_value) # Scalebar if display_scalebar == "yes":