diff --git a/benchmarks/benchmark_arithmetic_and_derived.py b/benchmarks/benchmark_arithmetic_and_derived.py index e01d419..19a0154 100644 --- a/benchmarks/benchmark_arithmetic_and_derived.py +++ b/benchmarks/benchmark_arithmetic_and_derived.py @@ -1,21 +1,14 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Benchmark for arithmetic operations and derived properties in LVec. -This benchmark focuses on the computational speed of vector algebra and -the effectiveness of caching in LVec compared to other vector libraries. -""" - import numpy as np import timeit import matplotlib.pyplot as plt import tracemalloc import gc import time +import os from functools import partial from lvec import LVec, Vector2D, Vector3D import vector # Comparison library +from plotting_utils import plot_vector_types_comparison, set_publication_style def measure_memory_usage(operation, n_repeats=5): """Measure memory usage for an operation.""" @@ -139,7 +132,7 @@ def benchmark_arithmetic(size, vector_type, n_repeats=10): cross_time, cross_std = measure_single_timing(cross_op, n_repeats) results["cross_product"] = {"time": cross_time, "std": cross_std} - elif vector_type == "Vector": # vector package for comparison + elif vector_type == "Scikit Vector": # Changed from "Vector" to "Scikit Vector" px, py, pz, E = generate_test_data(size) v1 = vector.arr({"px": px, "py": py, "pz": pz, "E": E}) v2 = vector.arr({"px": px, "py": py, "pz": pz, "E": E}) @@ -227,7 +220,7 @@ def benchmark_derived_properties(size, vector_type, n_repeats=10): rho_time, rho_std = measure_single_timing(rho_op, n_repeats) results["rho"] = {"time": rho_time, "std": rho_std} - elif vector_type == "Vector": # vector package for comparison + elif vector_type == "Scikit Vector": # Changed from "Vector" to "Scikit Vector" px, py, pz, E = generate_test_data(size) vec = vector.arr({"px": px, "py": py, "pz": pz, "E": E}) @@ -288,132 +281,56 @@ def uncached_access(): def plot_arithmetic_results(sizes, results, vector_types, operations, save_path="benchmark_arithmetic.pdf"): """Plot arithmetic operation benchmark results.""" - plt.style.use('default') - n_ops = len(operations) - n_cols = 2 - n_rows = (n_ops + n_cols - 1) // n_cols # Ceiling division - - fig = plt.figure(figsize=(12, 4 * n_rows)) - gs = fig.add_gridspec(n_rows, n_cols, hspace=0.4, wspace=0.3) - - colors = {'LVec': '#3498db', 'Vector2D': '#2ecc71', 'Vector3D': '#9b59b6', 'Vector': '#e74c3c'} - - for op_idx, operation in enumerate(operations): - row = op_idx // n_cols - col = op_idx % n_cols - ax = fig.add_subplot(gs[row, col]) - - for vtype in vector_types: - # Check if this vector type has this operation - if vtype in results and operation in results[vtype][0]: - times = [results[vtype][i].get(operation, {}).get("time", np.nan) * 1000 for i in range(len(sizes))] # ms - ax.plot(sizes, times, 'o-', label=vtype, color=colors[vtype], linewidth=2, markersize=6) - - ax.set_xscale('log') - ax.set_yscale('log') - ax.set_xlabel('Array Size', fontsize=10) - ax.set_ylabel('Time (ms)', fontsize=10) - ax.set_title(operation.replace('_', ' ').title(), fontsize=12) - ax.grid(True, which='both', linestyle='--', alpha=0.7) - ax.grid(True, which='minor', linestyle=':', alpha=0.4) - ax.legend(fontsize=10) - ax.tick_params(labelsize=8) - - # Remove any empty subplots - for idx in range(len(operations), n_rows * n_cols): - row = idx // n_cols - col = idx % n_cols - if idx < n_rows * n_cols: # Ensure we're not out of bounds - fig.delaxes(fig.add_subplot(gs[row, col])) - - plt.suptitle('Performance 
Comparison of Arithmetic Operations', fontsize=14, y=1.02) - plt.savefig(save_path, dpi=300, bbox_inches='tight') - plt.close() + plot_vector_types_comparison( + sizes, + results, + vector_types, + operations, + title='Arithmetic Operations Performance', + filename=save_path + ) def plot_derived_results(sizes, results, vector_types, properties, save_path="benchmark_derived.pdf"): """Plot derived properties benchmark results.""" - plt.style.use('default') - n_props = len(properties) - n_cols = 2 - n_rows = (n_props + n_cols - 1) // n_cols # Ceiling division - - fig = plt.figure(figsize=(12, 4 * n_rows)) - gs = fig.add_gridspec(n_rows, n_cols, hspace=0.4, wspace=0.3) - - colors = {'LVec': '#3498db', 'Vector2D': '#2ecc71', 'Vector3D': '#9b59b6', 'Vector': '#e74c3c'} - - for prop_idx, prop in enumerate(properties): - row = prop_idx // n_cols - col = prop_idx % n_cols - ax = fig.add_subplot(gs[row, col]) - - for vtype in vector_types: - # Check if this vector type has this property - if vtype in results and prop in results[vtype][0]: - times = [results[vtype][i].get(prop, {}).get("time", np.nan) * 1000 for i in range(len(sizes))] # ms - ax.plot(sizes, times, 'o-', label=vtype, color=colors[vtype], linewidth=2, markersize=6) - - ax.set_xscale('log') - ax.set_yscale('log') - ax.set_xlabel('Array Size', fontsize=10) - ax.set_ylabel('Time (ms)', fontsize=10) - ax.set_title(prop.replace('_', ' ').title(), fontsize=12) - ax.grid(True, which='both', linestyle='--', alpha=0.7) - ax.grid(True, which='minor', linestyle=':', alpha=0.4) - ax.legend(fontsize=10) - ax.tick_params(labelsize=8) - - # Remove any empty subplots - for idx in range(len(properties), n_rows * n_cols): - row = idx // n_cols - col = idx % n_cols - if idx < n_rows * n_cols: # Ensure we're not out of bounds - fig.delaxes(fig.add_subplot(gs[row, col])) - - plt.suptitle('Performance Comparison of Derived Properties', fontsize=14, y=1.02) - plt.savefig(save_path, dpi=300, bbox_inches='tight') - plt.close() + plot_vector_types_comparison( + sizes, + results, + vector_types, + properties, + title='Derived Properties Performance', + filename=save_path + ) def plot_caching_results(sizes, cache_results, save_path="benchmark_caching.pdf"): """Plot caching effectiveness benchmark results.""" - plt.style.use('default') + set_publication_style() fig, ax = plt.subplots(figsize=(10, 6)) - # Extract times in milliseconds - cached_times = [res["cached"]["time"] * 1000 for res in cache_results] - uncached_times = [res["uncached"]["time"] * 1000 for res in cache_results] + # Convert to milliseconds (each entry holds nested {"time", "std"} dicts, as used in run_benchmarks) + cached_times = np.array([res["cached"]["time"] for res in cache_results]) * 1000 + uncached_times = np.array([res["uncached"]["time"] for res in cache_results]) * 1000 - # Calculate speedup - speedup = [uncached / cached for uncached, cached in zip(uncached_times, cached_times)] + ax.plot(sizes, cached_times, 'o-', label='With Caching', color='#109618', linewidth=2, markersize=6) + ax.plot(sizes, uncached_times, 'o-', label='Without Caching', color='#FF9900', linewidth=2, markersize=6) - # Create primary plot for times - ax.plot(sizes, cached_times, 'o-', label='With Caching', color='#2ecc71', linewidth=2, markersize=6) - ax.plot(sizes, uncached_times, 'o-', label='Without Caching', color='#e74c3c', linewidth=2, markersize=6) ax.set_xscale('log') ax.set_yscale('log') ax.set_xlabel('Array Size', fontsize=12) ax.set_ylabel('Time (ms)', fontsize=12) + ax.set_title('Caching Effectiveness in lvec', fontsize=14) ax.grid(True, which='both', linestyle='--', alpha=0.7) ax.grid(True, 
which='minor', linestyle=':', alpha=0.4) - ax.tick_params(labelsize=10) - ax.legend(fontsize=10, loc='upper left') + ax.legend(fontsize=12) - # Create secondary y-axis for speedup - ax2 = ax.twinx() - ax2.plot(sizes, speedup, 'o--', label='Speedup Factor', color='#3498db', linewidth=1.5, markersize=5) - ax2.set_ylabel('Speedup Factor (Uncached/Cached)', fontsize=12, color='#3498db') - ax2.tick_params(axis='y', labelcolor='#3498db') - ax2.legend(fontsize=10, loc='upper right') - - plt.title('Caching Effectiveness in LVec', fontsize=14) - plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.tight_layout() + plt.savefig(os.path.join('plots', save_path), bbox_inches='tight') plt.close() def run_benchmarks(): """Run all benchmarks and plot results.""" sizes = [10, 100, 1000, 10000, 100000, 1000000] - vector_types = ["LVec", "Vector2D", "Vector3D", "Vector"] + vector_types = ["LVec", "Vector2D", "Vector3D", "Scikit Vector"] # Arithmetic operations arith_results = {vtype: [] for vtype in vector_types} @@ -447,21 +364,20 @@ def run_benchmarks(): print(f" Speedup: {res['uncached']['time']/res['cached']['time']:.2f}x") # Plot results - arith_ops = ["addition", "subtraction", "scalar_mul", "dot_product", "cross_product"] - derived_props = ["mass", "pt", "eta", "phi", "magnitude", "theta", "rho"] + # Remove operations that don't have Scikit Vector equivalents + arith_ops = ["addition", "subtraction", "scalar_mul"] # Removed dot_product and cross_product + derived_props = ["mass", "pt", "eta", "phi"] # Removed magnitude, theta, rho - plot_arithmetic_results(sizes, arith_results, vector_types, arith_ops, "benchmarks/plots/benchmark_arithmetic.pdf") - plot_derived_results(sizes, derived_results, vector_types, derived_props, "benchmarks/plots/benchmark_derived.pdf") - plot_caching_results(sizes, cache_results, "benchmarks/plots/benchmark_caching.pdf") + plot_arithmetic_results(sizes, arith_results, vector_types, arith_ops, "benchmark_arithmetic.pdf") + plot_derived_results(sizes, derived_results, vector_types, derived_props, "benchmark_derived.pdf") + plot_caching_results(sizes, cache_results, "benchmark_caching.pdf") print("\nBenchmarks completed. Plots saved to:") - print(" - benchmarks/plots/benchmark_arithmetic.pdf") - print(" - benchmarks/plots/benchmark_derived.pdf") - print(" - benchmarks/plots/benchmark_caching.pdf") + print(" - plots/benchmark_arithmetic.pdf") + print(" - plots/benchmark_derived.pdf") + print(" - plots/benchmark_caching.pdf") if __name__ == "__main__": # Create plots directory if it doesn't exist - import os - os.makedirs("benchmarks/plots", exist_ok=True) - - run_benchmarks() + os.makedirs("plots", exist_ok=True) + run_benchmarks() \ No newline at end of file diff --git a/benchmarks/benchmark_initialization.py b/benchmarks/benchmark_initialization.py index 0fea145..bbc5695 100644 --- a/benchmarks/benchmark_initialization.py +++ b/benchmarks/benchmark_initialization.py @@ -1,11 +1,13 @@ import numpy as np import timeit import matplotlib.pyplot as plt -from lvec import LVec -import vector import tracemalloc import gc import time +import os +from lvec import LVec +import vector +from plotting_utils import plot_combined_performance, set_publication_style, COLORS def measure_memory_usage(operation, n_repeats=5): """Measure memory usage for an operation.""" @@ -39,7 +41,7 @@ def measure_initialization_time(init_function, n_repeats=5, number=10): def benchmark_initialization_overhead(sizes, n_repeats=5): """ - Benchmark initialization overhead between LVec and vector package. 
+ Benchmark initialization overhead between lvec and vector package. Parameters: ----------- @@ -63,7 +65,7 @@ def benchmark_initialization_overhead(sizes, n_repeats=5): print(f"\nBenchmarking initialization with {size:,} vectors:") px, py, pz, E = generate_test_data(size) - # Benchmark LVec initialization + # Benchmark lvec initialization def lvec_init(): return LVec(px, py, pz, E) @@ -88,10 +90,10 @@ def vector_init(): vector_memory.append(vector_mem) print(f" Results for {size:,} vectors:") - print(f" LVec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.2f} MB") - print(f" Vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.2f} MB") - print(f" Speed Ratio: {vector_mean/lvec_mean:.2f}x faster with LVec") - print(f" Memory Ratio: {vector_mem/lvec_mem:.2f}x more memory efficient with LVec") + print(f" lvec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.2f} MB") + print(f" vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.2f} MB") + print(f" Speed Ratio: {vector_mean/lvec_mean:.2f}x faster with lvec") + print(f" Memory Ratio: {vector_mem/lvec_mem:.2f}x more memory efficient with lvec") return (np.array(lvec_times), np.array(lvec_errors), np.array(vector_times), np.array(vector_errors), @@ -124,9 +126,9 @@ def benchmark_cached_initialization(): px, py, pz, E = generate_test_data(size) - # Measure LVec repeated initialization + # Measure lvec repeated initialization lvec_times = [] - print(" Measuring LVec repeated initialization...") + print(" Measuring lvec repeated initialization...") for i in range(repeats): start = time.time() vec = LVec(px, py, pz, E) @@ -145,85 +147,50 @@ def benchmark_cached_initialization(): print(f" Iteration {i+1}: {vector_times[-1]:.3f} ms") # Plot results + set_publication_style() plt.figure(figsize=(10, 6)) - plt.plot(range(1, repeats+1), lvec_times, 'o-', label='LVec', color='#3498db', linewidth=2) - plt.plot(range(1, repeats+1), vector_times, 'o-', label='vector', color='#9b59b6', linewidth=2) + plt.plot(range(1, repeats+1), lvec_times, 'o-', label='lvec', color=COLORS['lvec'], linewidth=2) + plt.plot(range(1, repeats+1), vector_times, 'o-', label='vector', color=COLORS['vector'], linewidth=2) plt.xlabel('Initialization Iteration', fontsize=12) plt.ylabel('Time (ms)', fontsize=12) plt.title('Repeated Initialization Performance (Caching Effects)', fontsize=14) plt.grid(True, linestyle='--', alpha=0.7) plt.legend(fontsize=12) - plt.savefig('benchmarks/plots/cached_initialization_benchmark.pdf', dpi=300, bbox_inches='tight') + plt.savefig(os.path.join('benchmarks/plots', 'cached_initialization_benchmark.pdf'), bbox_inches='tight') plt.close() return lvec_times, vector_times -def plot_results(sizes, lvec_data, vector_data, title="LVec vs vector Initialization Overhead"): - """Plot benchmark results.""" +def plot_results(sizes, lvec_data, vector_data, title="lvec vs vector Initialization Overhead"): + """Plot benchmark results using standardized plotting utilities.""" lvec_times, lvec_errors, lvec_memory = lvec_data vector_times, vector_errors, vector_memory = vector_data - # Convert to milliseconds - lvec_times *= 1000 - vector_times *= 1000 - - # Create figure with two subplots - plt.style.use('default') - fig = plt.figure(figsize=(12, 10)) - gs = fig.add_gridspec(3, 1, height_ratios=[1, 1, 1], hspace=0.3) - - # Upper plot: timing comparison - ax1 = fig.add_subplot(gs[0]) - ax1.plot(sizes, lvec_times, 'o-', label='LVec', color='#3498db', linewidth=2, markersize=8) - ax1.plot(sizes, vector_times, 
'o-', label='vector', color='#9b59b6', linewidth=2, markersize=8) - ax1.set_xscale('log') - ax1.set_yscale('log') - ax1.set_ylabel('Initialization Time (ms)', fontsize=12) - ax1.set_title(title, fontsize=14, pad=15) - ax1.grid(True, which='both', linestyle='--', alpha=0.7) - ax1.legend(fontsize=12) - ax1.tick_params(labelsize=10) - - # Middle plot: memory usage - ax2 = fig.add_subplot(gs[1]) - ax2.plot(sizes, lvec_memory, 'o-', label='LVec', color='#2ecc71', linewidth=2, markersize=8) - ax2.plot(sizes, vector_memory, 'o-', label='vector', color='#e74c3c', linewidth=2, markersize=8) - ax2.set_xscale('log') - ax2.set_yscale('log') - ax2.set_ylabel('Memory Usage (MB)', fontsize=12) - ax2.grid(True, which='both', linestyle='--', alpha=0.7) - ax2.legend(fontsize=12) - ax2.tick_params(labelsize=10) - - # Bottom plot: performance ratio - ax3 = fig.add_subplot(gs[2]) - ax3.plot(sizes, vector_times / lvec_times, 'o-', label='Time Ratio (vector/LVec)', - color='#f39c12', linewidth=2, markersize=8) - ax3.plot(sizes, vector_memory / lvec_memory, 'o-', label='Memory Ratio (vector/LVec)', - color='#16a085', linewidth=2, markersize=8) - ax3.set_xscale('log') - ax3.axhline(y=1.0, color='gray', linestyle='--', alpha=0.7) - ax3.set_xlabel('Number of Vectors', fontsize=12) - ax3.set_ylabel('Ratio (vector/LVec)', fontsize=12) - ax3.grid(True, which='both', linestyle='--', alpha=0.7) - ax3.legend(fontsize=12) - ax3.tick_params(labelsize=10) - - # Add minor gridlines - ax1.grid(True, which='minor', linestyle=':', alpha=0.4) - ax2.grid(True, which='minor', linestyle=':', alpha=0.4) - ax3.grid(True, which='minor', linestyle=':', alpha=0.4) - - plt.savefig('benchmarks/plots/initialization_benchmark_results.pdf', dpi=300, bbox_inches='tight') - plt.close() + plot_combined_performance( + sizes, + lvec_times, + vector_times, + lvec_memory, + vector_memory, + title=title, + filename='initialization_benchmark.pdf' + ) if __name__ == '__main__': - print("=== LVec vs vector Initialization Overhead Benchmark ===") + print("=== lvec vs vector Initialization Overhead Benchmark ===") + + # Create plots directory if it doesn't exist + os.makedirs("benchmarks/plots", exist_ok=True) # Run main benchmarks - batch_results = benchmark_batch_sizes() + sizes = [10, 100, 1000, 10000, 100000, 1000000] + results = benchmark_initialization_overhead(sizes) + plot_results(sizes, + (results[0], results[1], results[4]), + (results[2], results[3], results[5])) - # Run the cached initialization benchmark - cache_results = benchmark_cached_initialization() + # Run additional benchmarks + batch_results = benchmark_batch_sizes() + cached_results = benchmark_cached_initialization() print("\nBenchmark completed. 
Results saved to PDF files.") diff --git a/benchmarks/benchmark_lhcb.py b/benchmarks/benchmark_lhcb.py index 63289a4..45b7633 100644 --- a/benchmarks/benchmark_lhcb.py +++ b/benchmarks/benchmark_lhcb.py @@ -38,14 +38,41 @@ # Create plots directory if it doesn't exist os.makedirs(PLOTS_DIR, exist_ok=True) -# Set LHCb style -plt.style.use('seaborn-v0_8-whitegrid') -plt.rcParams['font.family'] = 'serif' -plt.rcParams['font.serif'] = 'Times New Roman' -plt.rcParams['font.size'] = 12 -plt.rcParams['axes.labelsize'] = 14 -plt.rcParams['axes.titlesize'] = 16 -plt.rcParams['figure.figsize'] = (12, 8) +# Set modern scientific plotting style +plt.style.use('default') +plt.rcParams.update({ + 'font.family': 'sans-serif', + 'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'], + 'font.size': 11, + 'axes.labelsize': 12, + 'axes.titlesize': 14, + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'legend.fontsize': 10, + 'figure.figsize': (10, 7), + 'figure.dpi': 150, + 'savefig.dpi': 300, + 'savefig.format': 'pdf', + 'axes.grid': False, + 'lines.linewidth': 2, + 'lines.markersize': 6, + 'axes.spines.top': False, + 'axes.spines.right': False, + 'axes.linewidth': 1.2, + 'xtick.major.width': 1.2, + 'ytick.major.width': 1.2 +}) + +# Define professional color palette +COLORS = { + 'vector': '#4472C4', # Microsoft blue + 'lvec': '#70AD47', # Microsoft green + 'highlight': '#ED7D31', # Microsoft orange + 'accent1': '#5B9BD5', # Light blue + 'accent2': '#FFC000', # Gold + 'gray': '#7F7F7F', # Gray + 'background': '#F2F2F2' # Light gray background +} class Timer: """Simple context manager for timing code blocks.""" @@ -61,13 +88,6 @@ def __exit__(self, *args): self.interval = self.end - self.start print(f"{self.name} took {self.interval:.6f} seconds") -def add_lhcb_label(ax, x=0.85, y=0.85): - """Add the LHCb label to the plot""" - ax.text(x, y, "LHCb", fontname="Times New Roman", - fontsize=16, transform=ax.transAxes, - bbox=dict(facecolor='white', alpha=0.8, edgecolor='none')) - -# Download the ROOT file if not present def download_data(): url = "https://opendata.cern.ch/record/4900/files/B2HHH_MagnetDown.root" filename = "B2HHH_MagnetDown.root" @@ -222,99 +242,274 @@ def calculate_energy(p3, mass): def plot_performance_comparison(vector_time, lvec_time, memory_vector, memory_lvec, iterations): """Plot performance comparison between Vector and LVec implementations.""" - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) + # Calculate speedup and memory reduction + speedup = vector_time / lvec_time if lvec_time > 0 else float('inf') + mem_reduction = (memory_vector - memory_lvec) / memory_vector * 100 if memory_vector > memory_lvec else 0 + + # Create figure with a light gray background + fig = plt.figure(figsize=(12, 8)) + fig.patch.set_facecolor(COLORS['background']) + + # Create a 2x2 grid for the plots + gs = fig.add_gridspec(2, 2, height_ratios=[1, 1], width_ratios=[2, 1], + hspace=0.3, wspace=0.3) - # Time comparison - methods = ['Scikit-HEP Vector', 'LVec'] + # Time comparison - horizontal bar chart (more impactful) + ax1 = fig.add_subplot(gs[0, 0]) + ax1.patch.set_facecolor('white') + + # Create horizontal bar chart + methods = ['Scikit-HEP\nVector', 'LVec'] times = [vector_time, lvec_time] - colors = ['#1f77b4', '#2ca02c'] + colors = [COLORS['vector'], COLORS['lvec']] + + y_pos = np.arange(len(methods)) + ax1.barh(y_pos, times, color=colors, height=0.5, + edgecolor='white', linewidth=0.5) - ax1.bar(methods, times, color=colors) - ax1.set_ylabel('Time (seconds)') - ax1.set_title(f'Execution Time 
({iterations} iterations)') + # Add value labels inside bars for i, v in enumerate(times): - ax1.text(i, v + 0.01, f"{v:.3f}s", ha='center') + ax1.text(v/2, i, f"{v:.3f}s", + ha='center', va='center', color='white', fontweight='bold') - # Speedup calculation - speedup = vector_time / lvec_time if lvec_time > 0 else float('inf') - speedup_text = f"Speedup: {speedup:.2f}x" if speedup >= 1 else f"Slowdown: {1/speedup:.2f}x" - ax1.text(0.5, 0.9, speedup_text, - transform=ax1.transAxes, ha='center', - bbox=dict(facecolor='white', alpha=0.8, edgecolor='gray')) + # Customize time plot + ax1.set_yticks(y_pos) + ax1.set_yticklabels(methods, fontweight='bold') + ax1.set_xlabel('Execution Time (seconds)', fontweight='bold') + ax1.set_title('Execution Time Comparison', fontweight='bold', pad=15) + ax1.invert_yaxis() # Puts Vector at the top + + # Memory comparison - horizontal bar chart + ax2 = fig.add_subplot(gs[1, 0]) + ax2.patch.set_facecolor('white') - # Memory comparison memory_usage = [memory_vector, memory_lvec] - ax2.bar(methods, memory_usage, color=colors) - ax2.set_ylabel('Memory Usage (MB)') - ax2.set_title('Peak Memory Usage') - for i, v in enumerate(memory_usage): - ax2.text(i, v + 0.5, f"{v:.1f} MB", ha='center') + ax2.barh(y_pos, memory_usage, color=colors, height=0.5, + edgecolor='white', linewidth=0.5) - # Memory savings - if memory_vector > memory_lvec: - mem_reduction = (memory_vector - memory_lvec) / memory_vector * 100 - mem_text = f"Memory reduction: {mem_reduction:.1f}%" + # Add value labels inside bars + for i, v in enumerate(memory_usage): + ax2.text(v/2, i, f"{v:.1f} MB", + ha='center', va='center', color='white', fontweight='bold') + + # Customize memory plot + ax2.set_yticks(y_pos) + ax2.set_yticklabels(methods, fontweight='bold') + ax2.set_xlabel('Peak Memory Usage (MB)', fontweight='bold') + ax2.set_title('Memory Usage Comparison', fontweight='bold', pad=15) + ax2.invert_yaxis() # Puts Vector at the top + + # Speedup visualization - gauge chart + ax3 = fig.add_subplot(gs[0, 1], polar=True) + ax3.patch.set_facecolor('white') + + # Create gauge chart for speedup + if speedup >= 1: + # Normalize speedup to the 0-pi gauge range (polar bar widths are in radians) + # Cap at 3x for visualization purposes + norm_speedup = min(speedup, 3) / 3 * np.pi + gauge_color = COLORS['lvec'] + speedup_text = f"{speedup:.2f}×\nfaster" + else: + norm_speedup = min(1/speedup, 3) / 3 * np.pi + gauge_color = COLORS['vector'] + speedup_text = f"{1/speedup:.2f}×\nslower" + + # Background ring (gray) spanning the full semi-circle + ax3.barh(0, np.pi, left=0, height=0.6, color=COLORS['gray'], alpha=0.3) + # Foreground ring (colored) + ax3.barh(0, norm_speedup, left=0, height=0.6, color=gauge_color) + + # Remove ticks and labels + ax3.set_xticks([]) + ax3.set_yticks([]) + ax3.set_theta_zero_location('N') + ax3.set_theta_direction(-1) # Clockwise + + # Set limits for semi-circle + ax3.set_thetamin(0) + ax3.set_thetamax(180) + + # Add text in the middle + ax3.text(0, -0.2, speedup_text, ha='center', va='center', + fontsize=14, fontweight='bold', color='black') + + # Add title + ax3.set_title('Speed Comparison', fontweight='bold', pad=15) + + # Memory savings visualization - gauge chart + ax4 = fig.add_subplot(gs[1, 1], polar=True) + ax4.patch.set_facecolor('white') + + # Create gauge chart for memory savings + if mem_reduction > 0: + # Normalize memory reduction to the 0-pi gauge range + # Cap at 50% for visualization purposes + norm_mem = min(mem_reduction, 50) / 50 * np.pi + mem_color = COLORS['lvec'] + mem_text = f"{mem_reduction:.1f}%\nless memory" + else: 
mem_increase = (memory_lvec - memory_vector) / memory_vector * 100 - mem_text = f"Memory increase: {mem_increase:.1f}%" + norm_mem = min(mem_increase, 50) / 50 * np.pi + mem_color = COLORS['vector'] + mem_text = f"{mem_increase:.1f}%\nmore memory" + + # Background ring (gray) spanning the full semi-circle + ax4.barh(0, np.pi, left=0, height=0.6, color=COLORS['gray'], alpha=0.3) + # Foreground ring (colored) + ax4.barh(0, norm_mem, left=0, height=0.6, color=mem_color) + + # Remove ticks and labels + ax4.set_xticks([]) + ax4.set_yticks([]) + ax4.set_theta_zero_location('N') + ax4.set_theta_direction(-1) # Clockwise + + # Set limits for semi-circle + ax4.set_thetamin(0) + ax4.set_thetamax(180) - ax2.text(0.5, 0.9, mem_text, - transform=ax2.transAxes, ha='center', - bbox=dict(facecolor='white', alpha=0.8, edgecolor='gray')) + # Add text in the middle + ax4.text(0, -0.2, mem_text, ha='center', va='center', + fontsize=14, fontweight='bold', color='black') - plt.tight_layout() + # Add title + ax4.set_title('Memory Comparison', fontweight='bold', pad=15) - # Save plot to plots directory + # Add main title + fig.suptitle(f'LVec Performance Benchmark ({iterations} iterations)', + fontsize=16, fontweight='bold', y=0.98) + + # Add benchmark details as a footer + plt.figtext(0.5, 0.01, + f"Benchmark: B→hhh decay analysis | Date: {time.strftime('%Y-%m-%d')}", + ha="center", fontsize=9, fontstyle='italic') + + plt.tight_layout(rect=[0, 0.02, 1, 0.95]) + + # Save plot in PDF format plot_path = os.path.join(PLOTS_DIR, "lvec_benchmark_results.pdf") - plt.savefig(plot_path, bbox_inches='tight') + plt.savefig(plot_path, bbox_inches='tight', dpi=300, facecolor=fig.get_facecolor()) + print(f"Performance comparison plot saved as '{plot_path}'") -def plot_mass_comparison(vector_results, lvec_results): - """Plot mass distribution comparison to verify both methods produce the same physics results.""" - fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6)) - - # Common histogram parameters - bins = np.linspace(0, 5.5, 100) +def plot_physics_results(vector_results, lvec_results): + """Plot comprehensive physics results comparison.""" + # Create figure with a light gray background + fig = plt.figure(figsize=(12, 10)) + fig.patch.set_facecolor(COLORS['background']) + + # Create a 2x2 grid for the plots + gs = fig.add_gridspec(2, 2, height_ratios=[1, 1], width_ratios=[1, 1], + hspace=0.3, wspace=0.3) + + # Function to create a physics plot with inset ratio + def create_physics_plot(ax, vector_data, lvec_data, title, xlabel, bins=100, range_min=0, range_max=5.5): + ax.patch.set_facecolor('white') + + # Create bins + bins = np.linspace(range_min, range_max, bins) + bin_width = bins[1] - bins[0] + bin_centers = (bins[:-1] + bins[1:]) / 2 + + # Calculate histograms + vector_hist, _ = np.histogram(vector_data, bins=bins) + lvec_hist, _ = np.histogram(lvec_data, bins=bins) + + # Plot histograms + ax.hist(vector_data, bins=bins, label='Vector', histtype='step', + linewidth=2, color=COLORS['vector'], alpha=0.9) + ax.hist(lvec_data, bins=bins, label='LVec', histtype='step', + linewidth=2, color=COLORS['lvec'], alpha=0.9) + + # Add shaded area under curves + ax.hist(vector_data, bins=bins, histtype='stepfilled', + linewidth=0, color=COLORS['vector'], alpha=0.1) + ax.hist(lvec_data, bins=bins, histtype='stepfilled', + linewidth=0, color=COLORS['lvec'], alpha=0.1) + + # Calculate ratio for inset ratio plot + with np.errstate(divide='ignore', invalid='ignore'): + ratio = np.divide(lvec_hist, vector_hist) + ratio[~np.isfinite(ratio)] = 1.0 # Replace inf/NaN with 
1.0 + + # Create inset axis for ratio + inset_ax = ax.inset_axes([0.6, 0.02, 0.38, 0.25]) + inset_ax.patch.set_facecolor('white') + inset_ax.plot(bin_centers, ratio, '-', linewidth=1.5, color=COLORS['highlight']) + inset_ax.axhline(y=1.0, color=COLORS['gray'], linestyle='--', alpha=0.7, linewidth=1) + + # Set y-range for ratio plot + inset_ax.set_ylim(0.95, 1.05) + inset_ax.set_ylabel('LVec/Vector', fontsize=8) + inset_ax.tick_params(axis='both', which='major', labelsize=7) + + # Remove spines + for spine in ['top', 'right']: + inset_ax.spines[spine].set_visible(False) + + # Formatting main plot + ax.set_xlabel(xlabel, fontweight='bold') + ax.set_ylabel(f'Candidates / {bin_width:.3f} GeV/c²', fontweight='bold') + ax.set_title(title, fontweight='bold', pad=10) + ax.legend(frameon=True, fancybox=True, framealpha=0.7, loc='upper right') + + # Add main title + fig.suptitle('Physics Results Comparison: Vector vs LVec', + fontsize=16, fontweight='bold', y=0.98) # Plot m12 distribution - ax1.hist(vector_results["m12"], bins=bins, label='Vector', histtype='step', - linewidth=2, color='blue', alpha=0.7) - ax1.hist(lvec_results["m12"], bins=bins, label='LVec', histtype='step', - linewidth=2, color='green', alpha=0.7) - ax1.set_xlabel('m(h1,h2) [GeV]') - ax1.set_ylabel('Candidates / 55 MeV') - add_lhcb_label(ax1) - ax1.legend() + ax1 = fig.add_subplot(gs[0, 0]) + create_physics_plot( + ax1, + vector_results["m12"], lvec_results["m12"], + 'Two-Body Mass m(h1,h2)', 'm(h1,h2) [GeV/c²]' + ) # Plot m23 distribution - ax2.hist(vector_results["m23"], bins=bins, label='Vector', histtype='step', - linewidth=2, color='blue', alpha=0.7) - ax2.hist(lvec_results["m23"], bins=bins, label='LVec', histtype='step', - linewidth=2, color='green', alpha=0.7) - ax2.set_xlabel('m(h2,h3) [GeV]') - ax2.set_ylabel('Candidates / 55 MeV') - add_lhcb_label(ax2) - ax2.legend() + ax2 = fig.add_subplot(gs[0, 1]) + create_physics_plot( + ax2, + vector_results["m23"], lvec_results["m23"], + 'Two-Body Mass m(h2,h3)', 'm(h2,h3) [GeV/c²]' + ) # Plot m13 distribution - ax3.hist(vector_results["m13"], bins=bins, label='Vector', histtype='step', - linewidth=2, color='blue', alpha=0.7) - ax3.hist(lvec_results["m13"], bins=bins, label='LVec', histtype='step', - linewidth=2, color='green', alpha=0.7) - ax3.set_xlabel('m(h1,h3) [GeV]') - ax3.set_ylabel('Candidates / 55 MeV') - add_lhcb_label(ax3) - ax3.legend() - - # Add common title - fig.suptitle('Physics Results Comparison: Vector vs LVec', y=1.02) - - # Adjust layout and save - plt.tight_layout() - - # Save plot to plots directory + ax3 = fig.add_subplot(gs[1, 0]) + create_physics_plot( + ax3, + vector_results["m13"], lvec_results["m13"], + 'Two-Body Mass m(h1,h3)', 'm(h1,h3) [GeV/c²]' + ) + + # Plot three-body mass distribution + ax4 = fig.add_subplot(gs[1, 1]) + create_physics_plot( + ax4, + vector_results["three_body_mass"], lvec_results["three_body_mass"], + 'Three-Body Mass m(h1,h2,h3)', 'm(h1,h2,h3) [GeV/c²]', + bins=80, range_min=4.5, range_max=6.0 + ) + + # Highlight B meson mass region in three-body plot + b_mass = 5.279 # GeV/c² + b_width = 0.1 # Approximate width to highlight + ax4.axvspan(b_mass - b_width, b_mass + b_width, alpha=0.2, color=COLORS['highlight']) + ax4.axvline(b_mass, color=COLORS['highlight'], linestyle='--', alpha=0.7) + ax4.text(b_mass, ax4.get_ylim()[1]*0.95, 'B⁰', ha='center', va='top', + color=COLORS['highlight'], fontweight='bold') + + # Add benchmark details as a footer + plt.figtext(0.5, 0.01, + f"B→hhh decay analysis | Date: 
{time.strftime('%Y-%m-%d')}", + ha="center", fontsize=9, fontstyle='italic') + + plt.tight_layout(rect=[0, 0.02, 1, 0.95]) + + # Save plot in PDF format plot_path = os.path.join(PLOTS_DIR, "physics_comparison.pdf") - plt.savefig(plot_path, bbox_inches='tight') + plt.savefig(plot_path, bbox_inches='tight', dpi=300, facecolor=fig.get_facecolor()) + print(f"Physics comparison plot saved as '{plot_path}'") def run_benchmark(iterations=10): @@ -364,7 +559,7 @@ def run_lvec(): # Create visualizations print("\n5. Creating performance comparison plots...") plot_performance_comparison(avg_time_vector, avg_time_lvec, memory_vector, memory_lvec, iterations) - plot_mass_comparison(results_vector, results_lvec) + plot_physics_results(results_vector, results_lvec) # Print summary print("\n===== BENCHMARK SUMMARY =====") diff --git a/benchmarks/benchmark_lorentz_boost.py b/benchmarks/benchmark_lorentz_boost.py index b951330..78e9e36 100644 --- a/benchmarks/benchmark_lorentz_boost.py +++ b/benchmarks/benchmark_lorentz_boost.py @@ -2,9 +2,9 @@ # -*- coding: utf-8 -*- """ -Benchmark for Lorentz boost operations in LVec. +Benchmark for Lorentz boost operations in lvec. This benchmark compares the performance of axis-specific boosts vs general boosts -between LVec and the vector package, with focus on backend optimizations. +between lvec and the vector package, with focus on backend optimizations. """ import numpy as np @@ -17,6 +17,7 @@ import os from lvec import LVec import vector # Comparison library +from plotting_utils import set_publication_style, COLORS def measure_memory_usage(operation, n_repeats=5): """Measure memory usage for an operation.""" @@ -55,7 +56,7 @@ def benchmark_lorentz_boost(size, n_repeats=10): # Generate test data px, py, pz, E = generate_test_data(size) - # Create LVec and vector objects + # Create lvec and vector objects lvec = LVec(px, py, pz, E) vec = vector.arr({"px": px, "py": py, "pz": pz, "E": E}) @@ -69,32 +70,32 @@ def benchmark_lorentz_boost(size, n_repeats=10): operations = { # X-axis boost operations 'boostx': ( - lambda: lvec.boost(beta_x, 0.0, 0.0), # LVec X-axis boost using general method + lambda: lvec.boost(beta_x, 0.0, 0.0), # lvec X-axis boost using general method lambda: vec.boostX(beta_x) # vector X-axis boost using specialized method ), # Y-axis boost operations 'boosty': ( - lambda: lvec.boost(0.0, beta_y, 0.0), # LVec Y-axis boost using general method + lambda: lvec.boost(0.0, beta_y, 0.0), # lvec Y-axis boost using general method lambda: vec.boostY(beta_y) # vector Y-axis boost using specialized method ), # Z-axis boost operations 'boostz': ( - lambda: lvec.boostz(beta_z), # LVec Z-axis boost using specialized method + lambda: lvec.boostz(beta_z), # lvec Z-axis boost using specialized method lambda: vec.boostZ(beta_z) # vector Z-axis boost using specialized method ), # General 3D boost operations 'boost_3d': ( - lambda: lvec.boost(0.2, 0.2, 0.2), # LVec general 3D boost + lambda: lvec.boost(0.2, 0.2, 0.2), # lvec general 3D boost lambda: vec.boost(boost_vec3d) # vector general 3D boost with Vector3D object ), - # Z-axis boost using general method vs specialized method (LVec only) + # Z-axis boost using general method vs specialized method (lvec only) 'lvec_boostz_comparison': ( - lambda: lvec.boostz(0.4), # LVec specialized Z-axis boost - lambda: lvec.boost(0.0, 0.0, 0.4) # LVec general boost method for Z-axis + lambda: lvec.boostz(0.4), # lvec specialized Z-axis boost + lambda: lvec.boost(0.0, 0.0, 0.4) # lvec general boost method for Z-axis ), } @@ -126,7 
+127,7 @@ def benchmark_lorentz_boost(size, n_repeats=10): def plot_boost_time_comparison(sizes, all_results, operations, save_path=None): """Plot timing comparison for boost operations.""" - plt.style.use('default') + set_publication_style() fig, axes = plt.subplots(2, 2, figsize=(14, 12)) axes = axes.flatten() @@ -142,10 +143,10 @@ def plot_boost_time_comparison(sizes, all_results, operations, save_path=None): vector_errors = np.array([r[op]['vector']['error'] for r in all_results]) * 1000 # Plot time comparison - ax.errorbar(sizes, lvec_times, yerr=lvec_errors, fmt='o-', label='LVec', - color='#3498db', linewidth=2, markersize=8, capsize=4) + ax.errorbar(sizes, lvec_times, yerr=lvec_errors, fmt='o-', label='lvec', + color=COLORS['lvec'], linewidth=2, markersize=8, capsize=4) ax.errorbar(sizes, vector_times, yerr=vector_errors, fmt='o-', label='vector', - color='#e74c3c', linewidth=2, markersize=8, capsize=4) + color=COLORS['vector'], linewidth=2, markersize=8, capsize=4) # Calculate speedup ratio speedup = vector_times / lvec_times @@ -169,13 +170,13 @@ def plot_boost_time_comparison(sizes, all_results, operations, save_path=None): plt.tight_layout(rect=[0, 0, 1, 0.97]) if save_path: - plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.savefig(save_path, bbox_inches='tight') return fig def plot_lvec_z_boost_comparison(sizes, all_results, save_path=None): - """Plot comparison between LVec's specialized boostz and general boost methods.""" - plt.style.use('default') + """Plot comparison between lvec's specialized boostz and general boost methods.""" + set_publication_style() fig, ax = plt.subplots(figsize=(10, 6)) # Extract timing data (convert to milliseconds) @@ -188,10 +189,10 @@ def plot_lvec_z_boost_comparison(sizes, all_results, save_path=None): # Plot time comparison ax.errorbar(sizes, specialized_times, yerr=specialized_errors, fmt='o-', - label='Specialized boostz()', color='#3498db', + label='Specialized boostz()', color=COLORS['lvec'], linewidth=2, markersize=8, capsize=4) ax.errorbar(sizes, general_times, yerr=general_errors, fmt='o-', - label='General boost(0,0,β)', color='#9b59b6', + label='General boost(0,0,β)', color=COLORS['vector'], linewidth=2, markersize=8, capsize=4) # Calculate speedup ratio @@ -203,7 +204,7 @@ def plot_lvec_z_boost_comparison(sizes, all_results, save_path=None): bbox=dict(facecolor='white', alpha=0.7)) # Customize plot - ax.set_title('LVec: Specialized vs General Z-Boost Methods') + ax.set_title('Comparison of Z-Boost Methods in lvec') ax.set_xlabel('Array Size') ax.set_ylabel('Time (ms)') ax.set_xscale('log') @@ -211,96 +212,97 @@ def plot_lvec_z_boost_comparison(sizes, all_results, save_path=None): ax.grid(True, alpha=0.3) ax.legend() - plt.tight_layout() - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.savefig(save_path, bbox_inches='tight') return fig def plot_memory_usage(sizes, all_results, operations, save_path=None): """Plot memory usage comparison for boost operations.""" - plt.style.use('default') - fig, ax = plt.subplots(figsize=(10, 6)) - - colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6'] + set_publication_style() + fig, axes = plt.subplots(2, 2, figsize=(14, 12)) + axes = axes.flatten() for i, op in enumerate(operations[:4]): # First 4 operations for package comparison - # Extract memory data (in MB) + ax = axes[i] + + # Extract memory data lvec_memory = np.array([r[op]['lvec']['memory'] for r in all_results]) vector_memory = np.array([r[op]['vector']['memory'] for r in all_results]) + 
# Plot memory comparison + ax.plot(sizes, lvec_memory, 'o-', label='lvec', + color=COLORS['lvec'], linewidth=2, markersize=8) + ax.plot(sizes, vector_memory, 'o-', label='vector', + color=COLORS['vector'], linewidth=2, markersize=8) + # Calculate memory ratio - memory_ratio = lvec_memory / vector_memory + memory_ratio = vector_memory / lvec_memory - # Plot memory ratio (values < 1 mean LVec uses less memory) - ax.plot(sizes, memory_ratio, 'o-', label=op, - color=colors[i], linewidth=2, markersize=8) - - # Add horizontal line at ratio = 1 (equal memory usage) - ax.axhline(y=1.0, color='gray', linestyle='--', alpha=0.7) - - # Customize plot - ax.set_title('Memory Usage Ratio (LVec / vector)') - ax.set_xlabel('Array Size') - ax.set_ylabel('Memory Ratio') - ax.set_xscale('log') - ax.grid(True, alpha=0.3) - ax.legend() - - # Add explanation text - ax.text(0.02, 0.05, 'Values < 1: LVec uses less memory\nValues > 1: vector uses less memory', - transform=ax.transAxes, fontsize=10, - bbox=dict(facecolor='white', alpha=0.7)) + # Add ratio text for largest size + ax.text(0.7, 0.05, f'Memory Ratio: {memory_ratio[-1]:.2f}x', + transform=ax.transAxes, fontsize=12, + bbox=dict(facecolor='white', alpha=0.7)) + + # Customize plot + ax.set_title(f'{op.upper()} Operation') + ax.set_xlabel('Array Size') + ax.set_ylabel('Memory Usage (MB)') + ax.set_xscale('log') + ax.set_yscale('log') + ax.grid(True, alpha=0.3) + ax.legend() - plt.tight_layout() + # Add overall title + fig.suptitle('Memory Usage Comparison for Lorentz Boost Operations', fontsize=16) + plt.tight_layout(rect=[0, 0, 1, 0.97]) if save_path: - plt.savefig(save_path, dpi=300, bbox_inches='tight') + plt.savefig(save_path, bbox_inches='tight') return fig def run_benchmarks(): """Run all benchmarks and plot results.""" - # Array sizes to benchmark - sizes = [10, 100, 1000, 10000, 100000] + # Create plots directory if it doesn't exist + os.makedirs("benchmarks/plots", exist_ok=True) + + # Define sizes to benchmark + sizes = [1000, 10000, 100000, 1000000] operations = ['boostx', 'boosty', 'boostz', 'boost_3d', 'lvec_boostz_comparison'] - # Store results for each operation and size + # Run benchmarks for all sizes all_results = [] - - # Run benchmarks for size in sizes: - print(f"\nBenchmarking Lorentz boost operations with array size: {size:,}") + print(f"\nBenchmarking with {size:,} vectors") results = benchmark_lorentz_boost(size) all_results.append(results) - # Print some summary statistics + # Print results for op in operations: - if op in results: - lvec_time = results[op]['lvec']['time'] * 1000 # ms - vector_time = results[op]['vector']['time'] * 1000 # ms - speedup = vector_time / lvec_time - print(f" {op:20s} - LVec: {lvec_time:.3f} ms, vector: {vector_time:.3f} ms, Speedup: {speedup:.2f}x") - - # Create plots directory if it doesn't exist - os.makedirs("benchmarks/plots", exist_ok=True) + lvec_time = results[op]['lvec']['time'] * 1000 # Convert to ms + vector_time = results[op]['vector']['time'] * 1000 + lvec_mem = results[op]['lvec']['memory'] + vector_mem = results[op]['vector']['memory'] + + print(f" {op.upper()} Operation:") + print(f" lvec: {lvec_time:.3f} ms, {lvec_mem:.2f} MB") + print(f" vector: {vector_time:.3f} ms, {vector_mem:.2f} MB") + print(f" Speed Ratio: {vector_time/lvec_time:.2f}x faster with lvec") + print(f" Memory Ratio: {vector_mem/lvec_mem:.2f}x more memory efficient with lvec") # Plot results plot_boost_time_comparison(sizes, all_results, operations, - save_path="benchmarks/plots/lorentz_boost_time_comparison.pdf") + 
save_path=os.path.join("benchmarks/plots", "lorentz_boost_time_comparison.pdf")) + plot_lvec_z_boost_comparison(sizes, all_results, - save_path="benchmarks/plots/lvec_z_boost_methods_comparison.pdf") + save_path=os.path.join("benchmarks/plots", "lvec_z_boost_comparison.pdf")) + plot_memory_usage(sizes, all_results, operations, - save_path="benchmarks/plots/lorentz_boost_memory_comparison.pdf") + save_path=os.path.join("benchmarks/plots", "lorentz_boost_memory_usage.pdf")) - print("\nBenchmarks completed. Plots saved to:") - print(" - benchmarks/plots/lorentz_boost_time_comparison.pdf") - print(" - benchmarks/plots/lvec_z_boost_methods_comparison.pdf") - print(" - benchmarks/plots/lorentz_boost_memory_comparison.pdf") + print("\nBenchmarks completed. Results saved to PDF files.") if __name__ == "__main__": - # Create plots directory if it doesn't exist - os.makedirs("benchmarks/plots", exist_ok=True) - + # Run all benchmarks run_benchmarks() diff --git a/benchmarks/benchmark_lvec.py b/benchmarks/benchmark_lvec.py index 20081e4..115d022 100644 --- a/benchmarks/benchmark_lvec.py +++ b/benchmarks/benchmark_lvec.py @@ -1,10 +1,12 @@ import numpy as np import timeit import matplotlib.pyplot as plt -from lvec import LVec -import vector import tracemalloc import gc +import os +from lvec import LVec +import vector +from plotting_utils import plot_combined_performance, set_publication_style, COLORS def get_process_memory(): """Get memory usage in MB for the current process.""" @@ -41,7 +43,7 @@ def measure_single_timing(operation, n_repeats=10): return np.mean(times), np.std(times) def benchmark_lvec_vs_vector(sizes, n_repeats=10): - """Compare performance between LVec and vector package operations.""" + """Compare performance between lvec and vector package operations.""" lvec_times = [] lvec_errors = [] vector_times = [] @@ -52,7 +54,7 @@ def benchmark_lvec_vs_vector(sizes, n_repeats=10): for size in sizes: px, py, pz, E = generate_test_data(size) - # Benchmark LVec + # Benchmark lvec def lvec_operation(): vec = LVec(px, py, pz, E) return vec.mass @@ -75,8 +77,8 @@ def vector_operation(): vector_memory.append(vector_mem) print(f"Size {size:,}:") - print(f" LVec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.1f} MB") - print(f" Vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.1f} MB") + print(f" lvec: {lvec_mean*1000:.3f} ± {lvec_std*1000:.3f} ms, {lvec_mem:.1f} MB") + print(f" vector: {vector_mean*1000:.3f} ± {vector_std*1000:.3f} ms, {vector_mem:.1f} MB") print(f" Ratio: {vector_mean/lvec_mean:.2f}x\n") return (np.array(lvec_times), np.array(lvec_errors), @@ -84,53 +86,34 @@ def vector_operation(): np.array(lvec_memory), np.array(vector_memory)) def plot_results(sizes, lvec_data, vector_data): - """Plot benchmark results.""" - lvec_times, _, lvec_memory = lvec_data - vector_times, _, vector_memory = vector_data - - # Convert to milliseconds - lvec_times *= 1000 - vector_times *= 1000 - - # Create figure with two subplots - plt.style.use('default') - fig = plt.figure(figsize=(12, 8)) - gs = fig.add_gridspec(2, 1, height_ratios=[1, 1], hspace=0.3) + """Plot benchmark results using standardized plotting utilities.""" + lvec_times, lvec_errors, lvec_memory = lvec_data + vector_times, vector_errors, vector_memory = vector_data - # Upper plot: timing comparison - ax1 = fig.add_subplot(gs[0]) - ax1.plot(sizes, lvec_times, 'o-', label='LVec', color='#3498db', linewidth=2, markersize=8) - ax1.plot(sizes, vector_times, 'o-', label='vector', color='#9b59b6', linewidth=2, 
markersize=8) - ax1.set_xscale('log') - ax1.set_yscale('log') - ax1.set_ylabel('Time per operation (ms)', fontsize=12) - ax1.set_title('Performance Comparison: LVec vs vector package', fontsize=14, pad=15) - ax1.grid(True, which='both', linestyle='--', alpha=0.7) - ax1.legend(fontsize=12) - ax1.tick_params(labelsize=10) - - # Bottom plot: memory usage - ax2 = fig.add_subplot(gs[1]) - ax2.plot(sizes, lvec_memory, 'o-', label='LVec', color='#2ecc71', linewidth=2, markersize=8) - ax2.plot(sizes, vector_memory, 'o-', label='vector', color='#e74c3c', linewidth=2, markersize=8) - ax2.set_xscale('log') - ax2.set_yscale('log') - ax2.set_xlabel('Array Size', fontsize=12) - ax2.set_ylabel('Memory Usage (MB)', fontsize=12) - ax2.grid(True, which='both', linestyle='--', alpha=0.7) - ax2.legend(fontsize=12) - ax2.tick_params(labelsize=10) - - # Add minor gridlines - ax1.grid(True, which='minor', linestyle=':', alpha=0.4) - ax2.grid(True, which='minor', linestyle=':', alpha=0.4) - - plt.savefig('benchmarks/plots/benchmark_results.pdf', dpi=300, bbox_inches='tight') - plt.close() + # Use the standardized plotting utility + plot_combined_performance( + sizes, + lvec_times, + vector_times, + lvec_memory, + vector_memory, + title="lvec vs vector Performance Comparison", + filename="lvec_vs_vector_benchmark.pdf" + ) if __name__ == '__main__': + # Create plots directory if it doesn't exist + os.makedirs("benchmarks/plots", exist_ok=True) + # Test with different array sizes sizes = [10, 100, 1000, 10000, 100000, 1000000] - lvec_times, lvec_errors, vector_times, vector_errors, lvec_memory, vector_memory = benchmark_lvec_vs_vector(sizes) - plot_results(sizes, (lvec_times, lvec_errors, lvec_memory), - (vector_times, vector_errors, vector_memory)) + results = benchmark_lvec_vs_vector(sizes) + + # Plot results + plot_results( + sizes, + (results[0], results[1], results[4]), + (results[2], results[3], results[5]) + ) + + print("Benchmark completed. 
Results saved to PDF file.") diff --git a/benchmarks/benchmark_operations.py b/benchmarks/benchmark_operations.py index 7ed2fda..286fb7f 100644 --- a/benchmarks/benchmark_operations.py +++ b/benchmarks/benchmark_operations.py @@ -5,6 +5,7 @@ import gc from lvec import LVec import vector +from plotting_utils import plot_operations_grid def measure_memory_usage(operation, n_repeats=5): """Measure memory usage for an operation.""" @@ -94,50 +95,14 @@ def benchmark_operation(operation_name, size, n_repeats=10): } def plot_all_operations(sizes, all_results, operations): - """Plot all operation comparisons in subplots.""" - plt.style.use('default') - n_ops = len(operations) - n_cols = 3 - n_rows = (n_ops + n_cols - 1) // n_cols # Ceiling division - - fig = plt.figure(figsize=(15, 4 * n_rows)) - gs = fig.add_gridspec(n_rows, n_cols, hspace=0.4, wspace=0.3) - - for idx, operation in enumerate(operations): - row = idx // n_cols - col = idx % n_cols - ax = fig.add_subplot(gs[row, col]) - - results = all_results[operation] - - # Extract data - lvec_times = np.array([r['lvec']['time'] for r in results]) * 1000 # to ms - vector_times = np.array([r['vector']['time'] for r in results]) * 1000 - - # Timing plot - ax.plot(sizes, lvec_times, 'o-', label='lvec', color='#3498db', - linewidth=2, markersize=6) - ax.plot(sizes, vector_times, 'o-', label='vector', color='#9b59b6', - linewidth=2, markersize=6) - ax.set_xscale('log') - ax.set_yscale('log') - ax.set_xlabel('Array Size', fontsize=10) - ax.set_ylabel('Time (ms)', fontsize=10) - ax.set_title(operation.replace('_', ' ').title(), fontsize=12) - ax.grid(True, which='both', linestyle='--', alpha=0.7) - ax.grid(True, which='minor', linestyle=':', alpha=0.4) - ax.legend(fontsize=10) - ax.tick_params(labelsize=8) - - # Remove any empty subplots - for idx in range(len(operations), n_rows * n_cols): - row = idx // n_cols - col = idx % n_cols - fig.delaxes(fig.add_subplot(gs[row, col])) - - plt.suptitle('Performance Comparison of Operations', fontsize=14, y=1.02) - plt.savefig('benchmarks/plots/benchmark_all_operations.pdf', dpi=300, bbox_inches='tight') - plt.close() + """Plot all operation comparisons using standardized plotting utilities.""" + plot_operations_grid( + sizes, + all_results, + operations, + title='Performance Comparison of Operations', + filename='benchmark_all_operations.pdf' + ) if __name__ == '__main__': # Test with different array sizes @@ -159,8 +124,8 @@ def plot_all_operations(sizes, all_results, operations): lvec_time = result['lvec']['time'] * 1000 vector_time = result['vector']['time'] * 1000 ratio = vector_time / lvec_time - print(f" LVec: {lvec_time:.3f} ms") - print(f" Vector: {vector_time:.3f} ms") + print(f" lvec: {lvec_time:.3f} ms") + print(f" vector: {vector_time:.3f} ms") print(f" Ratio: {ratio:.2f}x") all_results[operation] = results diff --git a/benchmarks/plotting_utils.py b/benchmarks/plotting_utils.py new file mode 100644 index 0000000..0a1b42c --- /dev/null +++ b/benchmarks/plotting_utils.py @@ -0,0 +1,378 @@ +""" +Standardized plotting utilities for lvec benchmarks. + +This module provides consistent plotting styles and functions for all benchmark +scripts to ensure professional journal publication quality. 
+""" + +import os +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.ticker import ScalarFormatter + +# Create plots directory if it doesn't exist +PLOTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "plots") +os.makedirs(PLOTS_DIR, exist_ok=True) + +# Define professional color palette +COLORS = { + 'lvec': '#3366CC', # Blue + 'vector': '#DC3912', # Red + 'lvec_cached': '#109618', # Green + 'lvec_uncached': '#FF9900', # Orange + 'vector2d': '#990099', # Purple + 'vector3d': '#0099C6', # Cyan + 'gray': '#7F7F7F', # Gray + 'background': '#F8F8F8' # Light gray background +} + +def set_publication_style(): + """Set the matplotlib style for publication-quality plots.""" + plt.style.use('default') + plt.rcParams.update({ + 'font.family': 'sans-serif', + 'font.sans-serif': ['Helvetica', 'Arial', 'DejaVu Sans'], + 'font.size': 11, + 'axes.labelsize': 12, + 'axes.titlesize': 14, + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'legend.fontsize': 10, + 'figure.figsize': (10, 7), + 'figure.dpi': 150, + 'savefig.dpi': 300, + 'savefig.format': 'pdf', + 'axes.grid': True, + 'grid.linestyle': '--', + 'grid.alpha': 0.7, + 'lines.linewidth': 2, + 'lines.markersize': 6, + 'axes.spines.top': False, + 'axes.spines.right': False, + 'axes.linewidth': 1.2, + 'xtick.major.width': 1.2, + 'ytick.major.width': 1.2 + }) + +def format_log_axes(ax, x_is_log=True, y_is_log=True): + """Apply consistent formatting to log-scale axes.""" + if x_is_log: + ax.set_xscale('log') + ax.xaxis.set_major_formatter(ScalarFormatter()) + + if y_is_log: + ax.set_yscale('log') + ax.yaxis.set_major_formatter(ScalarFormatter()) + + ax.grid(True, which='both', linestyle='--', alpha=0.7) + ax.grid(True, which='minor', linestyle=':', alpha=0.4) + +def plot_performance_comparison(sizes, lvec_data, vector_data, title, filename, y_label="Time (ms)"): + """ + Create a standardized performance comparison plot. + + Parameters: + ----------- + sizes : array-like + Array sizes used in the benchmark + lvec_data : tuple + Tuple containing (times, errors) for lvec + vector_data : tuple + Tuple containing (times, errors) for vector + title : str + Plot title + filename : str + Filename to save the plot (without path) + y_label : str + Label for y-axis + """ + set_publication_style() + + lvec_times, lvec_errors = lvec_data + vector_times, vector_errors = vector_data + + # Convert to milliseconds if needed + if np.mean(lvec_times) < 0.1: + lvec_times *= 1000 + lvec_errors *= 1000 + vector_times *= 1000 + vector_errors *= 1000 + + fig, ax = plt.subplots(figsize=(8, 6)) + + # Plot with error bars + ax.errorbar(sizes, lvec_times, yerr=lvec_errors, fmt='o-', + label='lvec', color=COLORS['lvec'], + linewidth=2, markersize=6, capsize=3) + ax.errorbar(sizes, vector_times, yerr=vector_errors, fmt='o-', + label='vector', color=COLORS['vector'], + linewidth=2, markersize=6, capsize=3) + + format_log_axes(ax) + ax.set_xlabel('Array Size', fontsize=12) + ax.set_ylabel(y_label, fontsize=12) + ax.set_title(title, fontsize=14) + ax.legend(fontsize=10) + + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close() + +def plot_memory_comparison(sizes, lvec_memory, vector_memory, title, filename): + """ + Create a standardized memory usage comparison plot. 
+ + Parameters: + ----------- + sizes : array-like + Array sizes used in the benchmark + lvec_memory : array-like + Memory usage for lvec + vector_memory : array-like + Memory usage for vector + title : str + Plot title + filename : str + Filename to save the plot (without path) + """ + set_publication_style() + + fig, ax = plt.subplots(figsize=(8, 6)) + + ax.plot(sizes, lvec_memory, 'o-', label='lvec', + color=COLORS['lvec'], linewidth=2, markersize=6) + ax.plot(sizes, vector_memory, 'o-', label='vector', + color=COLORS['vector'], linewidth=2, markersize=6) + + format_log_axes(ax) + ax.set_xlabel('Array Size', fontsize=12) + ax.set_ylabel('Memory Usage (MB)', fontsize=12) + ax.set_title(title, fontsize=14) + ax.legend(fontsize=10) + + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close() + +def plot_combined_performance(sizes, lvec_times, vector_times, lvec_memory, vector_memory, title, filename): + """ + Create a standardized plot with both timing and memory usage. + + Parameters: + ----------- + sizes : array-like + Array sizes used in the benchmark + lvec_times : array-like + Execution times for lvec + vector_times : array-like + Execution times for vector + lvec_memory : array-like + Memory usage for lvec + vector_memory : array-like + Memory usage for vector + title : str + Plot title + filename : str + Filename to save the plot (without path) + """ + set_publication_style() + + # Convert to milliseconds if needed + if np.mean(lvec_times) < 0.1: + lvec_times = lvec_times * 1000 + vector_times = vector_times * 1000 + + fig = plt.figure(figsize=(12, 8)) + gs = fig.add_gridspec(2, 1, height_ratios=[1, 1], hspace=0.3) + + # Upper plot: timing comparison + ax1 = fig.add_subplot(gs[0]) + ax1.plot(sizes, lvec_times, 'o-', label='lvec', color=COLORS['lvec'], linewidth=2, markersize=6) + ax1.plot(sizes, vector_times, 'o-', label='vector', color=COLORS['vector'], linewidth=2, markersize=6) + format_log_axes(ax1) + ax1.set_ylabel('Time per operation (ms)', fontsize=12) + ax1.set_title(title, fontsize=14, pad=15) + ax1.legend(fontsize=10) + + # Bottom plot: memory usage + ax2 = fig.add_subplot(gs[1]) + ax2.plot(sizes, lvec_memory, 'o-', label='lvec', color=COLORS['lvec'], linewidth=2, markersize=6) + ax2.plot(sizes, vector_memory, 'o-', label='vector', color=COLORS['vector'], linewidth=2, markersize=6) + format_log_axes(ax2) + ax2.set_xlabel('Array Size', fontsize=12) + ax2.set_ylabel('Memory Usage (MB)', fontsize=12) + ax2.legend(fontsize=10) + + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close() + +def plot_operations_grid(sizes, all_results, operations, title, filename): + """ + Create a grid of plots for multiple operations. 
+ + Parameters: + ----------- + sizes : array-like + Array sizes used in the benchmark + all_results : dict + Dictionary with operation names as keys and results as values + operations : list + List of operation names to plot + title : str + Plot title + filename : str + Filename to save the plot (without path) + """ + set_publication_style() + + n_ops = len(operations) + n_cols = min(3, n_ops) + n_rows = (n_ops + n_cols - 1) // n_cols # Ceiling division + + fig = plt.figure(figsize=(15, 4 * n_rows)) + gs = fig.add_gridspec(n_rows, n_cols, hspace=0.4, wspace=0.3) + + for idx, operation in enumerate(operations): + row = idx // n_cols + col = idx % n_cols + ax = fig.add_subplot(gs[row, col]) + + results = all_results[operation] + + # Extract data + lvec_times = np.array([r['lvec']['time'] for r in results]) * 1000 # to ms + vector_times = np.array([r['vector']['time'] for r in results]) * 1000 + + # Timing plot + ax.plot(sizes, lvec_times, 'o-', label='lvec', color=COLORS['lvec'], + linewidth=2, markersize=6) + ax.plot(sizes, vector_times, 'o-', label='vector', color=COLORS['vector'], + linewidth=2, markersize=6) + + format_log_axes(ax) + ax.set_xlabel('Array Size', fontsize=10) + ax.set_ylabel('Time (ms)', fontsize=10) + ax.set_title(operation.replace('_', ' ').title(), fontsize=12) + ax.legend(fontsize=10) + + # Remove any empty subplots + for idx in range(len(operations), n_rows * n_cols): + row = idx // n_cols + col = idx % n_cols + fig.delaxes(fig.add_subplot(gs[row, col])) + + plt.suptitle(title, fontsize=14, y=1.02) + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close() + +def plot_vector_types_comparison(sizes, results, vector_types, operations, title, filename): + """ + Create a comparison plot for different vector types. 
+ + Parameters: + ----------- + sizes : array-like + Array sizes used in the benchmark + results : dict + Mapping from vector type to a list of per-size result dicts (one entry per value in sizes) + vector_types : list + List of vector types to plot + operations : list + List of operations to plot + title : str + Plot title + filename : str + Filename to save the plot (without path) + """ + set_publication_style() + + n_ops = len(operations) + n_cols = min(3, n_ops) + n_rows = (n_ops + n_cols - 1) // n_cols + + fig = plt.figure(figsize=(15, 4 * n_rows)) + gs = fig.add_gridspec(n_rows, n_cols, hspace=0.4, wspace=0.3) + + # Assign colors to vector types + vector_colors = { + 'LVec': COLORS['lvec'], + 'Vector2D': COLORS['vector2d'], + 'Vector3D': COLORS['vector3d'], + 'Scikit Vector': COLORS['vector'] + } + + for op_idx, operation in enumerate(operations): + row = op_idx // n_cols + col = op_idx % n_cols + ax = fig.add_subplot(gs[row, col]) + + for vtype in vector_types: + # results[vtype] is a list aligned with sizes (see run_benchmarks) + if vtype in results and results[vtype] and operation in results[vtype][0]: + times = np.array([results[vtype][i].get(operation, {}).get("time", np.nan) for i in range(len(sizes))]) * 1000 + ax.plot(sizes, times, 'o-', label=vtype, + color=vector_colors.get(vtype, COLORS['gray']), + linewidth=2, markersize=6) + + format_log_axes(ax) + ax.set_xlabel('Array Size', fontsize=10) + ax.set_ylabel('Time (ms)', fontsize=10) + ax.set_title(operation.replace('_', ' ').title(), fontsize=12) + ax.legend(fontsize=10) + + # Remove any empty subplots + for idx in range(len(operations), n_rows * n_cols): + row = idx // n_cols + col = idx % n_cols + fig.delaxes(fig.add_subplot(gs[row, col])) + + plt.suptitle(title, fontsize=14, y=1.02) + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close() + +def plot_physics_results(data1, data2, labels, title, filename): + """ + Create a standardized plot for physics results comparison. + + Parameters: + ----------- + data1, data2 : dict + Dictionaries containing physics results + labels : tuple + Tuple of (label1, label2) for the legend + title : str + Plot title + filename : str + Filename to save the plot (without path) + """ + set_publication_style() + + fig, axs = plt.subplots(2, 2, figsize=(12, 10)) + axs = axs.flatten() + + # Define histogram properties + hist_props = { + 'm12': {'bins': 50, 'range': (0, 5), 'xlabel': r'$m_{12}$ (GeV/$c^2$)'}, + 'm23': {'bins': 50, 'range': (0, 5), 'xlabel': r'$m_{23}$ (GeV/$c^2$)'}, + 'm13': {'bins': 50, 'range': (0, 5), 'xlabel': r'$m_{13}$ (GeV/$c^2$)'}, + 'three_body_mass': {'bins': 50, 'range': (4.5, 6), 'xlabel': r'$m_{123}$ (GeV/$c^2$)'} + } + + # Plot histograms + for i, (key, props) in enumerate(hist_props.items()): + if key in data1 and key in data2: + axs[i].hist(data1[key], bins=props['bins'], range=props['range'], + alpha=0.5, label=labels[0], color=COLORS['lvec']) + axs[i].hist(data2[key], bins=props['bins'], range=props['range'], + alpha=0.5, label=labels[1], color=COLORS['vector']) + axs[i].set_xlabel(props['xlabel'], fontsize=12) + axs[i].set_ylabel('Counts', fontsize=12) + axs[i].legend(fontsize=10) + axs[i].grid(True, linestyle='--', alpha=0.7) + + plt.suptitle(title, fontsize=14) + plt.tight_layout() + plt.savefig(os.path.join(PLOTS_DIR, filename), bbox_inches='tight') + plt.close()
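For reference, a minimal usage sketch of the plotting_utils API added above. The arrays below are synthetic placeholders for illustration only, not benchmark output; the plot_combined_performance signature is the one introduced in this diff.

import numpy as np
from plotting_utils import plot_combined_performance

# Synthetic timings in seconds and memory in MB; real scripts pass the
# arrays returned by their benchmark functions.
sizes = np.array([10, 100, 1000, 10000])
lvec_times = np.array([1.0e-5, 4.0e-5, 3.0e-4, 2.5e-3])
vector_times = np.array([2.0e-5, 7.0e-5, 6.0e-4, 5.0e-3])
lvec_memory = np.array([0.1, 0.4, 3.2, 30.0])
vector_memory = np.array([0.1, 0.5, 4.1, 38.0])

# Publication styling is applied inside the helper, and the PDF is written
# to PLOTS_DIR (benchmarks/plots, resolved relative to plotting_utils.py),
# so the caller needs no directory setup.
plot_combined_performance(sizes, lvec_times, vector_times,
                          lvec_memory, vector_memory,
                          title="lvec vs vector (synthetic data)",
                          filename="example_combined.pdf")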