-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.py
More file actions
68 lines (54 loc) · 2.44 KB
/
benchmark.py
File metadata and controls
68 lines (54 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import numpy as np
import time
import fast_parser
import matplotlib.pyplot as plt
# Path of the binary data file that run_benchmark() hands to both
# fast_parser.FastParser and np.fromfile.
FILENAME = "market_data.bin"
# Record count used by run_benchmark() to compute the throughput figures.
# NOTE(review): presumably the number of records stored in FILENAME; nothing
# in this file verifies it -- confirm against whatever generates the data.
NUM_RECORDS = 40_000_000
def _label_bars(ax, bars, template):
    """Write each bar's height just above it, formatted with *template*."""
    for bar in bars:
        height = bar.get_height()
        ax.text(
            bar.get_x() + bar.get_width() / 2.0,
            height,
            template.format(height),
            ha='center',
            va='bottom',
            fontsize=11,
            fontweight='bold',
        )


def run_benchmark():
    """Time two ways of computing the mean price in FILENAME and chart them.

    The first measurement loads the file with ``np.fromfile``, wraps it in a
    pandas DataFrame and takes the mean of the ``price`` column.  The second
    calls ``get_prices()`` on a ``fast_parser.FastParser`` and takes
    ``np.mean`` of the result.  Both timings and means are printed, and a
    two-panel bar chart (latency and throughput) is written to
    ``benchmark_results.png`` in the current working directory.

    Throughput and the chart title are derived from NUM_RECORDS and the
    record dtype's size, not from the number of records actually read.

    Returns:
        None.
    """
    record_dtype = np.dtype([('ts', 'u8'), ('sym', 'S8'), ('price', 'f8')])

    # NOTE(review): the parser is constructed before any timer starts, so
    # whatever work its constructor does is not part of the zero-copy timing,
    # while the pandas/NumPy timing includes reading the file.  Confirm this
    # is the comparison you intend.
    p = fast_parser.FastParser(FILENAME)
    print("--- Starting Benchmark ---")

    # Normal Pandas/Numpy Method
    # perf_counter() is monotonic and high-resolution; time.time() can report
    # a zero-length interval on coarse clocks, which would break the speedup
    # division below.
    start_pd = time.perf_counter()
    data = np.fromfile(FILENAME, dtype=record_dtype)
    df = pd.DataFrame(data)
    pd_mean = df['price'].mean()
    pd_time = time.perf_counter() - start_pd
    print(f"Pandas/Numpy Time: {pd_time:.4f}s | Mean: {pd_mean:.2f}")

    # Only the scalar results are needed from here on; drop the large arrays
    # so they do not add memory pressure to the second measurement.
    del df, data

    # Zero-Copy Method
    start_zc = time.perf_counter()
    prices = p.get_prices()
    zc_mean = np.mean(prices)
    zc_time = time.perf_counter() - start_zc
    print(f"Zero-Copy Time: {zc_time:.4f}s | Mean: {zc_mean:.2f}")

    speedup = pd_time / zc_time
    print(f"\n Your parser is {speedup:.1f}x faster!")

    # Visualization
    labels = ['Pandas / NumPy', 'Zero-Copy Parser']
    actual_times = [pd_time, zc_time]
    actual_throughput = [(NUM_RECORDS / t) / 1e6 for t in actual_times]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    colors = ['#E74C3C', '#3498DB']

    bars1 = ax1.bar(labels, actual_times, color=colors, width=0.6)
    ax1.set_title('Execution Latency\n(Lower is Better)', fontsize=14, pad=15)
    ax1.set_ylabel('Time (Seconds)', fontsize=12)
    ax1.grid(axis='y', linestyle='--', alpha=0.6)
    _label_bars(ax1, bars1, '{:.4f}s')

    bars2 = ax2.bar(labels, actual_throughput, color=colors, width=0.6)
    ax2.set_title('Data Throughput\n(Higher is Better)', fontsize=14, pad=15)
    ax2.set_ylabel('Millions of Records / Second', fontsize=12)
    ax2.grid(axis='y', linestyle='--', alpha=0.6)
    _label_bars(ax2, bars2, '{:.1f}M')

    # Dataset size comes from the record count and the dtype's byte size
    # (8 + 8 + 8 = 24 bytes per record) instead of a hand-typed expression.
    dataset_mb = NUM_RECORDS * record_dtype.itemsize / 1e6
    fig.suptitle(
        f'Project Performance: {speedup:.1f}x Speedup\n'
        f'Dataset: {NUM_RECORDS / 1e6:g} Million Records ({dataset_mb:.1f} MB)',
        fontsize=16,
        fontweight='bold',
        y=1.08,
    )
    fig.tight_layout()
    fig.savefig('benchmark_results.png', dpi=300, bbox_inches='tight')
    # Release the figure now that it is on disk.
    plt.close(fig)
    print("\n Benchmark image saved as 'benchmark_results.png' in the sidebar!")
# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_benchmark()