-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathbenchmark.py
More file actions
92 lines (76 loc) · 3.34 KB
/
benchmark.py
File metadata and controls
92 lines (76 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env python3
import argparse
import os
import subprocess
import glob
import re
import time
def main():
    """Run the requested benchmarks against every matching datafusion-cli binary.

    Parses the command line (``--output-dir``, ``--binary-pattern``,
    ``--benchmarks``), makes sure the output directory exists, then looks in
    the ``builds/`` directory next to this script for files matching the
    binary pattern and runs each requested benchmark once per binary.

    Binary file names are expected to look like
    ``datafusion-cli@VERSION@TIMESTAMP``. When a name has fewer than three
    ``@``-separated parts, both version and timestamp are reported as
    ``"unknown"``.

    Returns:
        None. Progress and problems are reported on stdout.
    """
    parser = argparse.ArgumentParser(
        description="Execute benchmarks using specified datafusion-cli binaries"
    )
    parser.add_argument(
        '--output-dir',
        help='Directory to write benchmark results',
        default="results",
    )
    parser.add_argument(
        '--binary-pattern',
        default='datafusion-cli@*',
        help='Pattern to match datafusion-cli binaries in builds/ directory (default: datafusion-cli@*)',
    )
    parser.add_argument(
        '--benchmarks',
        nargs='+',
        default=['clickbench'],
        help='Benchmarks to run (default: clickbench)',
    )
    args = parser.parse_args()

    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test followed by os.makedirs().
    os.makedirs(args.output_dir, exist_ok=True)

    # Find all datafusion-cli binaries matching the pattern.
    builds_dir = os.path.join(os.path.dirname(__file__), 'builds')
    binary_pattern = os.path.join(builds_dir, args.binary_pattern)
    # glob returns matches in arbitrary order; sort so that the order in
    # which binaries are benchmarked is reproducible between runs.
    binaries = sorted(glob.glob(binary_pattern))

    if not binaries:
        print(f"No datafusion-cli binaries found matching pattern: {args.binary_pattern}")
        print(f"Looking in directory: {builds_dir}")
        return

    print(f"Found {len(binaries)} datafusion-cli binaries:")
    for binary in binaries:
        print(f" {os.path.basename(binary)}")

    # Run benchmarks for each binary.
    for binary_path in binaries:
        binary_name = os.path.basename(binary_path)

        # Parse version and timestamp from the binary name:
        # datafusion-cli@VERSION@TIMESTAMP
        parts = binary_name.split('@')
        if len(parts) >= 3:
            version, timestamp = parts[1], parts[2]
        else:
            version = timestamp = "unknown"

        print(f"\nRunning benchmarks with {binary_name}")
        print(f"Version: {version}, Timestamp: {timestamp}")

        # Run each requested benchmark; unknown names are reported and skipped.
        for benchmark in args.benchmarks:
            if benchmark == 'clickbench':
                run_clickbench_benchmark(binary_path, version, timestamp, args.output_dir)
            else:
                print(f"Unknown benchmark: {benchmark}")
def run_clickbench_benchmark(binary_path, version, timestamp, output_dir):
    """Run the clickbench benchmark with the specified datafusion-cli binary.

    Marks the binary as executable (mode 0o755) and then runs the
    ``run_clickbench.py`` script located next to this file with ``python3``,
    forwarding the arguments as ``--output-dir``, ``--git-revision``,
    ``--git-revision-timestamp`` and ``--datafusion-binary``.

    The child process's output is not captured, so it appears directly on
    the console. Failures (a non-zero exit code or an exception while
    preparing or launching the process) are reported on stdout and do not
    propagate, so the caller can continue with the next binary.

    Args:
        binary_path: Path to the datafusion-cli binary to benchmark.
        version: Revision string passed as ``--git-revision``.
        timestamp: Value passed as ``--git-revision-timestamp``.
        output_dir: Directory passed as ``--output-dir``.

    Returns:
        None.
    """
    print(" Running clickbench benchmark...")

    try:
        # Make the binary executable. Done inside the try block so that a
        # chmod failure is reported like any other failure for this binary
        # instead of aborting the whole benchmark run.
        os.chmod(binary_path, 0o755)

        # Build the command line for the clickbench script.
        script_path = os.path.join(os.path.dirname(__file__), 'run_clickbench.py')
        cmd = [
            'python3', script_path,
            '--output-dir', output_dir,
            '--git-revision', version,
            '--git-revision-timestamp', timestamp,
            '--datafusion-binary', binary_path,
        ]
        print(f" Executing: {' '.join(cmd)}")

        # Output is deliberately not captured, so the child writes straight
        # to the console. As a consequence result.stdout / result.stderr are
        # always None and must not be printed; report the exit code instead.
        result = subprocess.run(cmd)

        if result.returncode == 0:
            print(" ✓ Clickbench benchmark completed successfully")
        else:
            print(f" ✗ Clickbench benchmark failed with exit code {result.returncode}")
            print(" (see the benchmark output above for details)")
    except Exception as e:
        # Best-effort boundary: report and let the caller move on.
        print(f" ✗ Error running clickbench benchmark: {e}")
# Entry point: run the benchmarks only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()