Merged
45 commits
d94c6da
Create vlm_metrics.py
sumanaga Nov 10, 2025
6babcf5
Merge branch 'intel-retail:main' into vlmmetrics
sumanaga Nov 10, 2025
b61e36a
Rename benchmark-scripts/vlm_metrics.py to benchmark-scripts/vlm/vlm_…
sumanaga Nov 10, 2025
917a02b
Create setup.py
sumanaga Nov 10, 2025
c96820e
Rename benchmark-scripts/vlm/setup.py to benchmark-scripts/setup.py
sumanaga Nov 10, 2025
f4a63fc
Rename benchmark-scripts/vlm/vlm_metrics.py to benchmark-scripts/vlm_…
sumanaga Nov 10, 2025
6d0fda8
Rename benchmark-scripts/setup.py to benchmark-scripts/vlm/setup.py
sumanaga Nov 10, 2025
725e222
Rename benchmark-scripts/vlm_metrics.py to benchmark-scripts/vlm/vlm_…
sumanaga Nov 10, 2025
3670903
Update vlm_metrics.py
sumanaga Nov 10, 2025
0ffe277
Update vlm_metrics.py
sumanaga Nov 10, 2025
6fa8896
Update vlm_metrics.py
sumanaga Nov 10, 2025
21705c2
Update vlm_metrics.py
sumanaga Nov 10, 2025
e098161
Update vlm_metrics.py
sumanaga Nov 10, 2025
865dffd
Update vlm_metrics.py
sumanaga Nov 10, 2025
c142a0e
Update vlm_metrics.py
sumanaga Nov 10, 2025
6c55c7b
Update vlm_metrics.py
sumanaga Nov 10, 2025
9158475
Update vlm_metrics.py
sumanaga Nov 10, 2025
2f2305b
Update vlm_metrics.py
sumanaga Nov 10, 2025
1f8e21e
Update vlm_metrics.py
sumanaga Nov 10, 2025
51a6ac2
Update vlm_metrics.py
sumanaga Nov 10, 2025
d57ceb3
Update vlm_metrics.py
sumanaga Nov 11, 2025
b09cdd1
Update vlm_metrics.py
sumanaga Nov 11, 2025
f3549d2
Update vlm_metrics.py
sumanaga Nov 11, 2025
7492a08
Update vlm_metrics.py
sumanaga Nov 13, 2025
bc54623
Update vlm_metrics.py
sumanaga Nov 13, 2025
aa34f06
Update vlm_metrics.py
sumanaga Nov 13, 2025
c63d4d5
Update consolidate_multiple_run_of_metrics.py
sumanaga Nov 13, 2025
9c5d246
Update vlm_metrics.py
sumanaga Nov 13, 2025
1918a79
Update consolidate_multiple_run_of_metrics.py
sumanaga Nov 13, 2025
f951f63
Create vlm_metrics_logger.py
sumanaga Dec 1, 2025
3e4481c
Create setup.py
sumanaga Dec 1, 2025
08291b5
Update vlm_metrics_logger.py
sumanaga Dec 2, 2025
d3ef12e
Merge branch 'intel-retail:main' into vlmmetrics
sumanaga Dec 2, 2025
022db34
Update consolidate_multiple_run_of_metrics.py
sumanaga Dec 2, 2025
4a06632
Delete benchmark-scripts/vlm directory
sumanaga Dec 2, 2025
ce18890
Update trivyimagescan.yaml
sumanaga Dec 3, 2025
476a097
Apply suggestions from code review
avinash-palleti Dec 3, 2025
d0caa64
Update vlm_metrics_logger.py
sumanaga Jan 6, 2026
872de82
Update consolidate_multiple_run_of_metrics.py
sumanaga Jan 6, 2026
aee5fd9
Update vlm_metrics_logger.py
sumanaga Jan 6, 2026
ff4c076
Update vlm_metrics_logger.py
sumanaga Jan 6, 2026
4c9d290
Update vlm_metrics_logger.py
sumanaga Jan 6, 2026
2806454
Update consolidate_multiple_run_of_metrics.py
sumanaga Jan 7, 2026
62327f1
Update vlm_metrics_logger.py
sumanaga Jan 7, 2026
302bcb6
Merge branch 'main' into vlmmetrics
sumanaga Jan 7, 2026
12 changes: 6 additions & 6 deletions .github/workflows/trivyimagescan.yaml
@@ -37,12 +37,12 @@ jobs:
        uses: aquasecurity/trivy-action@915b19bbe73b92a6cf82a1bc12b087c9a19a5fe2
        with:
          image-ref: 'benchmark:latest'
-         format: 'template'
-         template: '@/contrib/sarif.tpl'
-         output: 'trivy-results.sarif'
+         format: 'table'
+         severity: 'CRITICAL,HIGH'
+         output: 'trivy-performance-tools-results.txt'

-     - name: Upload Trivy scan results to GitHub Security tab
-       uses: github/codeql-action/upload-sarif@1b549b9259bda1cb5ddde3b41741a82a2d15a841
+     - name: Upload Trivy scan results
+       uses: actions/upload-artifact@v4
        with:
-         sarif_file: 'trivy-results.sarif'
+         name: trivy-performance-tools-html-report
+         path: trivy-performance-tools-results.txt
90 changes: 89 additions & 1 deletion benchmark-scripts/consolidate_multiple_run_of_metrics.py
@@ -87,6 +87,92 @@ def extract_data(self, log_file_path):
    def return_blank(self):
        return {AVG_NPU_USAGE_CONSTANT: "NA"}


class VLMAppMetricsExtractor:
    def extract_data(self, log_file_path):
        print("parsing application performance metrics")
        timing_data = defaultdict(list)
        with open(log_file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if 'application=' in line and 'timestamp_ms=' in line:
                    pattern = r'(\w+)=((?:[^\s=]+(?:\s(?!\w+=)[^\s=]+)*)?)'
                    matches = re.findall(pattern, line)
                    data = dict(matches)
                    if data:
                        app_name = data.get('application')
                        event = data.get('event')
                        timestamp_ms = data.get('timestamp_ms')

                        if app_name and event in ['start', 'end'] and timestamp_ms:
                            timing_data[app_name].append({
                                'event': event,
                                'timestamp_ms': int(timestamp_ms)
                            })

        # Calculate durations for each application
        app_durations = defaultdict(list)
        apps_to_analyze = timing_data.keys()

        for app_name in apps_to_analyze:
            events = sorted(timing_data[app_name], key=lambda x: x['timestamp_ms'])
            start_time = None

            for event in events:
                if event['event'] == 'start':
                    start_time = event['timestamp_ms']
                elif event['event'] == 'end' and start_time is not None:
                    duration = event['timestamp_ms'] - start_time
                    app_durations[app_name].append(duration)
                    start_time = None

        durations = dict(app_durations)

        # Generate statistics
        statistics = {}
        for app_name, duration_list in durations.items():
            count = 0
            if duration_list:
                count = len(duration_list)
                statistics[f"Based on {count} total calls, the average {app_name} in (ms)"] = (sum(duration_list) / count)
            else:
                statistics[f"Based on {count} total calls, the average {app_name} in (ms)"] = 0

        return statistics

    def return_blank(self):
        return {"Application Performance Metrics": "NA"}


class VLMPerformanceMetricsExtractor(KPIExtractor):
    # overriding abstract method
    def extract_data(self, log_file_path):
        print("parsing VLM latency")
        latency = {}
        total_duration = 0.0
        count = 0
        with open(log_file_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                for part in parts:
                    if part.startswith("Generate_Duration_Mean="):
                        duration_str = part.split("=")[1]
                        try:
                            duration = float(duration_str)
                            total_duration += duration
                            count += 1
                        except ValueError:
                            continue

        latency['VLM_TOTAL_CALLS'] = count
        latency['VLM_AVERAGE_CALL_DURATION'] = total_duration / count if count > 0 else 0

        return latency

    def return_blank(self):
        return {"VLM_TOTAL_CALLS": "NA", 'VLM_AVERAGE_CALL_DURATION': "NA"}


class GPUUsageExtractor(KPIExtractor):
    # overriding abstract method
    def extract_data(self, log_file_path):
@@ -570,7 +656,9 @@ def return_blank(self):
    "power_usage.log":PowerUsageExtractor,
    "pcm.csv":PCMExtractor,
    r"(?:^xpum).*\.json$": XPUMUsageExtractor,
-   r"^qmassa.*parsed.*\.json$": QMASSAGPUUsageExtractor, }
+   r"^qmassa.*parsed.*\.json$": QMASSAGPUUsageExtractor,
+   r"^vlm_application_metrics.*\.txt$": VLMAppMetricsExtractor,
+   r"^vlm_performance_metrics.*\.txt$": VLMPerformanceMetricsExtractor}

def add_parser():
    parser = argparse.ArgumentParser(description='Consolidate data')
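Illustrative sketch (not part of the diff): the two new extractors consume the key=value lines that VLMMetricsLogger (added later in this PR) writes. The import path, the application name "vlm-demo", the sample file names, and the numbers below are assumptions chosen only to show the expected input shape and output.

# Illustrative only -- file names, application name, and values are assumptions.
from consolidate_multiple_run_of_metrics import (
    VLMAppMetricsExtractor,
    VLMPerformanceMetricsExtractor,
)

# Shape of a vlm_application_metrics_*.txt file written by VLMMetricsLogger.
with open("vlm_application_metrics_sample.txt", "w") as f:
    f.write("2025-11-10 10:00:00 - INFO - application=vlm-demo event=start timestamp_ms=1000000\n")
    f.write("2025-11-10 10:00:02 - INFO - application=vlm-demo event=end timestamp_ms=1002000\n")

print(VLMAppMetricsExtractor().extract_data("vlm_application_metrics_sample.txt"))
# -> {'Based on 1 total calls, the average vlm-demo in (ms)': 2000.0}

# Shape of a vlm_performance_metrics_*.txt file; only Generate_Duration_Mean is consumed here.
with open("vlm_performance_metrics_sample.txt", "w") as f:
    f.write("2025-11-10 10:00:02 - INFO - application=vlm-demo timestamp_ms=1002000 Generate_Duration_Mean=1234.5\n")
    f.write("2025-11-10 10:00:05 - INFO - application=vlm-demo timestamp_ms=1005000 Generate_Duration_Mean=1165.5\n")

print(VLMPerformanceMetricsExtractor().extract_data("vlm_performance_metrics_sample.txt"))
# -> {'VLM_TOTAL_CALLS': 2, 'VLM_AVERAGE_CALL_DURATION': 1200.0}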
7 changes: 7 additions & 0 deletions benchmark-scripts/setup.py
@@ -0,0 +1,7 @@
from setuptools import setup

setup(
    name='vlm_metrics',
    version='0.1',
    py_modules=['vlm_metrics_logger'],
)
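This setup.py packages the logger as the importable module vlm_metrics_logger, so a use-case container can install it rather than copy the file. A minimal sketch of the install-then-import flow, assuming the package has been installed from benchmark-scripts/ (for example with pip install .) and that CONTAINER_RESULTS_PATH points at a writable directory; the path below is hypothetical.

import os
import vlm_metrics_logger

# hypothetical results path for this sketch; real runs set CONTAINER_RESULTS_PATH
os.environ.setdefault("CONTAINER_RESULTS_PATH", "/tmp/vlm-results")

logger = vlm_metrics_logger.get_logger()
print(logger.log_file)              # vlm_application_metrics_<timestamp>_<unique_id>.txt
print(logger.performance_log_file)  # vlm_performance_metrics_<timestamp>_<unique_id>.txt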
206 changes: 206 additions & 0 deletions benchmark-scripts/vlm_metrics_logger.py
@@ -0,0 +1,206 @@
import logging
import os
import time
import uuid
from datetime import datetime
from logging.handlers import RotatingFileHandler


class VLMMetricsLogger:

    def __init__(self, log_dir=None, log_file=None, max_bytes=10*1024*1024, backup_count=5):
        self.log_dir = log_dir or os.getenv('CONTAINER_RESULTS_PATH')
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
        unique_id = uuid.uuid4().hex[:6]
        if log_file is None:
            self.log_file = f"vlm_application_metrics_{timestamp}_{unique_id}.txt"
        else:
            self.log_file = log_file

        # Performance metrics file
        self.performance_log_file = f"vlm_performance_metrics_{timestamp}_{unique_id}.txt"

        self.logger = None
        self.performance_logger = None
        self._setup_logger(max_bytes, backup_count)

    def _setup_logger(self, max_bytes, backup_count):
        """Setup the logger with file rotation"""
        # Create logs directory if it doesn't exist
        os.makedirs(self.log_dir, exist_ok=True)

        # Delete existing VLM metrics files if they exist
        if self.log_dir:
            for filename in os.listdir(self.log_dir):
                if filename.startswith('vlm_application_metrics') or filename.startswith('vlm_performance_metrics'):
                    file_path = os.path.join(self.log_dir, filename)
                    try:
                        os.remove(file_path)
                    except OSError:
                        pass  # Ignore errors if file can't be deleted

        # Create main logger
        self.logger = logging.getLogger('vlm_metrics_logger')
        self.logger.setLevel(logging.INFO)

        # Create performance logger
        self.performance_logger = logging.getLogger('vlm_performance_logger')
        self.performance_logger.setLevel(logging.INFO)

        # Avoid duplicate handlers
        if not self.logger.handlers:
            # Create file handler for main logs
            log_path = os.path.join(self.log_dir, self.log_file)
            file_handler = RotatingFileHandler(
                log_path,
                maxBytes=max_bytes,
                backupCount=backup_count
            )

            # Create formatter
            formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )

            # Set formatter
            file_handler.setFormatter(formatter)

            # Add handler to main logger
            self.logger.addHandler(file_handler)

        # Setup performance logger
        if not self.performance_logger.handlers:
            # Create file handler for performance logs
            performance_log_path = os.path.join(self.log_dir, self.performance_log_file)
            performance_file_handler = RotatingFileHandler(
                performance_log_path,
                maxBytes=max_bytes,
                backupCount=backup_count
            )

            # Create formatter for performance logs
            performance_formatter = logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )

            performance_file_handler.setFormatter(performance_formatter)

            # Add handler to performance logger (no console output for performance)
            self.performance_logger.addHandler(performance_file_handler)

    def log_start_time(self, usecase_name):
        timestamp_ms = int(time.time() * 1000)

        log_data = {
            'application': os.getenv(usecase_name),
            'event': 'start',
            'timestamp_ms': timestamp_ms
        }

        # Format log message
        message_parts = [f"{key}={value}" for key, value in log_data.items()]
        message = " ".join(message_parts)

        self.logger.info(message)
        return timestamp_ms

    def log_end_time(self, usecase_name):
        timestamp_ms = int(time.time() * 1000)

        log_data = {
            'application': os.getenv(usecase_name),
            'event': 'end',
            'timestamp_ms': timestamp_ms
        }

        # Format log message
        message_parts = [f"{key}={value}" for key, value in log_data.items()]
        message = " ".join(message_parts)

        self.logger.info(message)
        return timestamp_ms

    def log_custom_event(self, event_type, usecase_name, **kwargs):
        timestamp_ms = int(time.time() * 1000)

        log_data = {
            'application': os.getenv(usecase_name),
            'event': event_type,
            'timestamp_ms': timestamp_ms
        }

        # Add custom parameters
        log_data.update(kwargs)

        # Format log message
        message_parts = [f"{key}={value}" for key, value in log_data.items()]
        message = " ".join(message_parts)

        # Log at appropriate level
        if event_type.lower() == 'error':
            self.logger.error(message)
        elif event_type.lower() == 'warning':
            self.logger.warning(message)
        else:
            self.logger.info(message)

        return timestamp_ms

    def log_performance_metrics(self, usecase_name, vlm_metrics_result_object):
        timestamp_ms = int(time.time() * 1000)
        log_data = {
            'application': os.getenv(usecase_name),
            'timestamp_ms': timestamp_ms,
            'Load_Time': vlm_metrics_result_object.perf_metrics.get_load_time(),
            'Generated_Tokens': vlm_metrics_result_object.perf_metrics.get_num_generated_tokens(),
            'Input_Tokens': vlm_metrics_result_object.perf_metrics.get_num_input_tokens(),
            'TTFT_Mean': vlm_metrics_result_object.perf_metrics.get_ttft().mean,
            'TPOT_Mean': vlm_metrics_result_object.perf_metrics.get_tpot().mean,
            'Throughput_Mean': vlm_metrics_result_object.perf_metrics.get_throughput().mean,
            'Generate_Duration_Mean': vlm_metrics_result_object.perf_metrics.get_generate_duration().mean,
            'Tokenization_Duration_Mean': vlm_metrics_result_object.perf_metrics.get_tokenization_duration().mean,
            'Detokenization_Duration_Mean': vlm_metrics_result_object.perf_metrics.get_detokenization_duration().mean,
            'Grammar_Compile_Max': vlm_metrics_result_object.perf_metrics.get_grammar_compile_time().max,
            'Grammar_Compile_Min': vlm_metrics_result_object.perf_metrics.get_grammar_compile_time().min,
            'Grammar_Compile_Std': vlm_metrics_result_object.perf_metrics.get_grammar_compile_time().std,
            'Grammar_Compile_Mean': vlm_metrics_result_object.perf_metrics.get_grammar_compile_time().mean
        }

        # Format log message
        message_parts = [f"{key}={value}" for key, value in log_data.items()]
        message = " ".join(message_parts)

        self.performance_logger.info(message)
        return timestamp_ms


# Global logger instance (singleton pattern)
_vlm_metrics_logger = None

def get_logger():
    """Get the global logger instance"""
    global _vlm_metrics_logger
    if _vlm_metrics_logger is None:
        _vlm_metrics_logger = VLMMetricsLogger()
    return _vlm_metrics_logger

def log_start_time(application_name):
    """Convenience function for logging start time"""
    return get_logger().log_start_time(application_name)

def log_end_time(application_name):
    """Convenience function for logging end time"""
    return get_logger().log_end_time(application_name)

def log_custom_event(event_type, application_name, **kwargs):
    """Convenience function for logging custom events"""
    return get_logger().log_custom_event(event_type, application_name, **kwargs)

def log_performance_metric(application_name, metrics):
    """Convenience function for logging performance metrics"""
    return get_logger().log_performance_metrics(application_name, metrics)
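Putting the pieces together, a use case would bracket each VLM call with the convenience functions and pass the generation result to log_performance_metric, which is what later feeds VLMAppMetricsExtractor and VLMPerformanceMetricsExtractor. A hedged sketch only: the environment-variable names, the results path, and the commented-out pipeline call are placeholders, and the result object must expose the perf_metrics interface used by log_performance_metrics.

import os
import time
import vlm_metrics_logger

# Placeholders for this sketch: the logger calls os.getenv(<usecase_name>), so the
# NAME of an environment variable is passed in, not the application name itself.
os.environ.setdefault("CONTAINER_RESULTS_PATH", "/tmp/vlm-results")  # hypothetical path
os.environ.setdefault("VLM_USECASE", "vlm-demo")                     # hypothetical use case

vlm_metrics_logger.log_start_time("VLM_USECASE")
try:
    time.sleep(0.5)  # stand-in for the real VLM generate() call
    # result = pipeline.generate(prompt, image)                        # hypothetical call
    # vlm_metrics_logger.log_performance_metric("VLM_USECASE", result)
except Exception as err:
    vlm_metrics_logger.log_custom_event("error", "VLM_USECASE", detail=str(err))
finally:
    vlm_metrics_logger.log_end_time("VLM_USECASE")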