Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@
"supported_quantization_encodings": [ ],
"run_on_target": true
},
"EstimateNPULatency": {
"module_path": "olive.passes.onnx.vitis_ai.estimate_npu_latency.EstimateNPULatency",
"supported_providers": [ "*" ],
"supported_accelerators": [ "*" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"module_dependencies": [ "perf-estimator" ]
},
"ExtractAdapters": {
"module_path": "olive.passes.onnx.extract_adapters.ExtractAdapters",
"supported_providers": [ "*" ],
Expand Down
74 changes: 74 additions & 0 deletions olive/passes/onnx/vitis_ai/estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#

import logging

from olive.hardware.accelerator import AcceleratorSpec
from olive.model import ONNXModelHandler
from olive.passes import Pass
from olive.passes.pass_config import BasePassConfig, PassConfigParam

logger = logging.getLogger(__name__)


class EstimateNPULatency(Pass):
    """Estimate NPU latency for an ONNX model via the external perf-estimator package.

    The estimator runs as a side effect (it writes latency summaries next to the
    model); the input model is returned unchanged. If the perf-estimator package
    is not installed, the pass logs the problem and skips estimation (best-effort).
    """

    # Device types the estimator currently supports.
    _SUPPORTED_DEVICES = ("stx",)

    @classmethod
    def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
        """Return the configurable parameters for this pass."""
        return {
            "target_device": PassConfigParam(
                type_=str, required=False, description="Target device type", default_value="stx"
            )
        }

    @classmethod
    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
        """Validate the pass config; reject target devices the estimator does not support."""
        if not super().validate_config(config, accelerator_spec):
            return False

        if config.target_device and config.target_device not in cls._SUPPORTED_DEVICES:
            logger.warning("Unsupported target device type: %s", config.target_device)
            return False

        return True

    def _run_for_config(
        self, model: ONNXModelHandler, config: BasePassConfig, output_model_path: str
    ) -> ONNXModelHandler:
        """Run the perf estimator on the model and return the original model as-is.

        Args:
            model: The ONNX model to estimate latency for.
            config: Validated pass configuration (may override ``target_device``).
            output_model_path: Unused; the pass produces no new model artifact.

        Returns:
            The input model, unmodified.

        Raises:
            ValueError: If ``model`` is not an ``ONNXModelHandler``.
        """
        # Validate the model type before attempting the optional import so an
        # invalid model is reported as such, not as a missing-package problem.
        if not isinstance(model, ONNXModelHandler):
            raise ValueError("Model must be an instance of ONNXModelHandler")

        try:
            from estimator.config import EstimatorSettings
            from estimator.run import run_perf_estimate
        except ImportError:
            # Best-effort: skip estimation rather than failing the whole workflow.
            logger.exception(
                "Estimator module not found. Install perf-estimator package and delete cached run before rerunning."
            )
            return model

        input_model_path = model.model_path
        EstimatorSettings.model_path = str(input_model_path)

        # Override default parameters if specified
        if config.target_device:
            EstimatorSettings.target_device = config.target_device

        logger.info(
            "Running perf estimator for model path: %s and target device: %s",
            input_model_path,
            EstimatorSettings.target_device,
        )

        run_perf_estimate(EstimatorSettings)
        logger.info("Finish running perf estimator pass")

        # Return the original model as is
        return model
37 changes: 37 additions & 0 deletions test/passes/onnx/test_estimate_npu_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
#
import os
from pathlib import Path

import onnx

from olive.passes.olive_pass import create_pass_from_dict
from olive.passes.onnx.vitis_ai.estimate_npu_latency import EstimateNPULatency
from test.utils import get_onnx_model


class TestEstimateNPULatency:
    """Test cases for EstimateNPULatency pass."""

    def test_estimate_latency_basic(self, tmp_path):
        """Run the pass end-to-end; verify the latency summary and the returned model."""
        # Setup
        input_model = get_onnx_model()
        config = {}
        p = create_pass_from_dict(EstimateNPULatency, config, disable_search=True)
        output_folder = str(tmp_path / "onnx")

        # Execute
        output_model = p.run(input_model, output_folder)

        # Assert we created output csv for latency results (the estimator writes
        # it next to the input model); use pathlib instead of os.path + f-string.
        estimates_csv = Path(input_model.model_path).parent / "concise_summary"
        assert estimates_csv.exists()

        # Assert the pass returned a model whose artifact exists on disk
        assert Path(output_model.model_path).exists()
        # Load the output model and check graph name
        onnx_model = onnx.load_model(output_model.model_path)
        assert onnx_model.graph.name == "main_graph"
1 change: 1 addition & 0 deletions test/requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ optimum-intel[openvino]>=1.17.0, <=1.24
optuna
pandas
peft
perf-estimator
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no package called perf-estimator on PyPI.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we are working on publishing the package to PyPI. We will update this once that is done.

plotly
polygraphy>=0.49.22
psutil
Expand Down