From 45ddfcbcb43ed96351b60951001baee66c149aa7 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 10 Oct 2025 18:41:53 +0800 Subject: [PATCH 1/5] dev(narugo): add device id --- imgutils/utils/onnxruntime.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/imgutils/utils/onnxruntime.py b/imgutils/utils/onnxruntime.py index 22a37a11b9d..d48cbb9c82b 100644 --- a/imgutils/utils/onnxruntime.py +++ b/imgutils/utils/onnxruntime.py @@ -63,13 +63,19 @@ def get_onnx_provider(provider: Optional[str] = None): f'but unsupported provider {provider!r} found.') -def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True) -> InferenceSession: +def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True, + cuda_device_id: Optional[int] = None) -> InferenceSession: options = SessionOptions() options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL if provider == "CPUExecutionProvider": options.intra_op_num_threads = os.cpu_count() - providers = [provider] + if provider == 'CUDAExecutionProvider' and cuda_device_id is not None: + providers = [ + ('CUDAExecutionProvider', {'device_id': cuda_device_id}), + ] + else: + providers = [provider] if use_cpu and "CPUExecutionProvider" not in providers: providers.append("CPUExecutionProvider") @@ -77,7 +83,7 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True) -> Inferenc return InferenceSession(ckpt, options, providers=providers) -def open_onnx_model(ckpt: str, mode: str = None) -> InferenceSession: +def open_onnx_model(ckpt: str, mode: str = None, cuda_device_id: Optional[int] = None) -> InferenceSession: """ Overview: Open an ONNX model and load its ONNX runtime. @@ -93,4 +99,9 @@ def open_onnx_model(ckpt: str, mode: str = None) -> InferenceSession: on Linux, executing ``export ONNX_MODE=cpu`` will ignore any existing CUDA and force the model inference to run on CPU. """ - return _open_onnx_model(ckpt, get_onnx_provider(mode or os.environ.get('ONNX_MODE', None))) + return _open_onnx_model( + ckpt=ckpt, + provider=get_onnx_provider(mode or os.environ.get('ONNX_MODE', None)), + use_cpu=True, + cuda_device_id=cuda_device_id, + ) From a2a984ecac2db4fb239e5bc925bcd22d3d1f3151 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 10 Oct 2025 18:45:38 +0800 Subject: [PATCH 2/5] dev(narugo): add docs for this submodule --- imgutils/utils/onnxruntime.py | 98 +++++++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 11 deletions(-) diff --git a/imgutils/utils/onnxruntime.py b/imgutils/utils/onnxruntime.py index d48cbb9c82b..391e5a7d205 100644 --- a/imgutils/utils/onnxruntime.py +++ b/imgutils/utils/onnxruntime.py @@ -1,10 +1,16 @@ """ Overview: - Management of onnx models. + Management of ONNX models with automatic runtime detection and provider selection. + + This module provides utilities for loading and managing ONNX models with support for + different execution providers (CPU, CUDA, TensorRT). It automatically handles the + installation of onnxruntime based on the system configuration and provides a + convenient interface for model inference. """ import logging import os import shutil +import warnings from typing import Optional from hbutils.system import pip_install @@ -15,6 +21,14 @@ def _ensure_onnxruntime(): + """ + Ensure that onnxruntime is installed on the system. + + This function automatically detects if NVIDIA GPU is available and installs + the appropriate version of onnxruntime (GPU or CPU version). + + :raises ImportError: If installation fails + """ try: import onnxruntime except (ImportError, ModuleNotFoundError): @@ -39,13 +53,35 @@ def _ensure_onnxruntime(): def get_onnx_provider(provider: Optional[str] = None): """ - Overview: - Get onnx provider. + Get the appropriate ONNX execution provider based on system capabilities and user preference. + + This function automatically detects available execution providers and returns the most + suitable one. It supports aliases for common providers and falls back to CPU execution + if GPU providers are not available. :param provider: The provider for ONNX runtime. ``None`` by default and will automatically detect if the ``CUDAExecutionProvider`` is available. If it is available, it will be used, - otherwise the default ``CPUExecutionProvider`` will be used. - :return: String of the provider. + otherwise the default ``CPUExecutionProvider`` will be used. Supported aliases include + 'gpu' for CUDAExecutionProvider and 'trt' for TensorrtExecutionProvider. + :type provider: Optional[str] + + :return: String name of the selected execution provider. + :rtype: str + + :raises ValueError: If the specified provider is not supported or available. + + Example:: + >>> # Auto-detect provider + >>> provider = get_onnx_provider() + >>> print(provider) # 'CUDAExecutionProvider' or 'CPUExecutionProvider' + + >>> # Explicitly request GPU provider + >>> provider = get_onnx_provider('gpu') + >>> print(provider) # 'CUDAExecutionProvider' + + >>> # Request CPU provider + >>> provider = get_onnx_provider('cpu') + >>> print(provider) # 'CPUExecutionProvider' """ if not provider: if "CUDAExecutionProvider" in get_available_providers(): @@ -65,6 +101,24 @@ def get_onnx_provider(provider: Optional[str] = None): def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True, cuda_device_id: Optional[int] = None) -> InferenceSession: + """ + Internal function to create and configure an ONNX inference session. + + This function handles the low-level configuration of the ONNX runtime session, + including optimization settings and provider-specific configurations. + + :param ckpt: Path to the ONNX model file. + :type ckpt: str + :param provider: Name of the execution provider to use. + :type provider: str + :param use_cpu: Whether to include CPU provider as fallback. Defaults to True. + :type use_cpu: bool + :param cuda_device_id: Specific CUDA device ID to use for GPU inference. + :type cuda_device_id: Optional[int] + + :return: Configured ONNX inference session. + :rtype: InferenceSession + """ options = SessionOptions() options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL if provider == "CPUExecutionProvider": @@ -75,6 +129,9 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True, ('CUDAExecutionProvider', {'device_id': cuda_device_id}), ] else: + if provider != 'CUDAExecutionProvider' and cuda_device_id is not None: + warnings.warn(UserWarning( + 'CUDA device ID specified but provider is not CUDAExecutionProvider. The device ID will be ignored.')) providers = [provider] if use_cpu and "CPUExecutionProvider" not in providers: providers.append("CPUExecutionProvider") @@ -85,19 +142,38 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True, def open_onnx_model(ckpt: str, mode: str = None, cuda_device_id: Optional[int] = None) -> InferenceSession: """ - Overview: - Open an ONNX model and load its ONNX runtime. + Open an ONNX model and create a configured inference session. + + This function provides a high-level interface for loading ONNX models with + automatic provider selection and optimization. It supports environment variable + configuration for runtime provider selection. - :param ckpt: ONNX model file. - :param mode: Provider of the ONNX. Default is ``None`` which means the provider will be auto-detected, - see :func:`get_onnx_provider` for more details. - :return: A loaded ONNX runtime object. + :param ckpt: Path to the ONNX model file to load. + :type ckpt: str + :param mode: Provider of the ONNX runtime. Default is ``None`` which means the provider will be auto-detected, + see :func:`get_onnx_provider` for more details. Can also be controlled via ONNX_MODE environment variable. + :type mode: Optional[str] + :param cuda_device_id: Specific CUDA device ID to use for GPU inference. Only effective when using CUDA provider. + :type cuda_device_id: Optional[int] + + :return: A loaded and configured ONNX inference session ready for prediction. + :rtype: InferenceSession .. note:: When ``mode`` is set to ``None``, it will attempt to detect the environment variable ``ONNX_MODE``. This means you can decide which ONNX runtime to use by setting the environment variable. For example, on Linux, executing ``export ONNX_MODE=cpu`` will ignore any existing CUDA and force the model inference to run on CPU. + + Example:: + >>> # Load model with auto-detected provider + >>> session = open_onnx_model('model.onnx') + + >>> # Force CPU execution + >>> session = open_onnx_model('model.onnx', mode='cpu') + + >>> # Use specific CUDA device + >>> session = open_onnx_model('model.onnx', mode='gpu', cuda_device_id=1) """ return _open_onnx_model( ckpt=ckpt, From ae72f4cec343300048a96534d86f3d1ac5caef99 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 10 Oct 2025 19:01:43 +0800 Subject: [PATCH 3/5] dev(narugo): add max parallel --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ccfe28ded19..8f993533dc7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,6 +11,7 @@ jobs: if: ${{ !contains(github.event.head_commit.message, 'ci skip') && !contains(github.event.head_commit.message, 'test skip') }} strategy: fail-fast: false + max-parallel: 3 matrix: os: - 'ubuntu-latest' From 7a5490176661d26f856d7361405b20739c00b392 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 10 Oct 2025 19:03:01 +0800 Subject: [PATCH 4/5] dev(narugo): lower some versions --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8f993533dc7..75066a25400 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,10 +19,10 @@ jobs: - 'macos-latest' python-version: - '3.8' - - '3.9' - - '3.10' +# - '3.9' +# - '3.10' - '3.11' - - '3.12' +# - '3.12' - '3.13' install: - 'full' From ac7d0da26444b177450a02b6d920fe5bb496531b Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sat, 11 Oct 2025 18:08:02 +0800 Subject: [PATCH 5/5] dev(narugo): use scope=class --- test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/conftest.py b/test/conftest.py index 1b93873f6e3..5674b8b02e6 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -20,7 +20,7 @@ def text_aligner(): return TextAligner().multiple_lines() -@pytest.fixture(autouse=True, scope='module') +@pytest.fixture(autouse=True, scope='class') def clean_hf_cache(): try: yield