From 45ddfcbcb43ed96351b60951001baee66c149aa7 Mon Sep 17 00:00:00 2001
From: narugo1992 <narugo992@gmail.com>
Date: Fri, 10 Oct 2025 18:41:53 +0800
Subject: [PATCH 1/5] dev(narugo): add device id

---
 imgutils/utils/onnxruntime.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/imgutils/utils/onnxruntime.py b/imgutils/utils/onnxruntime.py
index 22a37a11b9d..d48cbb9c82b 100644
--- a/imgutils/utils/onnxruntime.py
+++ b/imgutils/utils/onnxruntime.py
@@ -63,13 +63,19 @@ def get_onnx_provider(provider: Optional[str] = None):
                          f'but unsupported provider {provider!r} found.')
 
 
-def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True) -> InferenceSession:
+def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True,
+                     cuda_device_id: Optional[int] = None) -> InferenceSession:
     options = SessionOptions()
     options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
     if provider == "CPUExecutionProvider":
         options.intra_op_num_threads = os.cpu_count()
 
-    providers = [provider]
+    if provider == 'CUDAExecutionProvider' and cuda_device_id is not None:
+        providers = [
+            ('CUDAExecutionProvider', {'device_id': cuda_device_id}),
+        ]
+    else:
+        providers = [provider]
     if use_cpu and "CPUExecutionProvider" not in providers:
         providers.append("CPUExecutionProvider")
 
@@ -77,7 +83,7 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True) -> Inferenc
     return InferenceSession(ckpt, options, providers=providers)
 
 
-def open_onnx_model(ckpt: str, mode: str = None) -> InferenceSession:
+def open_onnx_model(ckpt: str, mode: str = None, cuda_device_id: Optional[int] = None) -> InferenceSession:
     """
     Overview:
         Open an ONNX model and load its ONNX runtime.
@@ -93,4 +99,9 @@ def open_onnx_model(ckpt: str, mode: str = None) -> InferenceSession:
         on Linux, executing ``export ONNX_MODE=cpu`` will ignore any existing CUDA and force the model inference
         to run on CPU.
     """
-    return _open_onnx_model(ckpt, get_onnx_provider(mode or os.environ.get('ONNX_MODE', None)))
+    return _open_onnx_model(
+        ckpt=ckpt,
+        provider=get_onnx_provider(mode or os.environ.get('ONNX_MODE', None)),
+        use_cpu=True,
+        cuda_device_id=cuda_device_id,
+    )

From a2a984ecac2db4fb239e5bc925bcd22d3d1f3151 Mon Sep 17 00:00:00 2001
From: narugo1992 <narugo992@gmail.com>
Date: Fri, 10 Oct 2025 18:45:38 +0800
Subject: [PATCH 2/5] dev(narugo): add docs for this submodule

---
 imgutils/utils/onnxruntime.py | 98 +++++++++++++++++++++++++++++++----
 1 file changed, 87 insertions(+), 11 deletions(-)

diff --git a/imgutils/utils/onnxruntime.py b/imgutils/utils/onnxruntime.py
index d48cbb9c82b..391e5a7d205 100644
--- a/imgutils/utils/onnxruntime.py
+++ b/imgutils/utils/onnxruntime.py
@@ -1,10 +1,16 @@
 """
 Overview:
-    Management of onnx models.
+    Management of ONNX models with automatic runtime detection and provider selection.
+
+    This module provides utilities for loading and managing ONNX models with support for
+    different execution providers (CPU, CUDA, TensorRT). It automatically handles the
+    installation of onnxruntime based on the system configuration and provides a
+    convenient interface for model inference.
 """
 import logging
 import os
 import shutil
+import warnings
 from typing import Optional
 
 from hbutils.system import pip_install
@@ -15,6 +21,14 @@
 
 
 def _ensure_onnxruntime():
+    """
+    Ensure that onnxruntime is installed on the system.
+
+    This function automatically detects if NVIDIA GPU is available and installs
+    the appropriate version of onnxruntime (GPU or CPU version).
+
+    :raises ImportError: If installation fails
+    """
     try:
         import onnxruntime
     except (ImportError, ModuleNotFoundError):
@@ -39,13 +53,35 @@ def _ensure_onnxruntime():
 
 def get_onnx_provider(provider: Optional[str] = None):
     """
-    Overview:
-        Get onnx provider.
+    Get the appropriate ONNX execution provider based on system capabilities and user preference.
+
+    This function automatically detects available execution providers and returns the most
+    suitable one. It supports aliases for common providers and falls back to CPU execution
+    if GPU providers are not available.
 
     :param provider: The provider for ONNX runtime. ``None`` by default and will automatically detect
         if the ``CUDAExecutionProvider`` is available. If it is available, it will be used,
-        otherwise the default ``CPUExecutionProvider`` will be used.
-    :return: String of the provider.
+        otherwise the default ``CPUExecutionProvider`` will be used. Supported aliases include
+        'gpu' for CUDAExecutionProvider and 'trt' for TensorrtExecutionProvider.
+    :type provider: Optional[str]
+
+    :return: String name of the selected execution provider.
+    :rtype: str
+
+    :raises ValueError: If the specified provider is not supported or available.
+
+    Example::
+        >>> # Auto-detect provider
+        >>> provider = get_onnx_provider()
+        >>> print(provider)  # 'CUDAExecutionProvider' or 'CPUExecutionProvider'
+
+        >>> # Explicitly request GPU provider
+        >>> provider = get_onnx_provider('gpu')
+        >>> print(provider)  # 'CUDAExecutionProvider'
+
+        >>> # Request CPU provider
+        >>> provider = get_onnx_provider('cpu')
+        >>> print(provider)  # 'CPUExecutionProvider'
     """
     if not provider:
         if "CUDAExecutionProvider" in get_available_providers():
@@ -65,6 +101,24 @@ def get_onnx_provider(provider: Optional[str] = None):
 
 def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True,
                      cuda_device_id: Optional[int] = None) -> InferenceSession:
+    """
+    Internal function to create and configure an ONNX inference session.
+
+    This function handles the low-level configuration of the ONNX runtime session,
+    including optimization settings and provider-specific configurations.
+
+    :param ckpt: Path to the ONNX model file.
+    :type ckpt: str
+    :param provider: Name of the execution provider to use.
+    :type provider: str
+    :param use_cpu: Whether to include CPU provider as fallback. Defaults to True.
+    :type use_cpu: bool
+    :param cuda_device_id: Specific CUDA device ID to use for GPU inference. Ignored with a warning for other providers.
+    :type cuda_device_id: Optional[int]
+
+    :return: Configured ONNX inference session.
+    :rtype: InferenceSession
+    """
     options = SessionOptions()
     options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
     if provider == "CPUExecutionProvider":
@@ -75,6 +129,9 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True,
             ('CUDAExecutionProvider', {'device_id': cuda_device_id}),
         ]
     else:
+        if provider != 'CUDAExecutionProvider' and cuda_device_id is not None:
+            warnings.warn(UserWarning(
+                'CUDA device ID specified but provider is not CUDAExecutionProvider. The device ID will be ignored.'))
         providers = [provider]
     if use_cpu and "CPUExecutionProvider" not in providers:
         providers.append("CPUExecutionProvider")
@@ -85,19 +142,38 @@ def _open_onnx_model(ckpt: str, provider: str, use_cpu: bool = True,
 
 def open_onnx_model(ckpt: str, mode: str = None, cuda_device_id: Optional[int] = None) -> InferenceSession:
     """
-    Overview:
-        Open an ONNX model and load its ONNX runtime.
+    Open an ONNX model and create a configured inference session.
+
+    This function provides a high-level interface for loading ONNX models with
+    automatic provider selection and optimization. It supports environment variable
+    configuration for runtime provider selection.
 
-    :param ckpt: ONNX model file.
-    :param mode: Provider of the ONNX. Default is ``None`` which means the provider will be auto-detected,
-        see :func:`get_onnx_provider` for more details.
-    :return: A loaded ONNX runtime object.
+    :param ckpt: Path to the ONNX model file to load.
+    :type ckpt: str
+    :param mode: Provider of the ONNX runtime. Default is ``None`` which means the provider will be auto-detected,
+        see :func:`get_onnx_provider` for more details. If not given, the ``ONNX_MODE`` environment variable is used.
+    :type mode: Optional[str]
+    :param cuda_device_id: CUDA device ID for GPU inference. Ignored with a warning unless the CUDA provider is used.
+    :type cuda_device_id: Optional[int]
+
+    :return: A loaded and configured ONNX inference session ready for prediction.
+    :rtype: InferenceSession
 
     .. note::
         When ``mode`` is set to ``None``, it will attempt to detect the environment variable ``ONNX_MODE``.
         This means you can decide which ONNX runtime to use by setting the environment variable. For example,
         on Linux, executing ``export ONNX_MODE=cpu`` will ignore any existing CUDA and force the model inference
         to run on CPU.
+
+    Example::
+        >>> # Load model with auto-detected provider
+        >>> session = open_onnx_model('model.onnx')
+
+        >>> # Force CPU execution
+        >>> session = open_onnx_model('model.onnx', mode='cpu')
+
+        >>> # Use specific CUDA device
+        >>> session = open_onnx_model('model.onnx', mode='gpu', cuda_device_id=1)
     """
     return _open_onnx_model(
         ckpt=ckpt,

From ae72f4cec343300048a96534d86f3d1ac5caef99 Mon Sep 17 00:00:00 2001
From: narugo1992 <narugo992@gmail.com>
Date: Fri, 10 Oct 2025 19:01:43 +0800
Subject: [PATCH 3/5] dev(narugo): add max parallel

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ccfe28ded19..8f993533dc7 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -11,6 +11,7 @@ jobs:
     if: ${{ !contains(github.event.head_commit.message, 'ci skip') && !contains(github.event.head_commit.message, 'test skip') }}
     strategy:
       fail-fast: false
+      max-parallel: 3
       matrix:
         os:
           - 'ubuntu-latest'

From 7a5490176661d26f856d7361405b20739c00b392 Mon Sep 17 00:00:00 2001
From: narugo1992 <narugo992@gmail.com>
Date: Fri, 10 Oct 2025 19:03:01 +0800
Subject: [PATCH 4/5] dev(narugo): lower some versions

---
 .github/workflows/test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8f993533dc7..75066a25400 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -19,10 +19,10 @@ jobs:
           - 'macos-latest'
         python-version:
           - '3.8'
-          - '3.9'
-          - '3.10'
+#          - '3.9'
+#          - '3.10'
           - '3.11'
-          - '3.12'
+#          - '3.12'
           - '3.13'
         install:
           - 'full'

From ac7d0da26444b177450a02b6d920fe5bb496531b Mon Sep 17 00:00:00 2001
From: narugo1992 <narugo992@gmail.com>
Date: Sat, 11 Oct 2025 18:08:02 +0800
Subject: [PATCH 5/5] dev(narugo): use scope=class

---
 test/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/conftest.py b/test/conftest.py
index 1b93873f6e3..5674b8b02e6 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -20,7 +20,7 @@ def text_aligner():
     return TextAligner().multiple_lines()
 
 
-@pytest.fixture(autouse=True, scope='module')
+@pytest.fixture(autouse=True, scope='class')
 def clean_hf_cache():
     try:
         yield