From ebf259a02201ac144789f40ebae417b736bdaa41 Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Tue, 3 Jun 2025 21:37:30 +0800
Subject: [PATCH 1/6] Add yolov8 benchmarks

Signed-off-by: zz990099 <771647586@qq.com>
---
 .../detection_2d_yolov8/CMakeLists.txt        |   4 +
 .../benchmark/CMakeLists.txt                  |  53 ++++++++
 .../benchmark_detection_2d_yolov8.cpp         | 124 ++++++++++++++++++
 .../test/test_detection_2d_yolov8.cpp         |   4 +-
 4 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt
 create mode 100644 detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp

diff --git a/detection_2d/detection_2d_yolov8/CMakeLists.txt b/detection_2d/detection_2d_yolov8/CMakeLists.txt
index 512ec51..8d58d24 100644
--- a/detection_2d/detection_2d_yolov8/CMakeLists.txt
+++ b/detection_2d/detection_2d_yolov8/CMakeLists.txt
@@ -32,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
diff --git a/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..d1df83d
--- /dev/null
+++ b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+add_compile_options(-std=c++17)
+add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
+set(CMAKE_CXX_STANDARD 17)
+
+if(ENABLE_TENSORRT)
+  list(APPEND platform_core_packages trt_core)
+endif()
+
+if(ENABLE_RKNN)
+  list(APPEND platform_core_packages rknn_core)
+endif()
+
+if(ENABLE_ORT)
+  list(APPEND platform_core_packages ort_core)
+endif()
+
+find_package(glog REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(benchmark REQUIRED)
+
+set(source_file
+  benchmark_detection_2d_yolov8.cpp
+)
+
+include_directories(
+  include
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+add_executable(benchmark_detection_2d_yolov8 ${source_file})
+
+target_link_libraries(benchmark_detection_2d_yolov8 PRIVATE  # executable: no downstream consumers
+  benchmark::benchmark
+  glog::glog
+  ${OpenCV_LIBS}
+  deploy_core
+  image_processing_utils
+  detection_2d_yolov8
+  benchmark_utils
+  ${platform_core_packages}
+)
+
+if(ENABLE_TENSORRT)
+  target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_TENSORRT)
+endif()
+
+if(ENABLE_RKNN)
+  target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_RKNN)
+endif()
+
+if(ENABLE_ORT)
+  target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_ORT)
+endif()
diff --git a/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp
new file mode 100644
index 0000000..edca386
--- /dev/null
+++ b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp
@@ -0,0 +1,124 @@
+#include <benchmark/benchmark.h>  // BENCHMARK, BENCHMARK_MAIN, benchmark::State
+
+#include "detection_2d_util/detection_2d_util.h"
+#include "detection_2d_yolov8/yolov8.h"
+#include "benchmark_utils/detection_2d_benchmark_utils.hpp"
+
+using namespace inference_core;
+using namespace detection_2d;
+using namespace benchmark_utils;
+
+#ifdef ENABLE_TENSORRT
+
+#include "trt_core/trt_core.h"
+
+std::shared_ptr<BaseDetectionModel> CreateYolov8TensorRTModel()
+{
+  std::string                    model_path        = "/workspace/models/yolov8n.engine";
+  const int                      input_height      = 640;
+  const int                      input_width       = 640;
+  const int                      input_channels    = 3;
+  const int                      cls_number        = 80;
+  const std::vector<std::string> input_blobs_name  = {"images"};
+  const std::vector<std::string> output_blobs_name = {"output0"};
+
+  auto infer_core  = CreateTrtInferCore(model_path);
+  auto preprocess  = CreateCudaDetPreProcess();
+  auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number);
+
+  auto yolov8_model =
+      CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width,
+                                 input_channels, cls_number, input_blobs_name, output_blobs_name);
+  return yolov8_model;
+}
+
+static void benchmark_detection_2d_yolov8_tensorrt_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8TensorRTModel());
+}
+static void benchmark_detection_2d_yolov8_tensorrt_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8TensorRTModel());
+}
+BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_sync)->Arg(1000)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_async)->Arg(1000)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_ORT
+
+#include "ort_core/ort_core.h"
+
+std::shared_ptr<BaseDetectionModel> CreateYolov8OnnxRuntimeModel()
+{
+  std::string                    model_path        = "/workspace/models/yolov8n.onnx";
+  const int                      input_height      = 640;
+  const int                      input_width       = 640;
+  const int                      input_channels    = 3;
+  const int                      cls_number        = 80;
+  const std::vector<std::string> input_blobs_name  = {"images"};
+  const std::vector<std::string> output_blobs_name = {"output0"};
+
+  auto infer_core  = CreateOrtInferCore(model_path);
+  auto preprocess  = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true);
+  auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number);
+
+  auto yolov8_model =
+      CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width,
+                                 input_channels, cls_number, input_blobs_name, output_blobs_name);
+  return yolov8_model;
+}
+
+static void benchmark_detection_2d_yolov8_onnxruntime_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8OnnxRuntimeModel());
+}
+static void benchmark_detection_2d_yolov8_onnxruntime_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8OnnxRuntimeModel());
+}
+BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_sync)->Arg(200)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_async)->Arg(200)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_RKNN
+
+#include "rknn_core/rknn_core.h"
+
+std::shared_ptr<BaseDetectionModel> CreateYolov8RknnModel()
+{
+  std::string                    model_path       = "/workspace/models/yolov8n_divide_opset11.rknn";
+  const int                      input_height     = 640;
+  const int                      input_width      = 640;
+  const int                      input_channels   = 3;
+  const int                      cls_number       = 80;
+  const std::vector<std::string> input_blobs_name = {"images"};
+  const std::vector<std::string> output_blobs_name = {"318", "onnx::ReduceSum_326", "331",
+                                                      "338", "onnx::ReduceSum_346", "350",
+                                                      "357", "onnx::ReduceSum_365", "369"};
+
+  auto infer_core  = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}});
+  auto preprocess  = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false);
+  auto postprocess = CreateYolov8PostProcessCpuDivide(input_height, input_width, cls_number);
+
+  auto yolov8_model =
+      CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width,
+                                 input_channels, cls_number, input_blobs_name, output_blobs_name);
+  return yolov8_model;
+}
+
+static void benchmark_detection_2d_yolov8_rknn_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8RknnModel());
+}
+static void benchmark_detection_2d_yolov8_rknn_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8RknnModel());
+}
+BENCHMARK(benchmark_detection_2d_yolov8_rknn_sync)->Arg(500)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_rknn_async)->Arg(500)->UseRealTime();
+
+#endif
+
+BENCHMARK_MAIN();
diff --git a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp
index 66e9369..af37d6e 100644
--- a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp
+++ b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp
@@ -116,8 +116,8 @@ class Yolov8_Rknn_Fixture : public BaseYolov8Fixture {
     const int                      cls_number     = 80;
     const std::vector<std::string> input_blobs_name  = {"images"};
     const std::vector<std::string> output_blobs_name = {"318", "onnx::ReduceSum_326", "331",
-                                                      "338", "onnx::ReduceSum_346", "350",
-                                                      "357", "onnx::ReduceSum_365", "369"};
+                                                        "338", "onnx::ReduceSum_346", "350",
+                                                        "357", "onnx::ReduceSum_365", "369"};
 
     auto infer_core  = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}});
     auto preprocess  = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false);

From dc835b626d76e2b8eaf14bc0dbd4ae23c49c87e1 Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Tue, 3 Jun 2025 22:16:22 +0800
Subject: [PATCH 2/6] Add rt_detr benchmarks

Signed-off-by: zz990099 <771647586@qq.com>
---
 .../detection_2d_rt_detr/CMakeLists.txt       |  7 +-
 .../benchmark/CMakeLists.txt                  | 53 ++++++++++++
 .../benchmark_detection_2d_rt_detr.cpp        | 83 +++++++++++++++++++
 3 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt
 create mode 100644 detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp

diff --git a/detection_2d/detection_2d_rt_detr/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/CMakeLists.txt
index 10cc562..142c8e9 100644
--- a/detection_2d/detection_2d_rt_detr/CMakeLists.txt
+++ b/detection_2d/detection_2d_rt_detr/CMakeLists.txt
@@ -5,12 +5,9 @@ add_compile_options(-std=c++17)
 add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
 set(CMAKE_CXX_STANDARD 17)
 
-
 find_package(OpenCV REQUIRED)
 find_package(glog REQUIRED)
 
-
-
 include_directories(
   include
   ${OpenCV_INCLUDE_DIRS}
@@ -35,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
diff --git a/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..6d47fb3
--- /dev/null
+++ b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+add_compile_options(-std=c++17)
+add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
+set(CMAKE_CXX_STANDARD 17)
+
+if(ENABLE_TENSORRT)
+  list(APPEND platform_core_packages trt_core)
+endif()
+
+if(ENABLE_RKNN)
+  list(APPEND platform_core_packages rknn_core)
+endif()
+
+if(ENABLE_ORT)
+  list(APPEND platform_core_packages ort_core)
+endif()
+
+find_package(glog REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(benchmark REQUIRED)
+
+set(source_file
+  benchmark_detection_2d_rt_detr.cpp
+)
+
+include_directories(
+  include
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+add_executable(benchmark_detection_2d_rt_detr ${source_file})
+
+target_link_libraries(benchmark_detection_2d_rt_detr PRIVATE  # executable: no downstream consumers
+  benchmark::benchmark
+  glog::glog
+  ${OpenCV_LIBS}
+  deploy_core
+  image_processing_utils
+  detection_2d_rt_detr
+  benchmark_utils
+  ${platform_core_packages}
+)
+
+if(ENABLE_TENSORRT)
+  target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_TENSORRT)
+endif()
+
+if(ENABLE_RKNN)
+  target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_RKNN)
+endif()
+
+if(ENABLE_ORT)
+  target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_ORT)
+endif()
diff --git a/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp
new file mode 100644
index 0000000..6a545d1
--- /dev/null
+++ b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp
@@ -0,0 +1,83 @@
+#include <benchmark/benchmark.h>  // BENCHMARK, BENCHMARK_MAIN, benchmark::State
+
+#include "detection_2d_util/detection_2d_util.h"
+#include "detection_2d_rt_detr/rt_detr.h"
+#include "benchmark_utils/detection_2d_benchmark_utils.hpp"
+
+using namespace inference_core;
+using namespace detection_2d;
+using namespace benchmark_utils;
+
+#ifdef ENABLE_TENSORRT
+
+#include "trt_core/trt_core.h"
+
+std::shared_ptr<BaseDetectionModel> CreateRTDetrTensorRTModel()
+{
+  std::string                    model_path   = "/workspace/models/rt_detr_v2_single_input.engine";
+  const int                      input_height = 640;
+  const int                      input_width  = 640;
+  const int                      input_channels    = 3;
+  const int                      cls_number        = 80;
+  const std::vector<std::string> input_blobs_name  = {"images"};
+  const std::vector<std::string> output_blobs_name = {"labels", "boxes", "scores"};
+
+  auto infer_core = CreateTrtInferCore(model_path);
+  auto preprocess = CreateCudaDetPreProcess();
+
+  auto rt_detr_model =
+      CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels,
+                                 cls_number, input_blobs_name, output_blobs_name);
+  return rt_detr_model;
+}
+
+static void benchmark_detection_2d_rt_detr_tensorrt_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateRTDetrTensorRTModel());
+}
+static void benchmark_detection_2d_rt_detr_tensorrt_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateRTDetrTensorRTModel());
+}
+BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_sync)->Arg(500)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_async)->Arg(500)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_ORT
+
+#include "ort_core/ort_core.h"
+
+std::shared_ptr<BaseDetectionModel> CreateRTDetrOnnxRuntimeModel()
+{
+  std::string                    model_path     = "/workspace/models/rt_detr_v2_single_input.onnx";
+  const int                      input_height   = 640;
+  const int                      input_width    = 640;
+  const int                      input_channels = 3;
+  const int                      cls_number     = 80;
+  const std::vector<std::string> input_blobs_name  = {"images"};
+  const std::vector<std::string> output_blobs_name = {"labels", "boxes", "scores"};
+
+  auto infer_core = CreateOrtInferCore(model_path);
+  auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true);
+
+  auto rt_detr_model =
+      CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels,
+                                 cls_number, input_blobs_name, output_blobs_name);
+  return rt_detr_model;
+}
+
+static void benchmark_detection_2d_rt_detr_onnxruntime_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateRTDetrOnnxRuntimeModel());
+}
+static void benchmark_detection_2d_rt_detr_onnxruntime_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateRTDetrOnnxRuntimeModel());
+}
+BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_sync)->Arg(100)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_async)->Arg(100)->UseRealTime();
+
+#endif
+
+BENCHMARK_MAIN();

From 9d9fcd9c079b0410190e6f637f0d78584121df65 Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Tue, 3 Jun 2025 22:16:46 +0800
Subject: [PATCH 3/6] Add sam benchmarks

Signed-off-by: zz990099 <771647586@qq.com>
---
 sam/sam_mobilesam/CMakeLists.txt              |   4 +
 sam/sam_mobilesam/benchmark/CMakeLists.txt    |  53 +++++
 .../benchmark/benchmark_sam_mobilesam.cpp     | 194 ++++++++++++++++++
 3 files changed, 251 insertions(+)
 create mode 100644 sam/sam_mobilesam/benchmark/CMakeLists.txt
 create mode 100644 sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp

diff --git a/sam/sam_mobilesam/CMakeLists.txt b/sam/sam_mobilesam/CMakeLists.txt
index 66fdf30..ac8dfe4 100644
--- a/sam/sam_mobilesam/CMakeLists.txt
+++ b/sam/sam_mobilesam/CMakeLists.txt
@@ -33,3 +33,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
diff --git a/sam/sam_mobilesam/benchmark/CMakeLists.txt b/sam/sam_mobilesam/benchmark/CMakeLists.txt
new file mode 100644
index 0000000..7baea5a
--- /dev/null
+++ b/sam/sam_mobilesam/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+add_compile_options(-std=c++17)
+add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
+set(CMAKE_CXX_STANDARD 17)
+
+if(ENABLE_TENSORRT)
+  list(APPEND platform_core_packages trt_core)
+endif()
+
+if(ENABLE_RKNN)
+  list(APPEND platform_core_packages rknn_core)
+endif()
+
+if(ENABLE_ORT)
+  list(APPEND platform_core_packages ort_core)
+endif()
+
+find_package(glog REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(benchmark REQUIRED)
+
+set(source_file
+  benchmark_sam_mobilesam.cpp
+)
+
+include_directories(
+  include
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+add_executable(benchmark_sam_mobilesam ${source_file})
+
+target_link_libraries(benchmark_sam_mobilesam PRIVATE  # executable: no downstream consumers
+  benchmark::benchmark
+  glog::glog
+  ${OpenCV_LIBS}
+  deploy_core
+  image_processing_utils
+  sam_mobilesam
+  benchmark_utils
+  ${platform_core_packages}
+)
+
+if(ENABLE_TENSORRT)
+  target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_TENSORRT)
+endif()
+
+if(ENABLE_RKNN)
+  target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_RKNN)
+endif()
+
+if(ENABLE_ORT)
+  target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_ORT)
+endif()
diff --git a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp
new file mode 100644
index 0000000..2cc6643
--- /dev/null
+++ b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp
@@ -0,0 +1,194 @@
+#include <benchmark/benchmark.h>  // BENCHMARK, BENCHMARK_MAIN, benchmark::State
+
+#include "detection_2d_util/detection_2d_util.h"
+#include "sam_mobilesam/mobilesam.h"
+#include "benchmark_utils/sam_benchmark_utils.hpp"
+
+using namespace inference_core;
+using namespace detection_2d;
+using namespace sam;
+using namespace benchmark_utils;
+
+#ifdef ENABLE_TENSORRT
+
+#include "trt_core/trt_core.h"
+
+std::shared_ptr<BaseSamModel> CreateSAMTensorRTModel(const std::string &image_encoder_model_path)
+{
+  auto box_decoder_model_path   = "/workspace/models/modified_mobile_sam_box.engine";
+  auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.engine";
+
+  auto image_encoder = CreateTrtInferCore(image_encoder_model_path);
+
+  const int SAM_MAX_BOX    = 1;
+  const int SAM_MAX_POINTS = 8;
+
+  auto box_decoder_factory =
+      CreateTrtInferCoreFactory(box_decoder_model_path,
+                                {
+                                    {"image_embeddings", {1, 256, 64, 64}},
+                                    {"boxes", {1, SAM_MAX_BOX, 4}},
+                                    {"mask_input", {1, 1, 256, 256}},
+                                    {"has_mask_input", {1}},
+                                },
+                                {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}});
+
+  auto point_decoder_factory =
+      CreateTrtInferCoreFactory(point_decoder_model_path,
+                                {
+                                    {"image_embeddings", {1, 256, 64, 64}},
+                                    {"point_coords", {1, SAM_MAX_POINTS, 2}},
+                                    {"point_labels", {1, SAM_MAX_POINTS}},
+                                    {"mask_input", {1, 1, 256, 256}},
+                                    {"has_mask_input", {1}},
+                                },
+                                {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}});
+
+  auto image_preprocess_factory = CreateCudaDetPreProcessFactory();
+
+  return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(),
+                              box_decoder_factory->Create(), image_preprocess_factory->Create());
+}
+
+// benchmark sam_mobilesam
+static void benchmark_sam_mobilesam_tensorrt_sync(benchmark::State &state)
+{
+  auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine";
+  benchmark_sam_sync(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path));
+}
+static void benchmark_sam_mobilesam_tensorrt_async(benchmark::State &state)
+{
+  auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine";
+  benchmark_sam_async(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path));
+}
+BENCHMARK(benchmark_sam_mobilesam_tensorrt_sync)->Arg(100)->UseRealTime();
+BENCHMARK(benchmark_sam_mobilesam_tensorrt_async)->Arg(100)->UseRealTime();
+
+// benchmark sam_nanosam
+static void benchmark_sam_nanosam_tensorrt_sync(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine";
+  benchmark_sam_sync(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path));
+}
+static void benchmark_sam_nanosam_tensorrt_async(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine";
+  benchmark_sam_async(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path));
+}
+BENCHMARK(benchmark_sam_nanosam_tensorrt_sync)->Arg(200)->UseRealTime();
+BENCHMARK(benchmark_sam_nanosam_tensorrt_async)->Arg(200)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_ORT
+
+#include "ort_core/ort_core.h"
+
+std::shared_ptr<BaseSamModel> CreateSAMOnnxRuntimeModel(const std::string &image_encoder_model_path)
+{
+  auto box_decoder_model_path   = "/workspace/models/modified_mobile_sam_box.onnx";
+  auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.onnx";
+
+  auto image_encoder = CreateOrtInferCore(image_encoder_model_path);
+
+  const int SAM_MAX_BOX    = 1;
+  const int SAM_MAX_POINTS = 8;
+
+  auto box_decoder_factory =
+      CreateOrtInferCoreFactory(box_decoder_model_path,
+                                {
+                                    {"image_embeddings", {1, 256, 64, 64}},
+                                    {"boxes", {1, SAM_MAX_BOX, 4}},
+                                    {"mask_input", {1, 1, 256, 256}},
+                                    {"has_mask_input", {1}},
+                                },
+                                {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}});
+
+  auto point_decoder_factory =
+      CreateOrtInferCoreFactory(point_decoder_model_path,
+                                {
+                                    {"image_embeddings", {1, 256, 64, 64}},
+                                    {"point_coords", {1, SAM_MAX_POINTS, 2}},
+                                    {"point_labels", {1, SAM_MAX_POINTS}},
+                                    {"mask_input", {1, 1, 256, 256}},
+                                    {"has_mask_input", {1}},
+                                },
+                                {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}});
+
+  auto image_preprocess_factory =
+      CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, true, true);
+
+  return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(),
+                              box_decoder_factory->Create(), image_preprocess_factory->Create());
+}
+
+// benchmark sam_mobilesam
+static void benchmark_sam_mobilesam_onnxruntime_sync(benchmark::State &state)
+{
+  auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx";
+  benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path));
+}
+static void benchmark_sam_mobilesam_onnxruntime_async(benchmark::State &state)
+{
+  auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx";
+  benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path));
+}
+BENCHMARK(benchmark_sam_mobilesam_onnxruntime_sync)->Arg(20)->UseRealTime();
+BENCHMARK(benchmark_sam_mobilesam_onnxruntime_async)->Arg(20)->UseRealTime();
+
+// benchmark sam_nanosam
+static void benchmark_sam_nanosam_onnxruntime_sync(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx";
+  benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path));
+}
+static void benchmark_sam_nanosam_onnxruntime_async(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx";
+  benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path));
+}
+BENCHMARK(benchmark_sam_nanosam_onnxruntime_sync)->Arg(50)->UseRealTime();
+BENCHMARK(benchmark_sam_nanosam_onnxruntime_async)->Arg(50)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_RKNN
+
+#include "rknn_core/rknn_core.h"
+
+std::shared_ptr<BaseSamModel> CreateSAMRknnModel(const std::string &image_encoder_model_path)
+{
+  auto box_decoder_model_path   = "/workspace/models/modified_mobile_sam_box.rknn";
+  auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.rknn";
+
+  auto nanosam_image_encoder = CreateRknnInferCore(
+      nanosam_image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2);
+
+  auto box_decoder_factory = CreateRknnInferCoreFactory(box_decoder_model_path, {}, 5, 2);
+
+  auto point_decoder_factory = CreateRknnInferCoreFactory(point_decoder_model_path, {}, 5, 2);
+
+  auto image_preprocess_factory =
+      CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, false, false);
+
+  return CreateMobileSamModel(nanosam_image_encoder, point_decoder_factory->Create(),
+                              box_decoder_factory->Create(), image_preprocess_factory->Create());
+}
+
+// benchmark sam_nanosam
+static void benchmark_sam_nanosam_rknn_sync(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn";
+  benchmark_sam_sync(state, CreateSAMRknnModel(nanosam_image_encoder_model_path));
+}
+static void benchmark_sam_nanosam_rknn_async(benchmark::State &state)
+{
+  auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn";
+  benchmark_sam_async(state, CreateSAMRknnModel(nanosam_image_encoder_model_path));
+}
+BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(100)->UseRealTime();
+BENCHMARK(benchmark_sam_nanosam_rknn_async)->Arg(100)->UseRealTime();
+
+#endif
+
+BENCHMARK_MAIN();

From d3cb83ad59973e5b70eb1d366e1a18fc0eba405e Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Wed, 4 Jun 2025 21:26:06 +0800
Subject: [PATCH 4/6] Update easy_deploy_tool submodule

Signed-off-by: zz990099 <771647586@qq.com>
---
 easy_deploy_tool | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easy_deploy_tool b/easy_deploy_tool
index 4a01290..a8dad8d 160000
--- a/easy_deploy_tool
+++ b/easy_deploy_tool
@@ -1 +1 @@
-Subproject commit 4a012904f39be0c35f0da9921e5c761f3ef1e2bb
+Subproject commit a8dad8d3a09fa0e136ae670f7a7c3f82468360d1

From 26da144a56f9c9abe5d01f8b46f80ffd142106f7 Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Wed, 4 Jun 2025 22:10:42 +0800
Subject: [PATCH 5/6] Fix undeclared encoder path in mobilesam RKNN benchmark

Signed-off-by: zz990099 <771647586@qq.com>
---
 sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp
index 2cc6643..fbfa1a5 100644
--- a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp
+++ b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp
@@ -162,7 +162,7 @@ std::shared_ptr<BaseSamModel> CreateSAMRknnModel(const std::string &image_encode
   auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.rknn";
 
   auto nanosam_image_encoder = CreateRknnInferCore(
-      nanosam_image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2);
+      image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2);
 
   auto box_decoder_factory = CreateRknnInferCoreFactory(box_decoder_model_path, {}, 5, 2);
 
@@ -186,7 +186,7 @@ static void benchmark_sam_nanosam_rknn_async(benchmark::State &state)
   auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn";
   benchmark_sam_async(state, CreateSAMRknnModel(nanosam_image_encoder_model_path));
 }
-BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(100)->UseRealTime();
+BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(50)->UseRealTime();
 BENCHMARK(benchmark_sam_nanosam_rknn_async)->Arg(100)->UseRealTime();
 
 #endif

From c024dec4b0daf7cbef3d8f4df83362478edf909e Mon Sep 17 00:00:00 2001
From: zz990099 <771647586@qq.com>
Date: Wed, 4 Jun 2025 22:10:51 +0800
Subject: [PATCH 6/6] Update easy_deploy_tool submodule

Signed-off-by: zz990099 <771647586@qq.com>
---
 easy_deploy_tool | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easy_deploy_tool b/easy_deploy_tool
index a8dad8d..6c254c6 160000
--- a/easy_deploy_tool
+++ b/easy_deploy_tool
@@ -1 +1 @@
-Subproject commit a8dad8d3a09fa0e136ae670f7a7c3f82468360d1
+Subproject commit 6c254c6d53e429513d46924f96fb1e543364497f