zz990099 · zz990099 · Jun 4, 2025 · Jun 3, 2025 · Jun 3, 2025 · Jun 3, 2025
diff --git a/detection_2d/detection_2d_rt_detr/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/CMakeLists.txt
@@ -5,12 +5,9 @@ add_compile_options(-std=c++17)
 add_compile_options(-O3 -Wextra -Wdeprecated -fPIC)
 set(CMAKE_CXX_STANDARD 17)
 
-
 find_package(OpenCV REQUIRED)
 find_package(glog REQUIRED)
 
-
-
 include_directories(
   include
   ${OpenCV_INCLUDE_DIRS}
@@ -35,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+# Descend into benchmark/ (the Google Benchmark executable for this model) only
+# when BUILD_BENCHMARK evaluates to true, mirroring the BUILD_TESTING guard above.
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
diff --git a/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Build script for the RT-DETR 2D detection benchmark executable (Google Benchmark).
+# All build settings are attached to the target itself rather than to the directory.
+
+# Collect the inference-backend libraries selected at configure time. The list is
+# appended to (not reset), so entries already present in the enclosing scope are kept.
+if(ENABLE_TENSORRT)
+  list(APPEND platform_core_packages trt_core)
+endif()
+
+if(ENABLE_RKNN)
+  list(APPEND platform_core_packages rknn_core)
+endif()
+
+if(ENABLE_ORT)
+  list(APPEND platform_core_packages ort_core)
+endif()
+
+find_package(glog REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(benchmark REQUIRED)
+
+add_executable(benchmark_detection_2d_rt_detr
+  benchmark_detection_2d_rt_detr.cpp
+)
+
+# Request C++17 once, through target properties. CXX_EXTENSIONS OFF keeps the
+# dialect at -std=c++17 (not gnu++17), matching the flag that used to be passed by hand.
+set_target_properties(benchmark_detection_2d_rt_detr PROPERTIES
+  CXX_STANDARD 17
+  CXX_STANDARD_REQUIRED ON
+  CXX_EXTENSIONS OFF
+)
+
+target_compile_options(benchmark_detection_2d_rt_detr PRIVATE
+  -O3
+  -Wextra
+  -Wdeprecated
+  -fPIC
+)
+
+# Only OpenCV needs an explicit include path; project headers arrive through the
+# usage requirements of the linked targets. There is no benchmark/include directory.
+target_include_directories(benchmark_detection_2d_rt_detr PRIVATE
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+# An executable has no consumers, so its dependencies are PRIVATE.
+target_link_libraries(benchmark_detection_2d_rt_detr PRIVATE
+  benchmark::benchmark
+  glog::glog
+  ${OpenCV_LIBS}
+  deploy_core
+  image_processing_utils
+  detection_2d_rt_detr
+  benchmark_utils
+  ${platform_core_packages}
+)
+
+# Expose each enabled backend to the source file as a preprocessor symbol; the
+# benchmark source guards its backend-specific sections with these macros.
+target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE
+  $<$<BOOL:${ENABLE_TENSORRT}>:ENABLE_TENSORRT>
+  $<$<BOOL:${ENABLE_RKNN}>:ENABLE_RKNN>
+  $<$<BOOL:${ENABLE_ORT}>:ENABLE_ORT>
+)
diff --git a/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp
@@ -0,0 +1,83 @@
+// NOTE(review): this file registers Google Benchmark cases (BENCHMARK / BENCHMARK_MAIN)
+// yet includes the GoogleTest header, while its CMake target links benchmark::benchmark
+// and no GTest target. Presumably <benchmark/benchmark.h> was intended - confirm.
+#include <gtest/gtest.h>
+
+#include "detection_2d_util/detection_2d_util.h"
+#include "detection_2d_rt_detr/rt_detr.h"
+#include "benchmark_utils/detection_2d_benchmark_utils.hpp"
+
+// The factory and helper functions below are called unqualified; presumably they
+// live in these project namespaces.
+using namespace inference_core;
+using namespace detection_2d;
+using namespace benchmark_utils;
+
+#ifdef ENABLE_TENSORRT
+
+#include "trt_core/trt_core.h"
+
+/**
+ * Assembles the RT-DETR detection model used by the TensorRT benchmarks.
+ *
+ * The engine is read from a fixed path under /workspace/models and combined with
+ * the CUDA detection pre-processor; the model is described as 640x640x3 with a
+ * cls_number of 80.
+ *
+ * @return The object produced by CreateRTDetrDetectionModel.
+ */
+std::shared_ptr<BaseDetectionModel> CreateRTDetrTensorRTModel()
+{
+  // Geometry and class count forwarded to the model factory.
+  constexpr int kHeight   = 640;
+  constexpr int kWidth    = 640;
+  constexpr int kChannels = 3;
+  constexpr int kClasses  = 80;
+
+  // Blob names forwarded to the model factory.
+  const std::vector<std::string> in_blobs  = {"images"};
+  const std::vector<std::string> out_blobs = {"labels", "boxes", "scores"};
+
+  std::string engine_file = "/workspace/models/rt_detr_v2_single_input.engine";
+
+  // Inference core first, then the pre-processor.
+  auto core = CreateTrtInferCore(engine_file);
+  auto pre  = CreateCudaDetPreProcess();
+
+  return CreateRTDetrDetectionModel(core, pre, kHeight, kWidth, kChannels, kClasses, in_blobs,
+                                    out_blobs);
+}
+
+// Benchmark case: passes the benchmark state and a freshly created TensorRT
+// RT-DETR model to benchmark_detection_2d_sync.
+static void benchmark_detection_2d_rt_detr_tensorrt_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateRTDetrTensorRTModel());
+}
+// Benchmark case: same model, driven through benchmark_detection_2d_async instead.
+static void benchmark_detection_2d_rt_detr_tensorrt_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateRTDetrTensorRTModel());
+}
+// Register both cases with one argument (500) and real (wall-clock) time measurement.
+// NOTE(review): how the argument is interpreted is decided inside benchmark_utils -
+// presumably a per-run workload size; confirm there.
+BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_sync)->Arg(500)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_async)->Arg(500)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_ORT
+
+#include "ort_core/ort_core.h"
+
+/**
+ * Assembles the RT-DETR detection model used by the ONNX Runtime benchmarks.
+ *
+ * The .onnx file is read from a fixed path under /workspace/models and combined
+ * with the CPU detection pre-processor; the model is described as 640x640x3 with
+ * a cls_number of 80.
+ *
+ * @return The object produced by CreateRTDetrDetectionModel.
+ */
+std::shared_ptr<BaseDetectionModel> CreateRTDetrOnnxRuntimeModel()
+{
+  // Geometry and class count forwarded to the model factory.
+  constexpr int kHeight   = 640;
+  constexpr int kWidth    = 640;
+  constexpr int kChannels = 3;
+  constexpr int kClasses  = 80;
+
+  // Blob names forwarded to the model factory.
+  const std::vector<std::string> in_blobs  = {"images"};
+  const std::vector<std::string> out_blobs = {"labels", "boxes", "scores"};
+
+  std::string onnx_file = "/workspace/models/rt_detr_v2_single_input.onnx";
+
+  // Inference core first, then the pre-processor.
+  auto core = CreateOrtInferCore(onnx_file);
+  auto pre  = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true);
+
+  return CreateRTDetrDetectionModel(core, pre, kHeight, kWidth, kChannels, kClasses, in_blobs,
+                                    out_blobs);
+}
+
+// Benchmark case: passes the benchmark state and a freshly created ONNX Runtime
+// RT-DETR model to benchmark_detection_2d_sync.
+static void benchmark_detection_2d_rt_detr_onnxruntime_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateRTDetrOnnxRuntimeModel());
+}
+// Benchmark case: same model, driven through benchmark_detection_2d_async instead.
+static void benchmark_detection_2d_rt_detr_onnxruntime_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateRTDetrOnnxRuntimeModel());
+}
+// Register both cases with one argument (100) and real (wall-clock) time measurement.
+// NOTE(review): how the argument is interpreted is decided inside benchmark_utils -
+// presumably a per-run workload size; confirm there.
+BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_sync)->Arg(100)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_async)->Arg(100)->UseRealTime();
+
+#endif
+
+BENCHMARK_MAIN();
diff --git a/detection_2d/detection_2d_yolov8/CMakeLists.txt b/detection_2d/detection_2d_yolov8/CMakeLists.txt
@@ -32,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+# Descend into benchmark/ (the Google Benchmark executable for this model) only
+# when BUILD_BENCHMARK evaluates to true, mirroring the BUILD_TESTING guard above.
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
diff --git a/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+# Build script for the YOLOv8 2D detection benchmark executable (Google Benchmark).
+# All build settings are attached to the target itself rather than to the directory.
+
+# Collect the inference-backend libraries selected at configure time. The list is
+# appended to (not reset), so entries already present in the enclosing scope are kept.
+if(ENABLE_TENSORRT)
+  list(APPEND platform_core_packages trt_core)
+endif()
+
+if(ENABLE_RKNN)
+  list(APPEND platform_core_packages rknn_core)
+endif()
+
+if(ENABLE_ORT)
+  list(APPEND platform_core_packages ort_core)
+endif()
+
+find_package(glog REQUIRED)
+find_package(OpenCV REQUIRED)
+find_package(benchmark REQUIRED)
+
+add_executable(benchmark_detection_2d_yolov8
+  benchmark_detection_2d_yolov8.cpp
+)
+
+# Request C++17 once, through target properties. CXX_EXTENSIONS OFF keeps the
+# dialect at -std=c++17 (not gnu++17), matching the flag that used to be passed by hand.
+set_target_properties(benchmark_detection_2d_yolov8 PROPERTIES
+  CXX_STANDARD 17
+  CXX_STANDARD_REQUIRED ON
+  CXX_EXTENSIONS OFF
+)
+
+target_compile_options(benchmark_detection_2d_yolov8 PRIVATE
+  -O3
+  -Wextra
+  -Wdeprecated
+  -fPIC
+)
+
+# Only OpenCV needs an explicit include path; project headers arrive through the
+# usage requirements of the linked targets. There is no benchmark/include directory.
+target_include_directories(benchmark_detection_2d_yolov8 PRIVATE
+  ${OpenCV_INCLUDE_DIRS}
+)
+
+# An executable has no consumers, so its dependencies are PRIVATE.
+target_link_libraries(benchmark_detection_2d_yolov8 PRIVATE
+  benchmark::benchmark
+  glog::glog
+  ${OpenCV_LIBS}
+  deploy_core
+  image_processing_utils
+  detection_2d_yolov8
+  benchmark_utils
+  ${platform_core_packages}
+)
+
+# Expose each enabled backend to the source file as a preprocessor symbol; the
+# benchmark source guards its backend-specific sections with these macros.
+target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE
+  $<$<BOOL:${ENABLE_TENSORRT}>:ENABLE_TENSORRT>
+  $<$<BOOL:${ENABLE_RKNN}>:ENABLE_RKNN>
+  $<$<BOOL:${ENABLE_ORT}>:ENABLE_ORT>
+)
diff --git a/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp
@@ -0,0 +1,124 @@
+// NOTE(review): this file registers Google Benchmark cases (BENCHMARK / BENCHMARK_MAIN)
+// yet includes the GoogleTest header, while its CMake target links benchmark::benchmark
+// and no GTest target. Presumably <benchmark/benchmark.h> was intended - confirm.
+#include <gtest/gtest.h>
+
+#include "detection_2d_util/detection_2d_util.h"
+#include "detection_2d_yolov8/yolov8.h"
+#include "benchmark_utils/detection_2d_benchmark_utils.hpp"
+
+// The factory and helper functions below are called unqualified; presumably they
+// live in these project namespaces.
+using namespace inference_core;
+using namespace detection_2d;
+using namespace benchmark_utils;
+
+#ifdef ENABLE_TENSORRT
+
+#include "trt_core/trt_core.h"
+
+/**
+ * Assembles the YOLOv8 detection model used by the TensorRT benchmarks.
+ *
+ * The engine is read from a fixed path under /workspace/models and combined with
+ * the CUDA detection pre-processor and the "CpuOrigin" post-processor; the model
+ * is described as 640x640x3 with a cls_number of 80.
+ *
+ * @return The object produced by CreateYolov8DetectionModel.
+ */
+std::shared_ptr<BaseDetectionModel> CreateYolov8TensorRTModel()
+{
+  // Geometry and class count shared by the post-processor and the model factory.
+  constexpr int kHeight   = 640;
+  constexpr int kWidth    = 640;
+  constexpr int kChannels = 3;
+  constexpr int kClasses  = 80;
+
+  // Blob names forwarded to the model factory.
+  const std::vector<std::string> in_blobs  = {"images"};
+  const std::vector<std::string> out_blobs = {"output0"};
+
+  std::string engine_file = "/workspace/models/yolov8n.engine";
+
+  // Stages are created in pipeline order: inference core, pre-process, post-process.
+  auto core = CreateTrtInferCore(engine_file);
+  auto pre  = CreateCudaDetPreProcess();
+  auto post = CreateYolov8PostProcessCpuOrigin(kHeight, kWidth, kClasses);
+
+  return CreateYolov8DetectionModel(core, pre, post, kHeight, kWidth, kChannels, kClasses,
+                                    in_blobs, out_blobs);
+}
+
+// Benchmark case: passes the benchmark state and a freshly created TensorRT
+// YOLOv8 model to benchmark_detection_2d_sync.
+static void benchmark_detection_2d_yolov8_tensorrt_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8TensorRTModel());
+}
+// Benchmark case: same model, driven through benchmark_detection_2d_async instead.
+static void benchmark_detection_2d_yolov8_tensorrt_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8TensorRTModel());
+}
+// Register both cases with one argument (1000) and real (wall-clock) time measurement.
+// NOTE(review): how the argument is interpreted is decided inside benchmark_utils -
+// presumably a per-run workload size; confirm there.
+BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_sync)->Arg(1000)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_async)->Arg(1000)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_ORT
+
+#include "ort_core/ort_core.h"
+
+/**
+ * Assembles the YOLOv8 detection model used by the ONNX Runtime benchmarks.
+ *
+ * The .onnx file is read from a fixed path under /workspace/models and combined
+ * with the CPU detection pre-processor and the "CpuOrigin" post-processor; the
+ * model is described as 640x640x3 with a cls_number of 80.
+ *
+ * @return The object produced by CreateYolov8DetectionModel.
+ */
+std::shared_ptr<BaseDetectionModel> CreateYolov8OnnxRuntimeModel()
+{
+  // Geometry and class count shared by the post-processor and the model factory.
+  constexpr int kHeight   = 640;
+  constexpr int kWidth    = 640;
+  constexpr int kChannels = 3;
+  constexpr int kClasses  = 80;
+
+  // Blob names forwarded to the model factory.
+  const std::vector<std::string> in_blobs  = {"images"};
+  const std::vector<std::string> out_blobs = {"output0"};
+
+  std::string onnx_file = "/workspace/models/yolov8n.onnx";
+
+  // Stages are created in pipeline order: inference core, pre-process, post-process.
+  auto core = CreateOrtInferCore(onnx_file);
+  auto pre  = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true);
+  auto post = CreateYolov8PostProcessCpuOrigin(kHeight, kWidth, kClasses);
+
+  return CreateYolov8DetectionModel(core, pre, post, kHeight, kWidth, kChannels, kClasses,
+                                    in_blobs, out_blobs);
+}
+
+// Benchmark case: passes the benchmark state and a freshly created ONNX Runtime
+// YOLOv8 model to benchmark_detection_2d_sync.
+static void benchmark_detection_2d_yolov8_onnxruntime_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8OnnxRuntimeModel());
+}
+// Benchmark case: same model, driven through benchmark_detection_2d_async instead.
+static void benchmark_detection_2d_yolov8_onnxruntime_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8OnnxRuntimeModel());
+}
+// Register both cases with one argument (200) and real (wall-clock) time measurement.
+// NOTE(review): how the argument is interpreted is decided inside benchmark_utils -
+// presumably a per-run workload size; confirm there.
+BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_sync)->Arg(200)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_async)->Arg(200)->UseRealTime();
+
+#endif
+
+#ifdef ENABLE_RKNN
+
+#include "rknn_core/rknn_core.h"
+
+/**
+ * Assembles the YOLOv8 detection model used by the RKNN benchmarks.
+ *
+ * The .rknn file is read from a fixed path under /workspace/models; its "images"
+ * input is declared as RK_UINT8. It is combined with the CPU detection
+ * pre-processor and the "CpuDivide" post-processor, and the model is described
+ * as 640x640x3 with a cls_number of 80 and nine named output blobs.
+ *
+ * @return The object produced by CreateYolov8DetectionModel.
+ */
+std::shared_ptr<BaseDetectionModel> CreateYolov8RknnModel()
+{
+  // Geometry and class count shared by the post-processor and the model factory.
+  constexpr int kHeight   = 640;
+  constexpr int kWidth    = 640;
+  constexpr int kChannels = 3;
+  constexpr int kClasses  = 80;
+
+  // Blob names forwarded to the model factory; the nine outputs are listed three per row.
+  const std::vector<std::string> in_blobs  = {"images"};
+  const std::vector<std::string> out_blobs = {
+      "318", "onnx::ReduceSum_326", "331",
+      "338", "onnx::ReduceSum_346", "350",
+      "357", "onnx::ReduceSum_365", "369"};
+
+  std::string rknn_file = "/workspace/models/yolov8n_divide_opset11.rknn";
+
+  // Stages are created in pipeline order: inference core, pre-process, post-process.
+  auto core = CreateRknnInferCore(rknn_file, {{"images", RknnInputTensorType::RK_UINT8}});
+  auto pre  = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false);
+  auto post = CreateYolov8PostProcessCpuDivide(kHeight, kWidth, kClasses);
+
+  return CreateYolov8DetectionModel(core, pre, post, kHeight, kWidth, kChannels, kClasses,
+                                    in_blobs, out_blobs);
+}
+
+// Benchmark case: passes the benchmark state and a freshly created RKNN YOLOv8
+// model to benchmark_detection_2d_sync.
+static void benchmark_detection_2d_yolov8_rknn_sync(benchmark::State &state)
+{
+  benchmark_detection_2d_sync(state, CreateYolov8RknnModel());
+}
+// Benchmark case: same model, driven through benchmark_detection_2d_async instead.
+static void benchmark_detection_2d_yolov8_rknn_async(benchmark::State &state)
+{
+  benchmark_detection_2d_async(state, CreateYolov8RknnModel());
+}
+// Register both cases with one argument (500) and real (wall-clock) time measurement.
+// NOTE(review): how the argument is interpreted is decided inside benchmark_utils -
+// presumably a per-run workload size; confirm there.
+BENCHMARK(benchmark_detection_2d_yolov8_rknn_sync)->Arg(500)->UseRealTime();
+BENCHMARK(benchmark_detection_2d_yolov8_rknn_async)->Arg(500)->UseRealTime();
+
+#endif
+
+BENCHMARK_MAIN();
diff --git a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp
@@ -116,8 +116,8 @@ class Yolov8_Rknn_Fixture : public BaseYolov8Fixture {
     const int                      cls_number     = 80;
     const std::vector<std::string> input_blobs_name  = {"images"};
     const std::vector<std::string> output_blobs_name = {"318", "onnx::ReduceSum_326", "331",
-                                                      "338", "onnx::ReduceSum_346", "350",
-                                                      "357", "onnx::ReduceSum_365", "369"};
+                                                        "338", "onnx::ReduceSum_346", "350",
+                                                        "357", "onnx::ReduceSum_365", "369"};
 
     auto infer_core  = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}});
     auto preprocess  = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false);

diff --git a/easy_deploy_tool b/easy_deploy_tool
diff --git a/sam/sam_mobilesam/CMakeLists.txt b/sam/sam_mobilesam/CMakeLists.txt
@@ -33,3 +33,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include)
 if (BUILD_TESTING)
   add_subdirectory(test)
 endif()
+
+# Descend into the benchmark/ subdirectory only when BUILD_BENCHMARK evaluates
+# to true, mirroring the BUILD_TESTING guard above.
+if (BUILD_BENCHMARK)
+  add_subdirectory(benchmark)
+endif()
+4 −4		deploy_core/include/deploy_core/base_infer_core.h
+4 −4		deploy_core/include/deploy_core/block_queue.h
+2 −2		deploy_core/src/base_infer_core.cpp
+1 −0		deploy_utils/CMakeLists.txt
+32 −0		deploy_utils/benchmark_utils/CMakeLists.txt
+15 −0		deploy_utils/benchmark_utils/include/benchmark_utils/detection_2d_benchmark_utils.hpp
+13 −0		deploy_utils/benchmark_utils/include/benchmark_utils/sam_benchmark_utils.hpp
+47 −0		deploy_utils/benchmark_utils/src/detection_2d_benchmark_utils.cpp
+45 −0		deploy_utils/benchmark_utils/src/sam_benchmark_utils.cpp
+2 −1		docker/jetson_tensorrt_trt10_u2204.dockerfile
+2 −1		docker/jetson_tensorrt_trt8_u2004.dockerfile
+2 −1		docker/jetson_tensorrt_trt8_u2204.dockerfile
+2 −1		docker/nvidia_gpu_tensorrt_trt10_u2204.dockerfile
+2 −1		docker/nvidia_gpu_tensorrt_trt8_u2004.dockerfile
+2 −1		docker/nvidia_gpu_tensorrt_trt8_u2204.dockerfile
+2 −1		docker/rknn_230_u2204.dockerfile
+3 −3		inference_core/rknn_core/src/rknn_core.cpp