From ebf259a02201ac144789f40ebae417b736bdaa41 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Tue, 3 Jun 2025 21:37:30 +0800 Subject: [PATCH 1/6] Add yolov8 benchmarks Signed-off-by: zz990099 <771647586@qq.com> --- .../detection_2d_yolov8/CMakeLists.txt | 4 + .../benchmark/CMakeLists.txt | 53 ++++++++ .../benchmark_detection_2d_yolov8.cpp | 124 ++++++++++++++++++ .../test/test_detection_2d_yolov8.cpp | 4 +- 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt create mode 100644 detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp diff --git a/detection_2d/detection_2d_yolov8/CMakeLists.txt b/detection_2d/detection_2d_yolov8/CMakeLists.txt index 512ec51..8d58d24 100644 --- a/detection_2d/detection_2d_yolov8/CMakeLists.txt +++ b/detection_2d/detection_2d_yolov8/CMakeLists.txt @@ -32,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt new file mode 100644 index 0000000..d1df83d --- /dev/null +++ b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_detection_2d_yolov8.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_detection_2d_yolov8 ${source_file}) + +target_link_libraries(benchmark_detection_2d_yolov8 PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + detection_2d_yolov8 + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_ORT) +endif() diff --git a/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp new file mode 100644 index 0000000..edca386 --- /dev/null +++ b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp @@ -0,0 +1,124 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "detection_2d_yolov8/yolov8.h" +#include "benchmark_utils/detection_2d_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateYolov8TensorRTModel() +{ + std::string model_path = "/workspace/models/yolov8n.engine"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"output0"}; + + auto infer_core = CreateTrtInferCore(model_path); + auto preprocess = CreateCudaDetPreProcess(); + auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_tensorrt_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8TensorRTModel()); +} +static void benchmark_detection_2d_yolov8_tensorrt_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8TensorRTModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_sync)->Arg(1000)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_async)->Arg(1000)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateYolov8OnnxRuntimeModel() +{ + std::string model_path = "/workspace/models/yolov8n.onnx"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"output0"}; + + auto infer_core = CreateOrtInferCore(model_path); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true); + auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_onnxruntime_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8OnnxRuntimeModel()); +} +static void benchmark_detection_2d_yolov8_onnxruntime_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8OnnxRuntimeModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_sync)->Arg(200)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_async)->Arg(200)->UseRealTime(); + +#endif + +#ifdef ENABLE_RKNN + +#include "rknn_core/rknn_core.h" + +std::shared_ptr CreateYolov8RknnModel() +{ + std::string model_path = "/workspace/models/yolov8n_divide_opset11.rknn"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"318", "onnx::ReduceSum_326", "331", + "338", "onnx::ReduceSum_346", "350", + "357", "onnx::ReduceSum_365", "369"}; + + auto infer_core = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}}); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false); + auto postprocess = CreateYolov8PostProcessCpuDivide(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_rknn_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8RknnModel()); +} +static void benchmark_detection_2d_yolov8_rknn_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8RknnModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_rknn_sync)->Arg(500)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_rknn_async)->Arg(500)->UseRealTime(); + +#endif + +BENCHMARK_MAIN(); diff --git a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp index 66e9369..af37d6e 100644 --- a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp +++ b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp @@ -116,8 +116,8 @@ class Yolov8_Rknn_Fixture : public BaseYolov8Fixture { const int cls_number = 80; const std::vector input_blobs_name = {"images"}; const std::vector output_blobs_name = {"318", "onnx::ReduceSum_326", "331", - "338", "onnx::ReduceSum_346", "350", - "357", "onnx::ReduceSum_365", "369"}; + "338", "onnx::ReduceSum_346", "350", + "357", "onnx::ReduceSum_365", "369"}; auto infer_core = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}}); auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false); From dc835b626d76e2b8eaf14bc0dbd4ae23c49c87e1 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Tue, 3 Jun 2025 22:16:22 +0800 Subject: [PATCH 2/6] Add rt_detr benchmarks Signed-off-by: zz990099 <771647586@qq.com> --- .../detection_2d_rt_detr/CMakeLists.txt | 7 +- .../benchmark/CMakeLists.txt | 53 ++++++++++++ .../benchmark_detection_2d_rt_detr.cpp | 83 +++++++++++++++++++ 3 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt create mode 100644 detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp diff --git a/detection_2d/detection_2d_rt_detr/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/CMakeLists.txt index 10cc562..142c8e9 100644 --- a/detection_2d/detection_2d_rt_detr/CMakeLists.txt +++ b/detection_2d/detection_2d_rt_detr/CMakeLists.txt @@ -5,12 +5,9 @@ add_compile_options(-std=c++17) add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) set(CMAKE_CXX_STANDARD 17) - find_package(OpenCV REQUIRED) find_package(glog REQUIRED) - - include_directories( include ${OpenCV_INCLUDE_DIRS} @@ -35,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt new file mode 100644 index 0000000..6d47fb3 --- /dev/null +++ b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_detection_2d_rt_detr.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_detection_2d_rt_detr ${source_file}) + +target_link_libraries(benchmark_detection_2d_rt_detr PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + detection_2d_rt_detr + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_ORT) +endif() diff --git a/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp new file mode 100644 index 0000000..6a545d1 --- /dev/null +++ b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp @@ -0,0 +1,83 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "detection_2d_rt_detr/rt_detr.h" +#include "benchmark_utils/detection_2d_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateRTDetrTensorRTModel() +{ + std::string model_path = "/workspace/models/rt_detr_v2_single_input.engine"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"labels", "boxes", "scores"}; + + auto infer_core = CreateTrtInferCore(model_path); + auto preprocess = CreateCudaDetPreProcess(); + + auto rt_detr_model = + CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels, + cls_number, input_blobs_name, output_blobs_name); + return rt_detr_model; +} + +static void benchmark_detection_2d_rt_detr_tensorrt_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateRTDetrTensorRTModel()); +} +static void benchmark_detection_2d_rt_detr_tensorrt_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateRTDetrTensorRTModel()); +} +BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_sync)->Arg(500)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_async)->Arg(500)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateRTDetrOnnxRuntimeModel() +{ + std::string model_path = "/workspace/models/rt_detr_v2_single_input.onnx"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"labels", "boxes", "scores"}; + + auto infer_core = CreateOrtInferCore(model_path); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true); + + auto rt_detr_model = + CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels, + cls_number, input_blobs_name, output_blobs_name); + return rt_detr_model; +} + +static void benchmark_detection_2d_rt_detr_onnxruntime_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateRTDetrOnnxRuntimeModel()); +} +static void benchmark_detection_2d_rt_detr_onnxruntime_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateRTDetrOnnxRuntimeModel()); +} +BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_async)->Arg(100)->UseRealTime(); + +#endif + +BENCHMARK_MAIN(); From 9d9fcd9c079b0410190e6f637f0d78584121df65 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Tue, 3 Jun 2025 22:16:46 +0800 Subject: [PATCH 3/6] Add sam benchmarks Signed-off-by: zz990099 <771647586@qq.com> --- sam/sam_mobilesam/CMakeLists.txt | 4 + sam/sam_mobilesam/benchmark/CMakeLists.txt | 53 +++++ .../benchmark/benchmark_sam_mobilesam.cpp | 194 ++++++++++++++++++ 3 files changed, 251 insertions(+) create mode 100644 sam/sam_mobilesam/benchmark/CMakeLists.txt create mode 100644 sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp diff --git a/sam/sam_mobilesam/CMakeLists.txt b/sam/sam_mobilesam/CMakeLists.txt index 66fdf30..ac8dfe4 100644 --- a/sam/sam_mobilesam/CMakeLists.txt +++ b/sam/sam_mobilesam/CMakeLists.txt @@ -33,3 +33,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/sam/sam_mobilesam/benchmark/CMakeLists.txt b/sam/sam_mobilesam/benchmark/CMakeLists.txt new file mode 100644 index 0000000..7baea5a --- /dev/null +++ b/sam/sam_mobilesam/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_sam_mobilesam.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_sam_mobilesam ${source_file}) + +target_link_libraries(benchmark_sam_mobilesam PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + sam_mobilesam + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_ORT) +endif() diff --git a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp new file mode 100644 index 0000000..2cc6643 --- /dev/null +++ b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp @@ -0,0 +1,194 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "sam_mobilesam/mobilesam.h" +#include "benchmark_utils/sam_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace sam; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateSAMTensorRTModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.engine"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.engine"; + + auto image_encoder = CreateTrtInferCore(image_encoder_model_path); + + const int SAM_MAX_BOX = 1; + const int SAM_MAX_POINTS = 8; + + auto box_decoder_factory = + CreateTrtInferCoreFactory(box_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"boxes", {1, SAM_MAX_BOX, 4}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto point_decoder_factory = + CreateTrtInferCoreFactory(point_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"point_coords", {1, SAM_MAX_POINTS, 2}}, + {"point_labels", {1, SAM_MAX_POINTS}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto image_preprocess_factory = CreateCudaDetPreProcessFactory(); + + return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_mobilesam +static void benchmark_sam_mobilesam_tensorrt_sync(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine"; + benchmark_sam_sync(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path)); +} +static void benchmark_sam_mobilesam_tensorrt_async(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine"; + benchmark_sam_async(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_mobilesam_tensorrt_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_sam_mobilesam_tensorrt_async)->Arg(100)->UseRealTime(); + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_tensorrt_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine"; + benchmark_sam_sync(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_tensorrt_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine"; + benchmark_sam_async(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_tensorrt_sync)->Arg(200)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_tensorrt_async)->Arg(200)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateSAMOnnxRuntimeModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.onnx"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.onnx"; + + auto image_encoder = CreateOrtInferCore(image_encoder_model_path); + + const int SAM_MAX_BOX = 1; + const int SAM_MAX_POINTS = 8; + + auto box_decoder_factory = + CreateOrtInferCoreFactory(box_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"boxes", {1, SAM_MAX_BOX, 4}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto point_decoder_factory = + CreateOrtInferCoreFactory(point_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"point_coords", {1, SAM_MAX_POINTS, 2}}, + {"point_labels", {1, SAM_MAX_POINTS}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto image_preprocess_factory = + CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, true, true); + + return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_mobilesam +static void benchmark_sam_mobilesam_onnxruntime_sync(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx"; + benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path)); +} +static void benchmark_sam_mobilesam_onnxruntime_async(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx"; + benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_mobilesam_onnxruntime_sync)->Arg(20)->UseRealTime(); +BENCHMARK(benchmark_sam_mobilesam_onnxruntime_async)->Arg(20)->UseRealTime(); + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_onnxruntime_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx"; + benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_onnxruntime_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx"; + benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_onnxruntime_sync)->Arg(50)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_onnxruntime_async)->Arg(50)->UseRealTime(); + +#endif + +#ifdef ENABLE_RKNN + +#include "rknn_core/rknn_core.h" + +std::shared_ptr CreateSAMRknnModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.rknn"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.rknn"; + + auto nanosam_image_encoder = CreateRknnInferCore( + nanosam_image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2); + + auto box_decoder_factory = CreateRknnInferCoreFactory(box_decoder_model_path, {}, 5, 2); + + auto point_decoder_factory = CreateRknnInferCoreFactory(point_decoder_model_path, {}, 5, 2); + + auto image_preprocess_factory = + CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, false, false); + + return CreateMobileSamModel(nanosam_image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_rknn_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn"; + benchmark_sam_sync(state, CreateSAMRknnModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_rknn_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn"; + benchmark_sam_async(state, CreateSAMRknnModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_rknn_async)->Arg(100)->UseRealTime(); + +#endif + +BENCHMARK_MAIN(); From d3cb83ad59973e5b70eb1d366e1a18fc0eba405e Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Wed, 4 Jun 2025 21:26:06 +0800 Subject: [PATCH 4/6] Update easy_deploy_tool submodule Signed-off-by: zz990099 <771647586@qq.com> --- easy_deploy_tool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easy_deploy_tool b/easy_deploy_tool index 4a01290..a8dad8d 160000 --- a/easy_deploy_tool +++ b/easy_deploy_tool @@ -1 +1 @@ -Subproject commit 4a012904f39be0c35f0da9921e5c761f3ef1e2bb +Subproject commit a8dad8d3a09fa0e136ae670f7a7c3f82468360d1 From 26da144a56f9c9abe5d01f8b46f80ffd142106f7 Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Wed, 4 Jun 2025 22:10:42 +0800 Subject: [PATCH 5/6] fix mobilesam benchmarks issue Signed-off-by: zz990099 <771647586@qq.com> --- sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp index 2cc6643..fbfa1a5 100644 --- a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp +++ b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp @@ -162,7 +162,7 @@ std::shared_ptr CreateSAMRknnModel(const std::string &image_encode auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.rknn"; auto nanosam_image_encoder = CreateRknnInferCore( - nanosam_image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2); + image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2); auto box_decoder_factory = CreateRknnInferCoreFactory(box_decoder_model_path, {}, 5, 2); @@ -186,7 +186,7 @@ static void benchmark_sam_nanosam_rknn_async(benchmark::State &state) auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn"; benchmark_sam_async(state, CreateSAMRknnModel(nanosam_image_encoder_model_path)); } -BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(50)->UseRealTime(); BENCHMARK(benchmark_sam_nanosam_rknn_async)->Arg(100)->UseRealTime(); #endif From c024dec4b0daf7cbef3d8f4df83362478edf909e Mon Sep 17 00:00:00 2001 From: zz990099 <771647586@qq.com> Date: Wed, 4 Jun 2025 22:10:51 +0800 Subject: [PATCH 6/6] Update submodule Signed-off-by: zz990099 <771647586@qq.com> --- easy_deploy_tool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easy_deploy_tool b/easy_deploy_tool index a8dad8d..6c254c6 160000 --- a/easy_deploy_tool +++ b/easy_deploy_tool @@ -1 +1 @@ -Subproject commit a8dad8d3a09fa0e136ae670f7a7c3f82468360d1 +Subproject commit 6c254c6d53e429513d46924f96fb1e543364497f