diff --git a/detection_2d/detection_2d_rt_detr/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/CMakeLists.txt index 10cc562..142c8e9 100644 --- a/detection_2d/detection_2d_rt_detr/CMakeLists.txt +++ b/detection_2d/detection_2d_rt_detr/CMakeLists.txt @@ -5,12 +5,9 @@ add_compile_options(-std=c++17) add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) set(CMAKE_CXX_STANDARD 17) - find_package(OpenCV REQUIRED) find_package(glog REQUIRED) - - include_directories( include ${OpenCV_INCLUDE_DIRS} @@ -35,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt new file mode 100644 index 0000000..6d47fb3 --- /dev/null +++ b/detection_2d/detection_2d_rt_detr/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_detection_2d_rt_detr.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_detection_2d_rt_detr ${source_file}) + +target_link_libraries(benchmark_detection_2d_rt_detr PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + detection_2d_rt_detr + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_detection_2d_rt_detr PRIVATE ENABLE_ORT) +endif() diff --git a/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp new file mode 100644 index 0000000..6a545d1 --- /dev/null +++ b/detection_2d/detection_2d_rt_detr/benchmark/benchmark_detection_2d_rt_detr.cpp @@ -0,0 +1,83 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "detection_2d_rt_detr/rt_detr.h" +#include "benchmark_utils/detection_2d_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateRTDetrTensorRTModel() +{ + std::string model_path = "/workspace/models/rt_detr_v2_single_input.engine"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"labels", "boxes", "scores"}; + + auto infer_core = CreateTrtInferCore(model_path); + auto preprocess = CreateCudaDetPreProcess(); + + auto rt_detr_model = + CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels, + cls_number, input_blobs_name, output_blobs_name); + return rt_detr_model; +} + +static void benchmark_detection_2d_rt_detr_tensorrt_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateRTDetrTensorRTModel()); +} +static void benchmark_detection_2d_rt_detr_tensorrt_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateRTDetrTensorRTModel()); +} +BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_sync)->Arg(500)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_rt_detr_tensorrt_async)->Arg(500)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateRTDetrOnnxRuntimeModel() +{ + std::string model_path = "/workspace/models/rt_detr_v2_single_input.onnx"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"labels", "boxes", "scores"}; + + auto infer_core = CreateOrtInferCore(model_path); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true); + + auto rt_detr_model = + CreateRTDetrDetectionModel(infer_core, preprocess, input_height, input_width, input_channels, + cls_number, input_blobs_name, output_blobs_name); + return rt_detr_model; +} + +static void benchmark_detection_2d_rt_detr_onnxruntime_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateRTDetrOnnxRuntimeModel()); +} +static void benchmark_detection_2d_rt_detr_onnxruntime_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateRTDetrOnnxRuntimeModel()); +} +BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_rt_detr_onnxruntime_async)->Arg(100)->UseRealTime(); + +#endif + +BENCHMARK_MAIN(); diff --git a/detection_2d/detection_2d_yolov8/CMakeLists.txt b/detection_2d/detection_2d_yolov8/CMakeLists.txt index 512ec51..8d58d24 100644 --- a/detection_2d/detection_2d_yolov8/CMakeLists.txt +++ b/detection_2d/detection_2d_yolov8/CMakeLists.txt @@ -32,3 +32,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt new file mode 100644 index 0000000..d1df83d --- /dev/null +++ b/detection_2d/detection_2d_yolov8/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_detection_2d_yolov8.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_detection_2d_yolov8 ${source_file}) + +target_link_libraries(benchmark_detection_2d_yolov8 PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + detection_2d_yolov8 + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_detection_2d_yolov8 PRIVATE ENABLE_ORT) +endif() diff --git a/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp new file mode 100644 index 0000000..edca386 --- /dev/null +++ b/detection_2d/detection_2d_yolov8/benchmark/benchmark_detection_2d_yolov8.cpp @@ -0,0 +1,124 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "detection_2d_yolov8/yolov8.h" +#include "benchmark_utils/detection_2d_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateYolov8TensorRTModel() +{ + std::string model_path = "/workspace/models/yolov8n.engine"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"output0"}; + + auto infer_core = CreateTrtInferCore(model_path); + auto preprocess = CreateCudaDetPreProcess(); + auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_tensorrt_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8TensorRTModel()); +} +static void benchmark_detection_2d_yolov8_tensorrt_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8TensorRTModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_sync)->Arg(1000)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_tensorrt_async)->Arg(1000)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateYolov8OnnxRuntimeModel() +{ + std::string model_path = "/workspace/models/yolov8n.onnx"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"output0"}; + + auto infer_core = CreateOrtInferCore(model_path); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {255, 255, 255}, true, true); + auto postprocess = CreateYolov8PostProcessCpuOrigin(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_onnxruntime_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8OnnxRuntimeModel()); +} +static void benchmark_detection_2d_yolov8_onnxruntime_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8OnnxRuntimeModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_sync)->Arg(200)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_onnxruntime_async)->Arg(200)->UseRealTime(); + +#endif + +#ifdef ENABLE_RKNN + +#include "rknn_core/rknn_core.h" + +std::shared_ptr CreateYolov8RknnModel() +{ + std::string model_path = "/workspace/models/yolov8n_divide_opset11.rknn"; + const int input_height = 640; + const int input_width = 640; + const int input_channels = 3; + const int cls_number = 80; + const std::vector input_blobs_name = {"images"}; + const std::vector output_blobs_name = {"318", "onnx::ReduceSum_326", "331", + "338", "onnx::ReduceSum_346", "350", + "357", "onnx::ReduceSum_365", "369"}; + + auto infer_core = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}}); + auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false); + auto postprocess = CreateYolov8PostProcessCpuDivide(input_height, input_width, cls_number); + + auto yolov8_model = + CreateYolov8DetectionModel(infer_core, preprocess, postprocess, input_height, input_width, + input_channels, cls_number, input_blobs_name, output_blobs_name); + return yolov8_model; +} + +static void benchmark_detection_2d_yolov8_rknn_sync(benchmark::State &state) +{ + benchmark_detection_2d_sync(state, CreateYolov8RknnModel()); +} +static void benchmark_detection_2d_yolov8_rknn_async(benchmark::State &state) +{ + benchmark_detection_2d_async(state, CreateYolov8RknnModel()); +} +BENCHMARK(benchmark_detection_2d_yolov8_rknn_sync)->Arg(500)->UseRealTime(); +BENCHMARK(benchmark_detection_2d_yolov8_rknn_async)->Arg(500)->UseRealTime(); + +#endif + +BENCHMARK_MAIN(); diff --git a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp index 66e9369..af37d6e 100644 --- a/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp +++ b/detection_2d/detection_2d_yolov8/test/test_detection_2d_yolov8.cpp @@ -116,8 +116,8 @@ class Yolov8_Rknn_Fixture : public BaseYolov8Fixture { const int cls_number = 80; const std::vector input_blobs_name = {"images"}; const std::vector output_blobs_name = {"318", "onnx::ReduceSum_326", "331", - "338", "onnx::ReduceSum_346", "350", - "357", "onnx::ReduceSum_365", "369"}; + "338", "onnx::ReduceSum_346", "350", + "357", "onnx::ReduceSum_365", "369"}; auto infer_core = CreateRknnInferCore(model_path, {{"images", RknnInputTensorType::RK_UINT8}}); auto preprocess = CreateCpuDetPreProcess({0, 0, 0}, {1, 1, 1}, false, false); diff --git a/easy_deploy_tool b/easy_deploy_tool index 4a01290..6c254c6 160000 --- a/easy_deploy_tool +++ b/easy_deploy_tool @@ -1 +1 @@ -Subproject commit 4a012904f39be0c35f0da9921e5c761f3ef1e2bb +Subproject commit 6c254c6d53e429513d46924f96fb1e543364497f diff --git a/sam/sam_mobilesam/CMakeLists.txt b/sam/sam_mobilesam/CMakeLists.txt index 66fdf30..ac8dfe4 100644 --- a/sam/sam_mobilesam/CMakeLists.txt +++ b/sam/sam_mobilesam/CMakeLists.txt @@ -33,3 +33,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) if (BUILD_TESTING) add_subdirectory(test) endif() + +if (BUILD_BENCHMARK) + add_subdirectory(benchmark) +endif() diff --git a/sam/sam_mobilesam/benchmark/CMakeLists.txt b/sam/sam_mobilesam/benchmark/CMakeLists.txt new file mode 100644 index 0000000..7baea5a --- /dev/null +++ b/sam/sam_mobilesam/benchmark/CMakeLists.txt @@ -0,0 +1,53 @@ +add_compile_options(-std=c++17) +add_compile_options(-O3 -Wextra -Wdeprecated -fPIC) +set(CMAKE_CXX_STANDARD 17) + +if(ENABLE_TENSORRT) + list(APPEND platform_core_packages trt_core) +endif() + +if(ENABLE_RKNN) + list(APPEND platform_core_packages rknn_core) +endif() + +if(ENABLE_ORT) + list(APPEND platform_core_packages ort_core) +endif() + +find_package(glog REQUIRED) +find_package(OpenCV REQUIRED) +find_package(benchmark REQUIRED) + +set(source_file + benchmark_sam_mobilesam.cpp +) + +include_directories( + include + ${OpenCV_INCLUDE_DIRS} +) + +add_executable(benchmark_sam_mobilesam ${source_file}) + +target_link_libraries(benchmark_sam_mobilesam PUBLIC + benchmark::benchmark + glog::glog + ${OpenCV_LIBS} + deploy_core + image_processing_utils + sam_mobilesam + benchmark_utils + ${platform_core_packages} +) + +if(ENABLE_TENSORRT) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_TENSORRT) +endif() + +if(ENABLE_RKNN) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_RKNN) +endif() + +if(ENABLE_ORT) + target_compile_definitions(benchmark_sam_mobilesam PRIVATE ENABLE_ORT) +endif() diff --git a/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp new file mode 100644 index 0000000..fbfa1a5 --- /dev/null +++ b/sam/sam_mobilesam/benchmark/benchmark_sam_mobilesam.cpp @@ -0,0 +1,194 @@ +#include + +#include "detection_2d_util/detection_2d_util.h" +#include "sam_mobilesam/mobilesam.h" +#include "benchmark_utils/sam_benchmark_utils.hpp" + +using namespace inference_core; +using namespace detection_2d; +using namespace sam; +using namespace benchmark_utils; + +#ifdef ENABLE_TENSORRT + +#include "trt_core/trt_core.h" + +std::shared_ptr CreateSAMTensorRTModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.engine"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.engine"; + + auto image_encoder = CreateTrtInferCore(image_encoder_model_path); + + const int SAM_MAX_BOX = 1; + const int SAM_MAX_POINTS = 8; + + auto box_decoder_factory = + CreateTrtInferCoreFactory(box_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"boxes", {1, SAM_MAX_BOX, 4}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto point_decoder_factory = + CreateTrtInferCoreFactory(point_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"point_coords", {1, SAM_MAX_POINTS, 2}}, + {"point_labels", {1, SAM_MAX_POINTS}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto image_preprocess_factory = CreateCudaDetPreProcessFactory(); + + return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_mobilesam +static void benchmark_sam_mobilesam_tensorrt_sync(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine"; + benchmark_sam_sync(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path)); +} +static void benchmark_sam_mobilesam_tensorrt_async(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.engine"; + benchmark_sam_async(state, CreateSAMTensorRTModel(mobilesam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_mobilesam_tensorrt_sync)->Arg(100)->UseRealTime(); +BENCHMARK(benchmark_sam_mobilesam_tensorrt_async)->Arg(100)->UseRealTime(); + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_tensorrt_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine"; + benchmark_sam_sync(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_tensorrt_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.engine"; + benchmark_sam_async(state, CreateSAMTensorRTModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_tensorrt_sync)->Arg(200)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_tensorrt_async)->Arg(200)->UseRealTime(); + +#endif + +#ifdef ENABLE_ORT + +#include "ort_core/ort_core.h" + +std::shared_ptr CreateSAMOnnxRuntimeModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.onnx"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.onnx"; + + auto image_encoder = CreateOrtInferCore(image_encoder_model_path); + + const int SAM_MAX_BOX = 1; + const int SAM_MAX_POINTS = 8; + + auto box_decoder_factory = + CreateOrtInferCoreFactory(box_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"boxes", {1, SAM_MAX_BOX, 4}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto point_decoder_factory = + CreateOrtInferCoreFactory(point_decoder_model_path, + { + {"image_embeddings", {1, 256, 64, 64}}, + {"point_coords", {1, SAM_MAX_POINTS, 2}}, + {"point_labels", {1, SAM_MAX_POINTS}}, + {"mask_input", {1, 1, 256, 256}}, + {"has_mask_input", {1}}, + }, + {{"masks", {1, 1, 256, 256}}, {"scores", {1, 1}}}); + + auto image_preprocess_factory = + CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, true, true); + + return CreateMobileSamModel(image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_mobilesam +static void benchmark_sam_mobilesam_onnxruntime_sync(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx"; + benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path)); +} +static void benchmark_sam_mobilesam_onnxruntime_async(benchmark::State &state) +{ + auto mobilesam_image_encoder_model_path = "/workspace/models/mobile_sam_encoder.onnx"; + benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(mobilesam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_mobilesam_onnxruntime_sync)->Arg(20)->UseRealTime(); +BENCHMARK(benchmark_sam_mobilesam_onnxruntime_async)->Arg(20)->UseRealTime(); + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_onnxruntime_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx"; + benchmark_sam_sync(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_onnxruntime_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.onnx"; + benchmark_sam_async(state, CreateSAMOnnxRuntimeModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_onnxruntime_sync)->Arg(50)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_onnxruntime_async)->Arg(50)->UseRealTime(); + +#endif + +#ifdef ENABLE_RKNN + +#include "rknn_core/rknn_core.h" + +std::shared_ptr CreateSAMRknnModel(const std::string &image_encoder_model_path) +{ + auto box_decoder_model_path = "/workspace/models/modified_mobile_sam_box.rknn"; + auto point_decoder_model_path = "/workspace/models/modified_mobile_sam_point.rknn"; + + auto nanosam_image_encoder = CreateRknnInferCore( + image_encoder_model_path, {{"images", RknnInputTensorType::RK_UINT8}}, 5, 2); + + auto box_decoder_factory = CreateRknnInferCoreFactory(box_decoder_model_path, {}, 5, 2); + + auto point_decoder_factory = CreateRknnInferCoreFactory(point_decoder_model_path, {}, 5, 2); + + auto image_preprocess_factory = + CreateCpuDetPreProcessFactory({0, 0, 0}, {255, 255, 255}, false, false); + + return CreateMobileSamModel(nanosam_image_encoder, point_decoder_factory->Create(), + box_decoder_factory->Create(), image_preprocess_factory->Create()); +} + +// benchmark sam_nanosam +static void benchmark_sam_nanosam_rknn_sync(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn"; + benchmark_sam_sync(state, CreateSAMRknnModel(nanosam_image_encoder_model_path)); +} +static void benchmark_sam_nanosam_rknn_async(benchmark::State &state) +{ + auto nanosam_image_encoder_model_path = "/workspace/models/nanosam_image_encoder_opset11.rknn"; + benchmark_sam_async(state, CreateSAMRknnModel(nanosam_image_encoder_model_path)); +} +BENCHMARK(benchmark_sam_nanosam_rknn_sync)->Arg(50)->UseRealTime(); +BENCHMARK(benchmark_sam_nanosam_rknn_async)->Arg(100)->UseRealTime(); + +#endif + +BENCHMARK_MAIN();