From d89fe87279066d6431fdd4eadcf37f01df70de28 Mon Sep 17 00:00:00 2001
From: Sandeep Babu
Date: Fri, 8 Jul 2022 10:06:41 +0200
Subject: [PATCH 1/2] Updated code for TensorRT 8

---
 CMakeLists.txt              |  4 +--
 cmake/tensorrt-config.cmake |  5 ++--
 include/netTensorRT.hpp     | 16 +++++------
 src/net.cpp                 |  2 +-
 src/netTensorRT.cpp         | 57 ++++++++++++++++++++++++++-----------
 5 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b46daa7..0b10b46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
 set(CMAKE_CXX_FLAGS_RELEASE "-g -O3")
 
 ## Use C++11
-set (CMAKE_CXX_STANDARD 11)
+set (CMAKE_CXX_STANDARD 14)
 
 # check flags
 message("Build type: ${CMAKE_BUILD_TYPE}")
@@ -114,7 +114,7 @@ set(rangenet_lib_SOURCES src/net.cpp src/selector.cpp)
 
 if(TENSORRT_FOUND)
   message("Building TensorRT")
-  list(APPEND rangenet_lib_SOURCES src/netTensorRT.cpp)
+  list(APPEND rangenet_lib_SOURCES src/netTensorRT.cpp )
 endif()
 
 # make the library and link stuff to it
diff --git a/cmake/tensorrt-config.cmake b/cmake/tensorrt-config.cmake
index 014deec..527fe6a 100644
--- a/cmake/tensorrt-config.cmake
+++ b/cmake/tensorrt-config.cmake
@@ -9,7 +9,7 @@ find_library(NVONNXPARSER NAMES nvonnxparser)
 find_library(NVONNXPARSERRUNTIME NAMES nvonnxparser_runtime)
 
 # If it is ALL there, export libraries as a single package
-if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER AND NVONNXPARSERRUNTIME)
+if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER )
   message("TensorRT available!")
   message("CUDA Libs: ${CUDA_LIBRARIES}")
   message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
@@ -18,7 +18,8 @@ if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER AND N
   message("NVPARSERS: ${NVPARSERS}")
   message("NVONNXPARSER: ${NVONNXPARSER}")
   message("NVONNXPARSERRUNTIME: ${NVONNXPARSERRUNTIME}")
-  list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser nvonnxparser_runtime)
+  # list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser nvonnxparser_runtime)
+  list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser)
   message("All togheter now (libs): ${TENSORRT_LIBRARIES}")
   list(APPEND TENSORRT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
   message("All togheter now (inc): ${TENSORRT_INCLUDE_DIRS}")
diff --git a/include/netTensorRT.hpp b/include/netTensorRT.hpp
index dce3259..f902d6c 100644
--- a/include/netTensorRT.hpp
+++ b/include/netTensorRT.hpp
@@ -8,7 +8,7 @@
 // For plugin factory
 #include <NvInfer.h>
 #include <NvOnnxParser.h>
-#include <NvOnnxParserRuntime.h>
+//#include <NvOnnxParserRuntime.h>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_runtime_api.h>
@@ -20,7 +20,7 @@
   (1UL << 33) // gpu workspace size (8gb is pretty good)
 #define MIN_WORKSPACE_SIZE (1UL << 20) // gpu workspace size (pretty bad)
 
-#define DEVICE_DLA_0 0 // jetson DLA 0 enabled
+#define DEVICE_DLA_0 1 // jetson DLA 0 enabled
 #define DEVICE_DLA_1 0 // jetson DLA 1 enabled
 
 using namespace nvinfer1; // I'm taking a liberty because the code is
@@ -42,23 +42,23 @@ namespace segmentation {
 class Logger : public ILogger {
  public:
   void set_verbosity(bool verbose) { _verbose = verbose; }
-  void log(Severity severity, const char* msg) override {
+  void log(Severity severity, const char* msg) noexcept override {
     if (_verbose) {
       switch (severity) {
         case Severity::kINTERNAL_ERROR:
-          std::cerr << "INTERNAL_ERROR: ";
+          std::cout << "INTERNAL_ERROR: ";
           break;
         case Severity::kERROR:
-          std::cerr << "ERROR: ";
+          std::cout << "ERROR: ";
           break;
         case Severity::kWARNING:
-          std::cerr << "WARNING: ";
+          std::cout << "WARNING: ";
           break;
         case Severity::kINFO:
-          std::cerr << "INFO: ";
+          std::cout << "INFO: ";
           break;
         default:
-          std::cerr << "UNKNOWN: ";
+          std::cout << "UNKNOWN: ";
           break;
       }
       std::cout << msg << std::endl;
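Note: TensorRT 8 declares ILogger::log as noexcept, which is why the signature above gains the qualifier; the override keyword is kept so the compiler verifies the match against the base class (a non-noexcept override of a noexcept virtual does not compile). A minimal sketch of a conforming TensorRT 8 logger; the class name and the severity filter are illustrative, not part of this patch:

    #include <NvInfer.h>
    #include <iostream>

    // Minimal TensorRT 8 logger sketch: the base method is noexcept, so the
    // override must be noexcept as well.
    class MinimalLogger : public nvinfer1::ILogger {
      void log(Severity severity, const char* msg) noexcept override {
        // Severities are ordered kINTERNAL_ERROR < kERROR < kWARNING < kINFO,
        // so this prints warnings and everything more severe.
        if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
      }
    };
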
std::cerr << "WARNING: "; + std::cout << "WARNING: "; break; case Severity::kINFO: - std::cerr << "INFO: "; + std::cout << "INFO: "; break; default: - std::cerr << "UNKNOWN: "; + std::cout << "UNKNOWN: "; break; } std::cout << msg << std::endl; diff --git a/src/net.cpp b/src/net.cpp index d172cc1..be66572 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -19,7 +19,7 @@ namespace segmentation { * @param[in] model_path The model path for the inference model directory */ Net::Net(const std::string& model_path) - : _model_path(model_path), _verbose(false) { + : _model_path(model_path), _verbose(true) { // set default verbosity level verbosity(_verbose); diff --git a/src/netTensorRT.cpp b/src/netTensorRT.cpp index caace09..4a4ba78 100644 --- a/src/netTensorRT.cpp +++ b/src/netTensorRT.cpp @@ -278,7 +278,7 @@ std::vector> NetTensorRT::infer(const std::vector& sca } - _context->enqueue(1, &_deviceBuffers[_inBindIdx], _cudaStream, nullptr); + _context->enqueueV2(&_deviceBuffers[_inBindIdx], _cudaStream, nullptr); if (_verbose) { CUDA_CHECK(cudaStreamSynchronize(_cudaStream)); @@ -415,8 +415,8 @@ void NetTensorRT::deserializeEngine(const std::string& engine_path) { } // if using DLA, set the desired core before deserialization occurs -#if NV_TENSORRT_MAJOR >= 5 && \ - !(NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR == 0 && \ +#if NV_TENSORRT_MAJOR >= 8 && \ + !(NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR == 0 && \ NV_TENSORRT_PATCH == 0) if (DEVICE_DLA_0) { infer->setDLACore(0); @@ -449,11 +449,11 @@ void NetTensorRT::deserializeEngine(const std::string& engine_path) { << std::endl; // because I use onnx-tensorRT i have to use their plugin factory - nvonnxparser::IPluginFactory* plug_fact = - nvonnxparser::createPluginFactory(_gLogger); + // nvinfer1::IPluginV2* plug_fact = + // nvinfer1::IPluginCreator::createPlugin(_gLogger); // Now deserialize - _engine = infer->deserializeCudaEngine(modelMem, modelSize, plug_fact); + _engine = infer->deserializeCudaEngine(modelMem, modelSize, nullptr); free(modelMem); if (_engine) { @@ -497,18 +497,28 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) { std::cout << "Trying to generate trt engine from : " << onnx_path << std::endl; + // create inference builder - IBuilder* builder = createInferBuilder(_gLogger); + auto builder = std::unique_ptr(nvinfer1::createInferBuilder(_gLogger)); + if (!builder) + { + return ; + } + auto buildercfg = std::unique_ptr(builder->createBuilderConfig()); + if (!buildercfg) + { + return ; + } // set optimization parameters here // CAN I DO HALF PRECISION (and report to user) std::cout << "Platform "; if (builder->platformHasFastFp16()) { std::cout << "HAS "; - builder->setFp16Mode(true); + buildercfg->setFlag(BuilderFlag::kFP16); } else { std::cout << "DOESN'T HAVE "; - builder->setFp16Mode(false); + // buildercfg->setFp16Mode(false); } std::cout << "fp16 support." << std::endl; // BATCH SIZE IS ALWAYS ONE @@ -519,14 +529,14 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) { !(NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR == 0 && \ NV_TENSORRT_PATCH == 0) if (DEVICE_DLA_0 || DEVICE_DLA_1) { - builder->setDefaultDeviceType(DeviceType::kDLA); - builder->allowGPUFallback(true); + buildercfg->setDefaultDeviceType(DeviceType::kDLA); + // buildercfg->allowGPUFallback(true); if (DEVICE_DLA_0) { std::cout << "Successfully selected DLA core 0." << std::endl; - builder->setDLACore(0); + buildercfg->setDLACore(0); } else if (DEVICE_DLA_0) { std::cout << "Successfully selected DLA core 1." 
@@ -497,18 +497,28 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) {
   std::cout << "Trying to generate trt engine from : " << onnx_path
             << std::endl;
 
+
   // create inference builder
-  IBuilder* builder = createInferBuilder(_gLogger);
+  auto builder = std::unique_ptr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(_gLogger));
+  if (!builder)
+  {
+    return;
+  }
+  auto buildercfg = std::unique_ptr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+  if (!buildercfg)
+  {
+    return;
+  }
 
   // set optimization parameters here
   // CAN I DO HALF PRECISION (and report to user)
   std::cout << "Platform ";
   if (builder->platformHasFastFp16()) {
     std::cout << "HAS ";
-    builder->setFp16Mode(true);
+    buildercfg->setFlag(BuilderFlag::kFP16);
   } else {
     std::cout << "DOESN'T HAVE ";
-    builder->setFp16Mode(false);
+    // buildercfg->setFp16Mode(false);
   }
   std::cout << "fp16 support." << std::endl;
   // BATCH SIZE IS ALWAYS ONE
@@ -519,14 +529,14 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) {
       !(NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR == 0 && \
         NV_TENSORRT_PATCH == 0)
   if (DEVICE_DLA_0 || DEVICE_DLA_1) {
-    builder->setDefaultDeviceType(DeviceType::kDLA);
-    builder->allowGPUFallback(true);
+    buildercfg->setDefaultDeviceType(DeviceType::kDLA);
+    // buildercfg->allowGPUFallback(true);
     if (DEVICE_DLA_0) {
       std::cout << "Successfully selected DLA core 0." << std::endl;
-      builder->setDLACore(0);
+      buildercfg->setDLACore(0);
     } else if (DEVICE_DLA_0) {
       std::cout << "Successfully selected DLA core 1."
                 << std::endl;
-      builder->setDLACore(1);
+      buildercfg->setDLACore(1);
     }
   } else {
     std::cout << "No DLA selected." << std::endl;
@@ -534,12 +544,19 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) {
 #endif
 
   // create a network builder
-  INetworkDefinition* network = builder->createNetwork();
+  const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
+  auto network = std::unique_ptr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
+  if (!network)
+  {
+    return;
+  }
+  // INetworkDefinition* network = builder->createNetworkV2(1);
 
   // generate a parser to get weights from onnx file
   nvonnxparser::IParser* parser =
       nvonnxparser::createParser(*network, _gLogger);
 
+
   // finally get from file
   if (!parser->parseFromFile(onnx_path.c_str(),
                              static_cast<int>(ILogger::Severity::kVERBOSE))) {
@@ -553,10 +570,12 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) {
   for (unsigned long ws_size = MAX_WORKSPACE_SIZE;
        ws_size >= MIN_WORKSPACE_SIZE; ws_size /= 2) {
     // set size
-    builder->setMaxWorkspaceSize(ws_size);
+    buildercfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ws_size);
+
+    buildercfg->setFlag(BuilderFlag::kFP16);
 
     // try to build
-    _engine = builder->buildCudaEngine(*network);
+    _engine = builder->buildEngineWithConfig(*network, *buildercfg);
     if (!_engine) {
       std::cerr << "Failure creating engine from ONNX model" << std::endl
                 << "Current trial size is " << ws_size << std::endl;
@@ -568,6 +587,12 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) {
     }
   }
 
+  // builder->destroy();
+  // buildercfg->destroy();
+  // parser->destroy();
+  // network->destroy();
+
+
   // final check
   if (!_engine) {
     throw std::runtime_error("ERROR: could not create engine from ONNX.");
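Note: this first commit still produces the engine with buildEngineWithConfig, which TensorRT 8 deprecates; the second commit below switches to the serialize-then-deserialize path. For reference, a sketch of the explicit-batch network creation used above with the template arguments written out; makeNetwork is an illustrative helper, not part of the patch:

    #include <NvInfer.h>
    #include <memory>

    // Sketch: createNetwork() is gone in TensorRT 8; createNetworkV2 takes a
    // flag bitmask, and ONNX models require the explicit-batch flag.
    std::unique_ptr<nvinfer1::INetworkDefinition> makeNetwork(
        nvinfer1::IBuilder& builder) {
      const auto explicitBatch =
          1U << static_cast<uint32_t>(
              nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
      return std::unique_ptr<nvinfer1::INetworkDefinition>(
          builder.createNetworkV2(explicitBatch));
    }
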
From f3f254ffcb1cfdb1e8b9c7e52f8e6f88dcc234c0 Mon Sep 17 00:00:00 2001
From: Sandeep Babu
Date: Mon, 11 Jul 2022 11:26:31 +0200
Subject: [PATCH 2/2] Fixed all warnings and deprecated functions

---
 CMakeLists.txt              |  2 +-
 cmake/tensorrt-config.cmake |  5 ++-
 include/netTensorRT.hpp     | 16 +++++-----
 src/netTensorRT.cpp         | 62 ++++++++++++++++++++-----------------
 4 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0b10b46..ede5a90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
 set(CMAKE_CXX_FLAGS_RELEASE "-g -O3")
 
 ## Use C++11
-set (CMAKE_CXX_STANDARD 14)
+set (CMAKE_CXX_STANDARD 11)
 
 # check flags
 message("Build type: ${CMAKE_BUILD_TYPE}")
diff --git a/cmake/tensorrt-config.cmake b/cmake/tensorrt-config.cmake
index 527fe6a..7b6a763 100644
--- a/cmake/tensorrt-config.cmake
+++ b/cmake/tensorrt-config.cmake
@@ -6,7 +6,7 @@ find_library(NVINFER NAMES nvinfer)
 find_library(NVINFERPLUGIN NAMES nvinfer_plugin)
 find_library(NVPARSERS NAMES nvparsers)
 find_library(NVONNXPARSER NAMES nvonnxparser)
-find_library(NVONNXPARSERRUNTIME NAMES nvonnxparser_runtime)
+
 
 # If it is ALL there, export libraries as a single package
 if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER )
@@ -18,7 +18,6 @@ if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER )
   message("NVPARSERS: ${NVPARSERS}")
   message("NVONNXPARSER: ${NVONNXPARSER}")
   message("NVONNXPARSERRUNTIME: ${NVONNXPARSERRUNTIME}")
-  # list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser nvonnxparser_runtime)
   list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser)
   message("All togheter now (libs): ${TENSORRT_LIBRARIES}")
   list(APPEND TENSORRT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
@@ -27,4 +26,4 @@ if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER )
 else()
   message("TensorRT NOT Available")
   set(TENSORRT_FOUND OFF)
-endif()
\ No newline at end of file
+endif()
diff --git a/include/netTensorRT.hpp b/include/netTensorRT.hpp
index f902d6c..393a908 100644
--- a/include/netTensorRT.hpp
+++ b/include/netTensorRT.hpp
@@ -15,12 +15,13 @@
 #include
 #include
 #include "net.hpp"
-
+#include <boost/container/vector.hpp>
+#include <boost/range/algorithm.hpp>
 #define MAX_WORKSPACE_SIZE \
   (1UL << 33) // gpu workspace size (8gb is pretty good)
 #define MIN_WORKSPACE_SIZE (1UL << 20) // gpu workspace size (pretty bad)
 
-#define DEVICE_DLA_0 1 // jetson DLA 0 enabled
+#define DEVICE_DLA_0 0 // jetson DLA 0 enabled
 #define DEVICE_DLA_1 0 // jetson DLA 1 enabled
 
 using namespace nvinfer1; // I'm taking a liberty because the code is
@@ -95,15 +96,14 @@ class NetTensorRT : public Net {
    * @return argsorted idxes
    */
   template <typename T>
-  std::vector<size_t> sort_indexes(const std::vector<T> &v) {
+  boost::container::vector<size_t> sort_indexes(const boost::container::vector<T> &v) {
     // initialize original index locations
-    std::vector<size_t> idx(v.size());
+    boost::container::vector<size_t> idx(v.size());
     std::iota(idx.begin(), idx.end(), 0);
 
     // sort indexes based on comparing values in v. >: decrease <: increase
-    std::sort(idx.begin(), idx.end(),
-              [&v](size_t i1, size_t i2) {return v[i1] > v[i2];});
+    boost::range::sort(idx, [&v](size_t i1, size_t i2) {return v[i1] > v[i2];});
 
     return idx;
   }
@@ -183,8 +183,8 @@ class NetTensorRT : public Net {
   uint _inBindIdx;
   uint _outBindIdx;
 
-  std::vector<int> proj_xs; // stope a copy in original order
-  std::vector<int> proj_ys;
+  boost::container::vector<int> proj_xs; // store a copy in original order
+  boost::container::vector<int> proj_ys;
 
   // explicitly set the invalid point for both inputs and outputs
   std::vector<float> invalid_input = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
diff --git a/src/netTensorRT.cpp b/src/netTensorRT.cpp
index 4a4ba78..efa286c 100644
--- a/src/netTensorRT.cpp
+++ b/src/netTensorRT.cpp
@@ -9,7 +9,7 @@
 #include
 #include
 #include
-
+#include <boost/container/vector.hpp>
 namespace rangenet {
 namespace segmentation {
 
@@ -33,13 +33,13 @@ NetTensorRT::NetTensorRT(const std::string& model_path)
   // try to deserialize the engine
   try {
     deserializeEngine(engine_path);
-  } catch (std::exception e) {
+  } catch (std::exception& e) {
    std::cout << "Could not deserialize TensorRT engine. " << std::endl
              << "Generating from sratch... This may take a while..."
              << std::endl;
 
    // destroy crap from engine
-    if (_engine) _engine->destroy();
+    if (_engine) delete _engine;
  } catch (...) {
    throw std::runtime_error("Unknown TensorRT exception. Giving up.");
  }
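Note: since TensorRT 8.0 the API interfaces have public destructors and the destroy() methods are deprecated, which is what motivates the delete calls in this commit. Catching by reference also matters here: deserializeEngine throws a std::runtime_error object, so a pointer catch would never match and the failure would fall through to the catch-all. A small sketch; the function and gLogger are illustrative names:

    #include <NvInfer.h>
    #include <iostream>
    #include <stdexcept>

    // Sketch: TensorRT 8 objects are released with plain delete (or owned by
    // std::unique_ptr) instead of the pre-8.0 destroy() calls.
    void example(nvinfer1::ILogger& gLogger) {
      nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
      try {
        throw std::runtime_error("could not deserialize engine");
      } catch (const std::exception& e) {  // by reference, not by pointer
        std::cout << e.what() << std::endl;
      }
      delete runtime;  // replaces the deprecated runtime->destroy()
    }
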
Giving up."); @@ -95,7 +95,7 @@ NetTensorRT::~NetTensorRT() { // destroy the execution context if (_context) { - _context->destroy(); + delete _context; } if (_verbose) { @@ -104,7 +104,7 @@ NetTensorRT::~NetTensorRT() { // destroy the engine if (_engine) { - _engine->destroy(); + delete _engine; } if (_verbose) { @@ -124,14 +124,13 @@ std::vector> NetTensorRT::doProjection(const std::vector ranges; - std::vector xs; - std::vector ys; - std::vector zs; - std::vector intensitys; - - std::vector proj_xs_tmp; - std::vector proj_ys_tmp; + boost::container::vector ranges; + boost::container::vector xs; + boost::container::vector ys; + boost::container::vector zs; + boost::container::vector intensitys; + boost::container::vector proj_xs_tmp; + boost::container::vector proj_ys_tmp; for (uint32_t i = 0; i < num_points; i++) { float x = scan[4 * i]; @@ -174,9 +173,9 @@ std::vector> NetTensorRT::doProjection(const std::vector orders = sort_indexes(ranges); - std::vector sorted_proj_xs; - std::vector sorted_proj_ys; + boost::container::vector orders = sort_indexes(ranges); + boost::container::vector sorted_proj_xs; + boost::container::vector sorted_proj_ys; std::vector> inputs; for (size_t idx : orders){ @@ -415,8 +414,8 @@ void NetTensorRT::deserializeEngine(const std::string& engine_path) { } // if using DLA, set the desired core before deserialization occurs -#if NV_TENSORRT_MAJOR >= 8 && \ - !(NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR == 0 && \ +#if NV_TENSORRT_MAJOR >= 5 && \ + !(NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR == 0 && \ NV_TENSORRT_PATCH == 0) if (DEVICE_DLA_0) { infer->setDLACore(0); @@ -453,7 +452,7 @@ void NetTensorRT::deserializeEngine(const std::string& engine_path) { // nvinfer1::IPluginCreator::createPlugin(_gLogger); // Now deserialize - _engine = infer->deserializeCudaEngine(modelMem, modelSize, nullptr); + _engine = infer->deserializeCudaEngine(modelMem, modelSize); free(modelMem); if (_engine) { @@ -522,7 +521,7 @@ void NetTensorRT::generateEngine(const std::string& onnx_path) { } std::cout << "fp16 support." << std::endl; // BATCH SIZE IS ALWAYS ONE - builder->setMaxBatchSize(1); + // builder->setMaxBatchSize(1); // if using DLA, set the desired core before deserialization occurs #if NV_TENSORRT_MAJOR >= 5 && \ @@ -550,7 +549,6 @@ auto network = std::unique_ptr(builder->createNetw { return ; } - // INetworkDefinition* network = builder->createNetworkV2(1); // generate a parser to get weights from onnx file nvonnxparser::IParser* parser = @@ -574,8 +572,21 @@ auto network = std::unique_ptr(builder->createNetw buildercfg->setFlag(BuilderFlag::kFP16); - // try to build - _engine = builder->buildEngineWithConfig(*network,*buildercfg); + + std::unique_ptr plan{builder->buildSerializedNetwork(*network, *buildercfg)}; + if (!plan) + { + return ; + } + + std::unique_ptr runtime{createInferRuntime(_gLogger)}; + if (!runtime) + { + return ; + } + + _engine = runtime->deserializeCudaEngine(plan->data(), plan->size()); + if (!_engine) { std::cerr << "Failure creating engine from ONNX model" << std::endl << "Current trial size is " << ws_size << std::endl; @@ -587,11 +598,6 @@ auto network = std::unique_ptr(builder->createNetw } } -// builder->destroy(); -// buildercfg->destroy(); -// parser->destroy(); -// network->destroy(); - // final check if (!_engine) {