diff --git a/example/Ryzen-AI-Library/include/cvml-api-common.h b/example/Ryzen-AI-Library/include/cvml-api-common.h index 13c6b910..823e3c7b 100644 --- a/example/Ryzen-AI-Library/include/cvml-api-common.h +++ b/example/Ryzen-AI-Library/include/cvml-api-common.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. // #ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_API_COMMON_H_ @@ -7,6 +7,14 @@ #include +#ifndef WIN32 +#define CVML_SDK_EXPORT +#define CVML_SDK_NO_EXPORT +#define CVML_SDK_DEPRECATED +#define CVML_SDK_DEPRECATED_EXPORT +#define CVML_SDK_DEPRECATED_NO_EXPORT +#else + #ifdef CVML_SDK_STATIC_DEFINE #define CVML_SDK_EXPORT #define CVML_SDK_NO_EXPORT @@ -38,6 +46,8 @@ #define CVML_SDK_DEPRECATED_NO_EXPORT CVML_SDK_NO_EXPORT CVML_SDK_DEPRECATED #endif +#endif + #define AMD_CVML_INTERFACE(TypeName) \ public: \ virtual ~TypeName(); \ @@ -49,57 +59,4 @@ TypeName(TypeName&&) noexcept = delete; \ TypeName& operator=(TypeName&&) noexcept = delete; -namespace amd { -namespace cvml { - -/** - * Encapsulates success or failure of an API call. 
- */ -template -struct Result { - R result; /**< result of running or building models*/ - F error; /**< error code*/ - /** - * Implementation of operator bool for Result - */ - explicit operator bool() const { return error == F::kSuccess; } - /** - * Implementation of operator-> for Result - */ - R operator->() const { return result; } - /** - * Implementation of operator() for Result - */ - R operator()() const { return result; } -}; - -/** - * Result output due to success - * @param r result to be returned from an successful operation - * @return result object of success - */ -template -const Result Success(const R& r) { - Result ret; - ret.result = r; - ret.error = F::kSuccess; - return ret; -} - -/** - * Result output due to failure - * @param f error code from an failed operation - * @return result object of failure - */ -template -const Result Error(F f) { - Result ret; - ret.result = {}; - ret.error = f; - return ret; -} - -} // namespace cvml -} // namespace amd - #endif // EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_API_COMMON_H_ diff --git a/example/Ryzen-AI-Library/include/cvml-context.h b/example/Ryzen-AI-Library/include/cvml-context.h index 740cd601..ce5850d0 100644 --- a/example/Ryzen-AI-Library/include/cvml-context.h +++ b/example/Ryzen-AI-Library/include/cvml-context.h @@ -3,7 +3,7 @@ * * @file * - * Definitions for CVML SDK Contexts and associated structures/functions. + * Definitions for SDK contexts and associated structures/functions. */ #ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_CONTEXT_H_ @@ -16,132 +16,188 @@ namespace amd { namespace cvml { /** - * Maximal number of different platforms CVML SDK can support + * Maximum number of different platforms the SDK can support. */ static const uint32_t MAX_SUPPORTED_PLATFORMS = 10; /** - * Information of platforms supported by CVML SDK + * Structure of platforms supported by the SDK. 
+ * * @see \a amd::cvml::Context */ -typedef struct SupportedPlatformInformation { +struct SupportedPlatformInformation { + /** + * Structure describing a single supported platform. + */ struct SupportedPlatform { /// Device ID of supported AMD APU + /// @deprecated Always returns -1 int64_t device_id; - /// Required mininal vulkan driver version on supported AMD APU + /// Required minimum Vulkan driver version on supported AMD APU int64_t required_gpu_minimal_vulkan_driver_version; - } platform[MAX_SUPPORTED_PLATFORMS]; + } platform[MAX_SUPPORTED_PLATFORMS]; ///< Array of supported platforms. /// Total number of supported AMD APU platforms. + /// @deprecated Always returns amd::cvml::MAX_SUPPORTED_PLATFORMS /// @see \a amd::cvml::MAX_SUPPORTED_PLATFORMS uint32_t supported_platform_count; /// Whether supported platform checking is enforced. bool checking_enforced; -} SupportedPlatformInformation; +}; /** - * Represents a context of a CVML SDK feature - * Can be shared by multiple features of the CVML SDK. + * Execution context for Ryzen AI Library features. + * + * An appropriate context must be created by calling, + * + * amd::cvml::CreateContext() + * + * before using any features in the Ryzen AI Library and provided to the + * feature constructor(s). + * + * The context can be shared by multiple features of the SDK. */ class CVML_SDK_EXPORT Context { AMD_CVML_INTERFACE(Context); public: /** - * Releases the memory allocated by the context. + * Releases all resources for the context and destroys it. */ virtual void Release() = 0; /** * Sets the verbosity of the log. * - * @param level CVML SDK feature log level + * @param level SDK feature log level */ - virtual void SetLogLevel(CvmlLogger::LogLevels level) = 0; + virtual void SetLogLevel(Logger::LogLevels level) = 0; /** - * Gets the pointer to the cvml logger + * Gets the pointer to the logger object. 
+ * + * @return Pointer to logger object */ - virtual CvmlLogger* GetLogger() const = 0; + virtual Logger* GetLogger() const = 0; /** - * Get the Supported Platform Information object + * Get the Supported Platform Information object. * * @param info Pointer to structure for receiving platform information - * @return True on success - * @return False on failure + * @return true on success, false on failure */ static bool GetSupportedPlatformInformation(amd::cvml::SupportedPlatformInformation* info); /** - * Defines the inference backends that can be supported by the CVML SDK. + * Defines the inference backends that can be supported by the SDK. * * These are provided to the \a SetInferenceBackend API function. */ enum InferenceBackend { - AUTO, ///< Allow the CVML SDK to select the hardware for inference operations + AUTO, ///< Allow the SDK to select the hardware for inference operations GPU, ///< Use GPU hardware for inference operations NPU, ///< Use NPU hardware for inference operations CPU, ///< Use CPU hardware for inference operations - ONNX, ///< Use NPU hardware for ONNX inference operations + dGPU ///< Use discrete GPU hardware, if available, for inference operations }; /** - * Define input source streaming mode that can be supported by CVMLSDK - * + * Defines the source streaming mode for feature processing. */ enum StreamingMode { - ONE_SHOT, ///< Input source is image - ONLINE_STREAMING, ///< Input source is video/audio file, or camera stream - OFFLINE_STREAMING ///< Input source is image playback + ONE_SHOT, ///< Features should expect to process independent images. + ONLINE_STREAMING, ///< Input images are part of real-time streaming content. + OFFLINE_STREAMING ///< Features are intended to process offline streaming content. }; /** * Specifies the inference backend for subsequently created features. * - * This function does not affect any CVML features that were instantiated - * via the context before its call. 
If a CVML feature is unable to support + * This function does not affect any features that were instantiated + * via the context before its call. If a feature is unable to support * a specified inference backend, it will refuse to construct and an * exception will be thrown instead. * * @param inference_backend Desired hardware inference backend + * @return true if backend updated */ - virtual void SetInferenceBackend(InferenceBackend inference_backend) = 0; + bool SetInferenceBackend(InferenceBackend inference_backend); /** * Returns the inference backend selection strategy for newly created features. * * @return Current hardware inference backend selection */ - virtual InferenceBackend GetInferenceBackend(void) = 0; + InferenceBackend GetInferenceBackend(void); + + /** + * Returns the current streaming mode. + * + * See \a amd::cvml::Context::SetStreamingMode for more details. + * + * @return Currently configured streaming mode. + */ + StreamingMode GetStreamingMode(void); + + /** + * Set the streaming mode for the context. + * + * The requested streaming mode is used to configure new features + * that are constructed against the context. Any features that + * were created before are not affected by subsequent streaming + * mode changes. + * + * See \a amd::cvml::Context::StreamingMode + * + * @param mode Desired streaming mode. + */ + void SetStreamingMode(StreamingMode mode); + + /** + * Return if NPU is available on platform + * + * @return true if NPU available + */ + static bool IsNPUAvailable(); + + /** + * Return if iGPU is available on platform + * + * @return true if iGPU available + */ + static bool IsiGPUAvailable(); /** - * Returns StreamingMode of input source, an enum class + * Return if dGPU is available on platform * + * @return true if dGPU available */ - virtual StreamingMode GetStreamingMode(void) = 0; + static bool IsdGPUAvailable(); /** - * Set input source type - * 0: one-shot image - * 1: online streaming mode (e.g. 
streaming video/audio, camera) - * 2: offline streaming model (e.g. image loop playback) + * Get detected NPU driver version. + * + * @return NPU driver version, or 0 if not detected */ - virtual void SetStreamingMode(StreamingMode mode) = 0; + uint32_t GetNPUDriverVersion(); + + public: + class Impl; + Impl* impl_; ///< Pointer to context implementation }; /** - * API to Create CVML Context. + * Create a Ryzen AI context. * * @param log_level Sets the log level. Default value is kINFO - * @param logger External logger for cvml context. Default value is nullptr - * @see \a amd::cvml::CvmlLogger + * @param logger External logger for the context. Default value is nullptr + * @see \a amd::cvml::Logger * @return Pointer to the created Context */ CVML_SDK_EXPORT amd::cvml::Context* CreateContext( - CvmlLogger::LogLevels log_level = CvmlLogger::LogLevels::kINFO, CvmlLogger* logger = nullptr); + Logger::LogLevels log_level = Logger::LogLevels::kINFO, Logger* logger = nullptr); } // namespace cvml } // namespace amd diff --git a/example/Ryzen-AI-Library/include/cvml-depth-estimation.h b/example/Ryzen-AI-Library/include/cvml-depth-estimation.h index 0ce6257c..ba93a2b9 100644 --- a/example/Ryzen-AI-Library/include/cvml-depth-estimation.h +++ b/example/Ryzen-AI-Library/include/cvml-depth-estimation.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. */ #ifndef EDGEML_FEATURES_DEPTH_ESTIMATION_INCLUDE_CVML_DEPTH_ESTIMATION_H_ @@ -14,39 +14,68 @@ namespace amd { namespace cvml { /** - * Interface class for the depth estimation feature. + * Depth Estimation feature class. + * + * Based on the provided images, the feature calculates a relative depth map + * for each invocation of the \a GenerateDepthMap() function. 
Appropriate + * resize and normalization is done during pre/post processing by the + * \a GenerateDepthMap() function to generate a depth map for each frame. + * + * Example + * + * // create Ryzen AI context + * auto context = amd::cvml::CreateContext(); + * + * // create depth estimation feature + * amd::cvml::DepthEstimation feature(context); + * + * // iterate over input frames + * for (auto frame ... ) { + * // encapsulate input image + * amd::cvml::Image input( ... ); + * + * // encapsulate output image + * amd::cvml::Image output( ... ); + * + * // generate depth map + * feature.GenerateDepthMap(input, &output); + * } */ class CVML_SDK_EXPORT DepthEstimation { AMD_CVML_INTERFACE(DepthEstimation); public: - // Depth Estimation model types - enum class DepthModelType { Fast, Precise }; - /** - * Constructor + * Constructor for the Depth Estimation feature. * * @param context Pointer to CVML SDK context - * @param model_type Whether to prefer Fast or Precise depth estimation */ - explicit DepthEstimation(Context* context, DepthModelType model_type = DepthModelType::Fast); + explicit DepthEstimation(Context* context); /** - * Generate depth map from an image + * Generate depth map from an image. + * + * This function throws exceptions on errors. * - * @param input A reference to the Image input; format: rgb-interleaved-uint8_t - * @param output a pointer to the Image output; format: NCHW grayscale-float32 - * @return true, if inference output is assigned to output - * \n data from image is valid as long as the feature has not been destroyed + * Each call of this function returns a depth map of + * floating point values representing the relative depth of the pixels + * corresponding to the width/height of the uncropped image frame. 
+ * + * @param input Reference to the Image input + * @param output Pointer to the Image output as a floating point grayscale buffer + * @return true if the output Image has been populated with inference information */ bool GenerateDepthMap(const Image& input, Image* output); /** - * Set the image type of the depth map output data/ + * Set the image type of the depth map output data. + * + * This function throws exceptions on errors. * * @param t The desired ImageType of the depth map output data - * \n valid values are: kGrayScaleFloat16, kGrayScaleFloat32. + * \n Valid values are: kGrayScaleFloat16, kGrayScaleFloat32. */ + [[deprecated("Output type is determined by the provided output image buffer")]] void SetOutputType(ImageType t); /** @@ -58,7 +87,7 @@ class CVML_SDK_EXPORT DepthEstimation { protected: class Impl; - Impl* impl_; + Impl* impl_; ///< Implementation of depth estimation interface. }; } // namespace cvml diff --git a/example/Ryzen-AI-Library/include/cvml-image.h b/example/Ryzen-AI-Library/include/cvml-image.h index fb7281d4..aab54d77 100644 --- a/example/Ryzen-AI-Library/include/cvml-image.h +++ b/example/Ryzen-AI-Library/include/cvml-image.h @@ -10,7 +10,6 @@ #include "cvml-types.h" namespace amd { - namespace cvml { class Context; @@ -38,10 +37,11 @@ class CVML_SDK_EXPORT Image { kYUV420p, kYUYV422, kP010, + kUNDEFINED = -1 }; /** - * An enumeration of image data types + * Defines supported image data types. */ enum DataType { kUint8, @@ -54,7 +54,7 @@ class CVML_SDK_EXPORT Image { }; /** - * A valid set of flags used to describe the image + * A valid set of flags used to describe the image. */ enum Flags { /** @@ -83,19 +83,20 @@ class CVML_SDK_EXPORT Image { }; /** - * Construct a CVML Image class object - * @param format: Image format - * @param data_type: Image data type - * @param width: The pixel width of the image - * @param height: The pixel height of the image - * @param buffer: (optional): A pointer to the image data. 
+ * Initializing constructor for image objects. + * + * @param format Image format + * @param data_type Image data type + * @param width The pixel width of the image + * @param height The pixel height of the image + * @param buffer (optional) A pointer to the image data. * If buffer is null, the data will be allocated by the CVML context specified via Map() * If buffer not null: * - If Flags::kDeviceMemoryImport is not specified, the buffer is expected to be a host buffer. * - If Flags::kDeviceMemoryImport is specified, the buffer shall point to a HANDLE/fd * to a device local memory (for example, vulkan device local memory). - * @param stride: (optional) if stride is not specified, image will be stored continuously. - * @param flags: (optional): bit mask of Flags specifying the valid usage of the image. + * @param stride (optional) if stride is not specified, image will be stored continuously. + * @param flags (optional): bit mask of Flags specifying the valid usage of the image. * See Image::Flags for more information. Defaults to both source and target. * If not specified, the default value is Flags::kSource | Flags::kTarget */ @@ -119,28 +120,34 @@ class CVML_SDK_EXPORT Image { [[deprecated("Use GetFormat()/GetDataType().")]] ImageType GetImageType() const; /** - * @deprecated - * @return address fo the CVML Image buffer + * Get CPU pointer to the buffer. + * If the Image object was constructed without an existing buffer pointer, + * a new CPU buffer is allocated to back the object and returned by this + * function. + * @return Underlying CPU pointer for the Image buffer object */ uint8_t* GetBuffer() const; /** - * Map CVML Image buffer using the specified CVML context - * @param context: CVML context to be associated with the image - * @param flags: flags for the operation - * @return address of the mapped CVML Image buffer + * @deprecated + * Map CVML Image buffer using the specified CVML context. + * This function is deprecated. Use GetBuffer() instead. 
+ * + * @param context CVML context to be associated with the image + * @param flags Flags for the operation + * @return Address of the mapped CVML Image buffer */ uint8_t* Map(Context* context, uint32_t flags = 0); /** * Get the width of the image. - * @return: The width of the image. + * @return The width of the image. */ uint32_t GetWidth() const; /** * Get the height of the image. - * @return: The height of the image. + * @return The height of the image. */ uint32_t GetHeight() const; @@ -150,6 +157,12 @@ class CVML_SDK_EXPORT Image { */ Format GetFormat() const; + /** + * Get the stride of the image. + * @return The stride (bytes per row) that the image was created with. + */ + uint32_t GetStride() const; + /** * Get data type for this image. * @return The data type that the image was created with. @@ -158,6 +171,7 @@ class CVML_SDK_EXPORT Image { /** * Get the usage flag bit mask for this image. + * * @return The bit mask of flags that the image was created with. */ uint32_t GetFlags() const; @@ -166,10 +180,11 @@ class CVML_SDK_EXPORT Image { * Export the image so that it can be imported in a different device context * (for example, vulkan context). To make an image exportable, the image must * be created with Flags::kExport in constructor. - * @param handle: pointer to a handle the image wil be exported to. + * + * @param handle Pointer to a handle the image will be exported to. * For windows, the pointer shall point to windows HANDLE struct. * For linux, the pointer shall point to file desriptor (int). - * @return Returns true on success, false on failure. + * @return true on success, false on failure. */ bool Export(void* handle); @@ -180,7 +195,7 @@ class CVML_SDK_EXPORT Image { Image& operator=(Image&&) noexcept = delete; class Impl; - Impl* impl_; + Impl* impl_; ///< Implementation of Image interface. 
}; } // namespace cvml diff --git a/example/Ryzen-AI-Library/include/cvml-logger.h b/example/Ryzen-AI-Library/include/cvml-logger.h index 9f6a9267..ab3421f4 100644 --- a/example/Ryzen-AI-Library/include/cvml-logger.h +++ b/example/Ryzen-AI-Library/include/cvml-logger.h @@ -1,33 +1,36 @@ /* - * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. */ #ifndef EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_LOGGER_H_ #define EDGEML_FEATURES_COMMON_FRAMEWORK_PUBLIC_INCLUDE_CVML_LOGGER_H_ -#include #include #include "cvml-api-common.h" -using std::time_t; - namespace amd { namespace cvml { /** - * Interface to handle logging in cvml sdk + * Base class for capturing log messages from the SDK. + * + * To customize the target of log messages from the SDK, create a new C++ + * class derived from amd::cvml::Logger and implement its \a LogStr member + * function to direct formatted log messages to the target of choice. For + * example, a derived Logger class may choose to capture all log messages + * to a file on the file system or send them to another process or device. */ -class CVML_SDK_EXPORT CvmlLogger { - AMD_CVML_INTERFACE(CvmlLogger); +class CVML_SDK_EXPORT Logger { + AMD_CVML_INTERFACE(Logger); public: /** * Log levels to set the log output verbosity. * Logger will print all the log messages if the log level is greater than - * or equal to the level which is already set + * or equal to the level which is already set. 
+ */ - enum class LogLevels { + enum LogLevels { kVERBOSE = 0, ///< To print all types of log messages kDEBUG = 1, ///< To print debug type messages and the levels above kDEBUG kINFO = 2, ///< To print information type messages and the levels above kINFO @@ -40,16 +43,16 @@ class CVML_SDK_EXPORT CvmlLogger { /** * Set the required log level * - * @param level A valid value from CvmlLogger::LogLevels + * @param level A valid value from Logger::LogLevels */ - void SetLogLevel(CvmlLogger::LogLevels level) { level_ = level; } + void SetLogLevel(Logger::LogLevels level) { level_ = level; } /** * Get the log level * - * @return level A valid value from CvmlLogger::LogLevels + * @return level A valid value from Logger::LogLevels */ - CvmlLogger::LogLevels GetLogLevel() { return level_; } + Logger::LogLevels GetLogLevel() { return level_; } /** * Write an entry into the log with a std::string message as input @@ -57,7 +60,7 @@ class CVML_SDK_EXPORT CvmlLogger { * @param log_level Type of the log message * @param msg Message of std::string type that needs to be logged */ - void Log(amd::cvml::CvmlLogger::LogLevels log_level, const std::string& msg); + void Log(amd::cvml::Logger::LogLevels log_level, const std::string& msg); /** * Write an entry into the log with a C type string as input @@ -65,22 +68,29 @@ class CVML_SDK_EXPORT CvmlLogger { * @param log_level Type of the log message * @param msg C type string message that needs to be logged */ - void Log(amd::cvml::CvmlLogger::LogLevels log_level, const char* msg); + void Log(amd::cvml::Logger::LogLevels log_level, const char* msg); /** - * Print the actual log message.Implemented by the respective child class + * Output the actual log message. * - * @param msg C type string message that needs to be logged + * This capability must be implemented by a derived class. 
+ * + * @param msg C type string message to be logged */ virtual void LogStr(const char* msg) = 0; protected: - CvmlLogger::LogLevels level_ = CvmlLogger::LogLevels::kINFO; + /// Currently configured log level + Logger::LogLevels level_ = Logger::LogLevels::kINFO; }; // \deprecated -// Below typedef is retained for backward compatibility -typedef CvmlLogger ICvmlLogger; +// This definition is retained for backward compatibility only. +using ICvmlLogger = Logger; + +// \deprecated +// This definition is retained for backward compatibility only. +using CvmlLogger = Logger; } // namespace cvml } // namespace amd diff --git a/example/Ryzen-AI-Library/include/cvml-types.h b/example/Ryzen-AI-Library/include/cvml-types.h index 7d7c6c6c..dd6366d9 100644 --- a/example/Ryzen-AI-Library/include/cvml-types.h +++ b/example/Ryzen-AI-Library/include/cvml-types.h @@ -1,5 +1,5 @@ /*! - * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. * * @file * @@ -57,7 +57,7 @@ struct CVML_SDK_EXPORT Rect { * * @param x X cordinate of top left corner * @param y Y cordinate of top left corner - * @param wdith Rectange width + * @param width Rectange width * @param height Rectange height */ Rect(_Tp x, _Tp y, _Tp width, _Tp height) : x_(x), y_(y), width_(width), height_(height) {} @@ -75,17 +75,13 @@ struct CVML_SDK_EXPORT Rect { _Tp height_; }; -// explicitly exporting template definition -template struct CVML_SDK_EXPORT Rect; -template struct CVML_SDK_EXPORT Rect; -template struct CVML_SDK_EXPORT Rect; - typedef Rect Rect_i; typedef Rect Rect_f; typedef Rect Rect_d; +typedef Rect Rect_u; /** - * Point with integer values + * Structure for 2-dimensional Point values. 
+ */ template struct CVML_SDK_EXPORT Point { @@ -109,17 +105,12 @@ struct CVML_SDK_EXPORT Point { _Tp y_; }; -// explicitly exporting template definition -template struct CVML_SDK_EXPORT Point; -template struct CVML_SDK_EXPORT Point; -template struct CVML_SDK_EXPORT Point; - typedef Point Point2i; typedef Point Point2f; typedef Point Point2d; /** - * Point with 3D values + * Structure for 3-dimensional Point values. */ template struct CVML_SDK_EXPORT Point3 { @@ -147,42 +138,36 @@ struct CVML_SDK_EXPORT Point3 { _Tp z_; }; -// explicitly exporting template definition -template struct CVML_SDK_EXPORT Point3; -template struct CVML_SDK_EXPORT Point3; -template struct CVML_SDK_EXPORT Point3; - typedef Point3 Point3i; typedef Point3 Point3f; typedef Point3 Point3d; /** - * Struct for bounding boxes at angles + * Structure for quadrilaterals at arbitrary angles. */ template -struct CVML_SDK_EXPORT BoundingQuad { +struct CVML_SDK_EXPORT Quad { /** * Default constructor. */ - BoundingQuad() = default; + Quad() = default; /** - * Initializing constructor using Points + * Initializing constructor using Points. * * @param top_left coordinates of top left point * @param top_right coordinates of top right point * @param bottom_left coordinates of bottom left point * @param bottom_right coordinates of bottom right point */ - BoundingQuad(Point<_Tp> top_left, Point<_Tp> top_right, Point<_Tp> bottom_left, - Point<_Tp> bottom_right) + Quad(Point<_Tp> top_left, Point<_Tp> top_right, Point<_Tp> bottom_left, Point<_Tp> bottom_right) : top_left_(top_left), top_right_(top_right), bottom_left_(bottom_left), bottom_right_(bottom_right) {} /** - * Initializing contrustor using explict x and y values + * Initializing constructor using explicit x and y values. 
* * @param x_tl top left x value * @param y_tl top left y value @@ -193,7 +178,7 @@ struct CVML_SDK_EXPORT BoundingQuad { * @param x_br bottom right x value * @param y_br bottom right y value */ - BoundingQuad(_Tp x_tl, _Tp y_tl, _Tp x_tr, _Tp y_tr, _Tp x_bl, _Tp y_bl, _Tp x_br, _Tp y_br) + Quad(_Tp x_tl, _Tp y_tl, _Tp x_tr, _Tp y_tr, _Tp x_bl, _Tp y_bl, _Tp x_br, _Tp y_br) : top_left_(Point<_Tp>(x_tl, y_tl)), top_right_(Point<_Tp>(x_tr, y_tr)), bottom_left_(Point<_Tp>(x_bl, y_bl)), @@ -209,37 +194,41 @@ struct CVML_SDK_EXPORT BoundingQuad { Point<_Tp> bottom_right_; }; -// explicitly exporting template definition -template struct CVML_SDK_EXPORT BoundingQuad; -template struct CVML_SDK_EXPORT BoundingQuad; -template struct CVML_SDK_EXPORT BoundingQuad; +/// Alias to older 'BoundingQuad' template definition. +template +using BoundingQuad = Quad; -typedef BoundingQuad BoundingQuadi; -typedef BoundingQuad BoundingQuadf; -typedef BoundingQuad BoundingQuadd; +typedef Quad Quadi; +typedef Quad Quadf; +typedef Quad Quadd; /** - * An Array class that can contain instances of classes T + * Fixed size array template class for multiple instances of class T. */ template class CVML_SDK_EXPORT Array { public: /** - * Default constructor - **/ + * Default constructor. + */ Array() : v_(nullptr), size_(0) {} /** - * Move constructor - **/ + * Move constructor. + * + * @param other Source array + */ Array(Array&& other) noexcept : v_(std::move(other.v_)), size_(std::exchange(other.size_, 0)) { other.v_ = nullptr; other.size_ = 0; } /** - * Move assignment - **/ + * Move assignment operator. + * + * @param other Source array + * @return Reference to updated object + */ Array& operator=(Array&& other) noexcept { if (this != &other) { if (v_) delete[] v_; @@ -252,24 +241,34 @@ class CVML_SDK_EXPORT Array { } /** - * Constructor that initilize required number of classes T - * throw an exception in case of error - **/ + * Constructor that initializes the required number of instances of class T. 
+ * + * This function throws exceptions on errors. + * + * @param size Desired size of the array + */ explicit Array(size_t size) : v_{new T[size]}, size_(size) {} /** - * Copy constructor - * throw an exception in case of error - **/ + * Copy constructor. + * + * This function throws exceptions on errors. + * + * @param other Source array + */ Array(const Array& other) : v_{new T[other.size()]}, size_(other.size()) { for (size_t i = 0; i < other.size(); i++) // copy elements v_[i] = other[i]; } /** - * assign operator - * throw an exception in case of error - **/ + * Assignment operator. + * + * This operator throws exceptions on errors. + * + * @param other Source array + * @return Reference to updated object + */ Array& operator=(const Array& other) { if (&other != this) { T* p = new T[other.size()]; @@ -284,7 +283,7 @@ class CVML_SDK_EXPORT Array { /** * Read only operator[] for const objects. * - * Throws exceptions on out-of-range subscript. + * This operator throws exceptions on out-of-range subscripts. * * @param i Index to array * @return Array value @@ -297,7 +296,7 @@ class CVML_SDK_EXPORT Array { /** * operator[] for subscript access. * - * Throws exceptions on out-of-range subscript. + * This operator throws exceptions on out-of-range subscripts. * * @param i Index to array * @return Reference to array entry @@ -308,7 +307,9 @@ class CVML_SDK_EXPORT Array { } /** - * Returns the size of the array + * Returns the size of the array. + * + * @return Current size of the array */ size_t size() const { return size_; } @@ -325,14 +326,8 @@ class CVML_SDK_EXPORT Array { size_t size_; ///< Current size of the array }; -/// explicitly exporting template definition -template class CVML_SDK_EXPORT Array; -template class CVML_SDK_EXPORT Array; -template class CVML_SDK_EXPORT Array; -template class CVML_SDK_EXPORT Array; - /** - * This structure represents face location and landmarks for a single person. 
+ * Structure representing face location and landmarks for a single person. */ struct CVML_SDK_EXPORT Face { /// Constructor @@ -351,13 +346,16 @@ struct CVML_SDK_EXPORT Face { /// Facial landmarks are used to localize and represent important regions of the face, such as: /// mouth, eyes, eyebrows, nose Array landmarks_; + + /// Get bounding box + Rect_i GetROI() { return face_; } }; /// explicitly exporting template definition template class CVML_SDK_EXPORT Array; /** - * This structure represents the landmarks and bounding box for a single person. + * Structure representing landmarks and bounding box for a single person. */ struct Person { /// Bounding box for this person @@ -371,6 +369,9 @@ struct Person { /// Detected landmark scores for this person Array landmark_scores_; + + /// Get bounding box + Rect_i GetROI() { return person_; } }; } // namespace cvml diff --git a/example/Ryzen-AI-Library/include/float16.hpp b/example/Ryzen-AI-Library/include/float16.hpp deleted file mode 100644 index ae5313d2..00000000 --- a/example/Ryzen-AI-Library/include/float16.hpp +++ /dev/null @@ -1,289 +0,0 @@ -/** - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright Shilei Tian - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef HALF_PRECISION_LIBRARY_H -#define HALF_PRECISION_LIBRARY_H - -#pragma once - -#include -#include - -#define HLF_EPSILON 1e-3 -#define HLF_MAX 65504 -#define HLF_MIN 6.1035e-05 - -struct float16 { - float16() = default; - - float16(const float16&) = default; - - float16(float16&&) = default; - - explicit float16(const float& rhs); - - float16& operator=(const float& rhs); - - // Operator +=, -=, *=, /= -#define BINARY_ARITHMETIC_OPERATOR(OP) \ - float16& operator OP##=(const float16& rhs) { \ - *this = operator float() OP static_cast(rhs); \ - return *this; \ - } - - BINARY_ARITHMETIC_OPERATOR(+) - - BINARY_ARITHMETIC_OPERATOR(-) - - BINARY_ARITHMETIC_OPERATOR(*) - - BINARY_ARITHMETIC_OPERATOR(/) - -#undef BINARY_ARITHMETIC_OPERATOR - - // Operator ++, -- - float16& operator++() { - *this += float16(1); - return *this; - } - - float16 operator++(int) { - float16 ret(*this); - operator++(); - return ret; - } - - float16& operator--() { - *this -= float16(1); - return *this; - } - - float16 operator--(int) { - float16 ret(*this); - operator--(); - return ret; - } - - // Operator float - operator float() const; - - template friend - struct std::hash; - -private: - uint16_t buf; -}; - -namespace std { - - template<> - struct hash { - std::size_t operator()(const float16& key) const { - return hash()(key.buf); - } - }; - -} - -#endif diff --git 
a/example/Ryzen-AI-Library/samples/common-sample-utils/include/common-sample-utils.h b/example/Ryzen-AI-Library/samples/common-sample-utils/include/common-sample-utils.h index 524bfd12..c6d04067 100644 --- a/example/Ryzen-AI-Library/samples/common-sample-utils/include/common-sample-utils.h +++ b/example/Ryzen-AI-Library/samples/common-sample-utils/include/common-sample-utils.h @@ -1,10 +1,11 @@ /* - * Copyright (C) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021-2024 Advanced Micro Devices, Inc. All rights reserved. */ #ifndef SAMPLES_COMMON_SAMPLE_UTILS_INCLUDE_COMMON_SAMPLE_UTILS_H_ #define SAMPLES_COMMON_SAMPLE_UTILS_INCLUDE_COMMON_SAMPLE_UTILS_H_ +#include #include #include @@ -24,6 +25,16 @@ typedef struct CamRes { int height; } CamRes; +/** + * Returns list of files in folder with specified file extensions (excludes subdirectories) + * @param folder path to folder + * @param supported_exts accepted file extensions + * + * @return list of filenames + */ +std::vector GetListOfFilesInDir(const std::filesystem::path& folder, + std::vector supported_exts); + /** * Sets up the camera with the specified camera id according to the preferred resolution list * @param camera_index: camera to open @@ -79,6 +90,16 @@ class RunFeatureClass { // Called for specific run feature code in each feature virtual cv::Mat Feature(const cv::Mat& input_frame_rgb) { return input_frame_rgb; } + /** + * The input extension is used to establish streaming mode. If a camera index is + * passed then the streaming mode is set to be online. + * + * @param src_path: Uses the same semantics as RunFeature's input. + * Input file name, or "" if camera desired + * @param context: CVML context being used in the sample + */ + void SetContextStreamingModeBySrc(amd::cvml::Context* context, const std::string& src_path); + /** * Opens video source and executes the feature. 
* @@ -104,6 +125,7 @@ class RunFeatureClass { std::vector* supported_res = nullptr); protected: + bool is_camera_; ///< Whether input is camera cv::VideoCapture video_input_; ///< OpenCV video capture device cv::VideoWriter video_output_; ///< Video writer for main output @@ -168,21 +190,34 @@ bool ParseArguments(int argc, char** const argv, std::string* input_str, std::st */ void PutRectangle(cv::Mat* image, const cv::Rect& rect, const cv::Scalar& color); +/** + * Specify extra flags for PutText's override_x parameter. + */ +enum PUTTEXT { + /// Specify X center for PutText() + OVERRIDE_CENTER = 0x0, + + /// Specify absolute X offset for PutText() + OVERRIDE_ABSOLUTE = 1 << (sizeof(int) * 8 - 2) +}; + /** * Render text strings into the frame. + * To center text around a point, set override_x = PUTTEXT * * @param image Target image buffer * @param display_text String of text to render * @param row Zero-based row number to render text, assuming text console * @param text_color Color of text to render - * @param center_x If non-zero, text will be centered around this point + * @param override_x If non-zero, change behavior based on PUTTEXT flags * @param text_height If non-zero, specifies text height as a percentage of the frame height * @param fill_background Whether or not an opaque background should be added * @param background_color Color of background, if specified + * @return End X value of the rendered text */ -void PutText(cv::Mat* image, const std::string& display_text, const int text_row, - cv::Scalar text_color, const int center_x, const int text_height, - const bool fill_background = false, cv::Scalar background_color = cv::Scalar(0, 0, 0)); +int PutText(cv::Mat* image, const std::string& display_text, const int text_row, + cv::Scalar text_color, const int override_x, const int text_height, + const bool fill_background = false, cv::Scalar background_color = cv::Scalar(0, 0, 0)); } // namespace utils } // namespace sample diff --git 
a/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp b/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp new file mode 100644 index 00000000..12abfff7 --- /dev/null +++ b/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-camera-utils.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + */ + +#include "common-sample-utils.h" + +#ifdef _WIN32 +#include +#include +#include +#include +#include + +#pragma comment(lib, "Mfplat.lib") +#pragma comment(lib, "Mf.lib") +#pragma comment(lib, "mfreadwrite.lib") +#pragma comment(lib, "mfuuid.lib") +#endif + +using amd::cvml::sample::utils::CamRes; + +namespace amd { +namespace cvml { +namespace sample { +namespace utils { + +#ifdef _WIN32 +/** + * camera supported media type + */ +struct MediaTypeInfo { + GUID type; /// Image type + UINT32 width; + UINT32 height; /// Resolution + UINT32 fps; /// Frame rate +}; + +/** + * Helper function to enumerate camera supported image type and resolution. 
+ * + * @param camera_index: selected camera index + * @return Enumeration of image type and resolution + */ +std::vector EnumerateCameraImageTypes(int camera_index) { + std::vector formats; + + // Initialize Media Foundation + HRESULT hr = MFStartup(MF_VERSION); + if (FAILED(hr)) { + std::cout << "Failed to initialize Media Foundation" << std::endl; + return formats; + } + + // Enumerate video capture devices + IMFAttributes* pAttributes = nullptr; + IMFActivate** ppDevices = nullptr; + UINT32 devicecount = 0; + + hr = MFCreateAttributes(&pAttributes, 1); + if (FAILED(hr) || pAttributes == nullptr) { + std::cerr << "Failed to create source resolver" << std::endl; + MFShutdown(); + return formats; + } + + hr = pAttributes->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID); + if (FAILED(hr)) { + std::cerr << "Failed to set device capture attribute" << std::endl; + pAttributes->Release(); + MFShutdown(); + return formats; + } + + hr = MFEnumDeviceSources(pAttributes, &ppDevices, &devicecount); + pAttributes->Release(); + if (FAILED(hr) || ppDevices == nullptr || devicecount == 0 || + camera_index >= static_cast(devicecount)) { + std::cerr << "No valid video capture devices found" << std::endl; + if (ppDevices) { + for (UINT32 i = 0; i < devicecount; i++) { + ppDevices[i]->Release(); + } + CoTaskMemFree(ppDevices); + } + MFShutdown(); + return formats; + } + + // Activate the selected device + IMFMediaSource* pMediaSource = nullptr; + hr = ppDevices[camera_index]->ActivateObject(IID_PPV_ARGS(&pMediaSource)); + for (UINT32 i = 0; i < devicecount; i++) { + ppDevices[i]->Release(); + } + CoTaskMemFree(ppDevices); + if (FAILED(hr) || pMediaSource == nullptr) { + std::cerr << "Failed to activate media source" << std::endl; + MFShutdown(); + return formats; + } + + IMFSourceReader* pSourceReader = nullptr; + hr = MFCreateSourceReaderFromMediaSource(pMediaSource, nullptr, &pSourceReader); + pMediaSource->Release(); + if (FAILED(hr) 
|| pSourceReader == nullptr) { + std::cerr << "Failed to create source reader" << std::endl; + MFShutdown(); + return formats; + } + + // Enumerate available formats + DWORD dwStreamIndex = 0, mediaTypeIndex = 0; + while (true) { + IMFMediaType* pType = nullptr; + hr = pSourceReader->GetNativeMediaType(dwStreamIndex, mediaTypeIndex, &pType); + if (hr == MF_E_NO_MORE_TYPES) { + mediaTypeIndex = 0; + dwStreamIndex++; + hr = pSourceReader->GetNativeMediaType(dwStreamIndex, mediaTypeIndex, &pType); + if (hr == MF_E_INVALIDREQUEST || hr == MF_E_NO_MORE_TYPES) break; + } + if (FAILED(hr)) break; + GUID subtype; + hr = pType->GetGUID(MF_MT_SUBTYPE, &subtype); + if (SUCCEEDED(hr)) { + // Get the resolution + UINT32 width = 0, height = 0; + hr = MFGetAttributeSize(pType, MF_MT_FRAME_SIZE, &width, &height); + if (SUCCEEDED(hr)) { + // Get the frame rate + UINT32 numerator = 0, denominator = 0; + hr = MFGetAttributeRatio(pType, MF_MT_FRAME_RATE, &numerator, &denominator); + if (SUCCEEDED(hr) && denominator != 0) { + UINT32 fps = numerator / denominator; + if (fps <= 30) { + formats.push_back({subtype, width, height, fps}); + } + } + } + } + pType->Release(); + mediaTypeIndex++; + } + + pSourceReader->Release(); + MFShutdown(); + + return formats; +} +#endif + +bool SetupCamera(int camera_index, const std::vector& res_list, cv::VideoCapture* camera) { + // list certain API preferences before CAP_ANY to try them first + // regardless of opencv's ordering + static const int camera_api_preference[] = { +#ifdef _WIN32 + cv::CAP_DSHOW, cv::CAP_MSMF, +#endif + cv::CAP_ANY}; + + if (camera == nullptr) { + return false; + } + +#ifdef _WIN32 + std::vector camera_format = EnumerateCameraImageTypes(camera_index); +#endif + + for (auto api : camera_api_preference) { + try { + camera->open(camera_index, api); + if (camera->isOpened()) { + break; + } + } catch (std::exception& e) { + std::cout << "SetupCamera exception(" << api << "): " << e.what() << std::endl; + } + } + + if 
(camera->isOpened() != true) { + std::cout << "Failed to open camera device with id:" << camera_index << std::endl; + return false; + } + + bool result = false; + + for (auto res : res_list) { + camera->set(cv::CAP_PROP_FRAME_WIDTH, res.width); + camera->set(cv::CAP_PROP_FRAME_HEIGHT, res.height); + camera->set(cv::CAP_PROP_FPS, 30); + auto w = camera->get(cv::CAP_PROP_FRAME_WIDTH); + auto h = camera->get(cv::CAP_PROP_FRAME_HEIGHT); + auto fps = camera->get(cv::CAP_PROP_FPS); + if (w != res.width || h != res.height) { + std::cout << "Camera doesn't support " << res.width << "x" << res.height << std::endl; + } else { + std::cout << "Camera enabled at " << w << "x" << h << "@" << fps << std::endl; + result = true; + break; + } + } + if (!result) { + std::cout << "No supported resolution for camera." << std::endl; + camera->release(); + return result; + } + +#ifdef _WIN32 + if (std::any_of(camera_format.cbegin(), camera_format.cend(), + [&camera](const MediaTypeInfo& format) { + return format.type == MFVideoFormat_MJPG && + format.width == camera->get(cv::CAP_PROP_FRAME_WIDTH) && + format.height == camera->get(cv::CAP_PROP_FRAME_HEIGHT) && + camera->get(cv::CAP_PROP_FRAME_HEIGHT) >= 1080 && format.fps >= 30; + })) { + camera->set(cv::CAP_PROP_FOURCC, cv::VideoWriter::fourcc('M', 'J', 'P', 'G')); + std::cout << "Image type set to MJPG!" << std::endl; + if (!camera->isOpened()) { + std::cout << "Set MJPG format failed!" 
<< std::endl; + result = false; + } + } + std::cout << "Selected " << camera->get(cv::CAP_PROP_FRAME_WIDTH) << "x" + << camera->get(cv::CAP_PROP_FRAME_HEIGHT) << "@" << camera->get(cv::CAP_PROP_FPS) + << std::endl; + +#endif + + return result; +} + +} // namespace utils +} // namespace sample +} // namespace cvml +} // namespace amd diff --git a/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-utils.cpp b/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-utils.cpp index 20a0bf81..2f870ee6 100644 --- a/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-utils.cpp +++ b/example/Ryzen-AI-Library/samples/common-sample-utils/src/common-sample-utils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include using amd::cvml::sample::utils::CamRes; @@ -15,57 +16,6 @@ namespace cvml { namespace sample { namespace utils { -bool SetupCamera(int camera_index, const std::vector& res_list, cv::VideoCapture* camera) { - // list certain API preferences before CAP_ANY to try them first - // regardless of opencv's ordering - static const int camera_api_preference[] = { -#ifdef _WIN32 - cv::CAP_DSHOW, cv::CAP_MSMF, -#endif - cv::CAP_ANY}; - - if (camera == nullptr) { - return false; - } - - for (auto api : camera_api_preference) { - try { - camera->open(camera_index, api); - if (camera->isOpened()) { - break; - } - } catch (std::exception& e) { - std::cout << "SetupCamera exception(" << api << "): " << e.what() << std::endl; - } - } - - if (camera->isOpened() != true) { - std::cout << "Failed to open camera device with id:" << camera_index << std::endl; - return false; - } - - bool result = false; - - for (auto res : res_list) { - camera->set(cv::CAP_PROP_FRAME_WIDTH, res.width); - camera->set(cv::CAP_PROP_FRAME_HEIGHT, res.height); - auto w = camera->get(cv::CAP_PROP_FRAME_WIDTH); - auto h = camera->get(cv::CAP_PROP_FRAME_HEIGHT); - if (w != res.width || h != res.height) { - std::cout << "Camera doesn't support " << 
res.width << "x" << res.height << std::endl; - } else { - std::cout << "Camera enabled at " << w << "x" << h << std::endl; - result = true; - break; - } - } - if (!result) { - std::cout << "No supported resolution for camera." << std::endl; - camera->release(); - } - return result; -} - /** * Helper function to determine output display scale factor. * @@ -109,6 +59,25 @@ bool RunFeatureClass::GetSingleVideoFrame(uint32_t frame_id) { return true; } +void RunFeatureClass::SetContextStreamingModeBySrc(amd::cvml::Context* context, + const std::string& src_path) { + // assume camera index if a number is provided + const std::string input_str = src_path.empty() ? "0" : src_path; + std::string ext = static_cast(input_str).extension().string(); + if (ext.length() == 0 && std::isdigit(input_str[0])) { + context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONLINE_STREAMING); + } else { + // check if we can treat the input as an image + auto frame_rgb = cv::imread(input_str); + if (!frame_rgb.empty()) { + context->SetStreamingMode(amd::cvml::Context::StreamingMode::ONE_SHOT); + } else { + // assume the input is a video file + context->SetStreamingMode(amd::cvml::Context::StreamingMode::OFFLINE_STREAMING); + } + } +} + bool RunFeatureClass::RunFeatureStreaming() { bool user_exit = false; @@ -117,6 +86,7 @@ bool RunFeatureClass::RunFeatureStreaming() { // set FPS to be the same as the input device/file, or 30FPS if (video_input_.isOpened()) { stream_fps_ = video_input_.get(cv::CAP_PROP_FPS); + if (is_camera_) stream_fps_ = 30.0f; } // ms time for stream_fps_ @@ -124,7 +94,8 @@ bool RunFeatureClass::RunFeatureStreaming() { std::chrono::milliseconds(static_cast(1000 / stream_fps_)); // record start time - std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); + // std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); + std::chrono::steady_clock::time_point start_time; // // Iterate over frames @@ -132,6 +103,8 
@@ bool RunFeatureClass::RunFeatureStreaming() { // so is good enough for a sample application. // for (frame_id = 1; GetSingleVideoFrame(frame_id - 1); ++frame_id) { + if (frame_id == 2) start_time = std::chrono::steady_clock::now(); + // Run feature and measure effective fps (execution time of a single feature call) std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); cv::Mat frame_out = Feature(frame_rgb_); @@ -239,7 +212,8 @@ bool RunFeatureClass::RunFeatureStreaming() { std::chrono::duration_cast(current_time - start_time); // extra sleep to simulate expected FPS - if (elapsed_time < test_fps_period_ * frame_id) { + if (elapsed_time < test_fps_period_ * (frame_id - 1)) { + // std::cout << "Sleep triggered, printout" << std::endl; std::this_thread::sleep_for(test_fps_period_ * frame_id - elapsed_time); } } @@ -279,7 +253,10 @@ void RunFeatureClass::RunFeature(const std::string& input, const std::string& ou std::vector* supported_res) { // attempt to open output file later if name specified open_output_file_ = output_file.size() > 0; - output_file_ = output_file; + if (open_output_file_) { + std::filesystem::path output_path(output_file); + output_file_ = std::filesystem::absolute(output_path).string(); + } output_window_name_ = window_title; // default frame rate @@ -297,11 +274,11 @@ void RunFeatureClass::RunFeature(const std::string& input, const std::string& ou std::string ext = static_cast(input_str).extension().string(); bool is_image{false}; bool is_video{false}; - bool is_camera{false}; + is_camera_ = false; // assume camera index if a number is provided if (ext.length() == 0 && std::isdigit(input_str[0])) { - is_camera = true; + is_camera_ = true; } else { // check if we can treat the input as an image frame_rgb_ = cv::imread(input_str); @@ -314,7 +291,7 @@ void RunFeatureClass::RunFeature(const std::string& input, const std::string& ou } } - if (is_camera) { + if (is_camera_) { // // Camera // @@ -354,6 +331,26 @@ void 
RunFeatureClass::RunFeature(const std::string& input, const std::string& ou } } +std::vector GetListOfFilesInDir(const std::filesystem::path& folder, + std::vector supported_exts) { + std::vector listOfFiles{}; + if (std::filesystem::exists(folder) && std::filesystem::is_directory(folder)) { + for (const std::filesystem::directory_entry& iter : + std::filesystem::directory_iterator(folder)) { + if (iter.is_regular_file()) { + std::string ext = iter.path().extension().string(); + if (std::find(supported_exts.begin(), supported_exts.end(), ext) != + supported_exts.end()) { // Add current file to list if extension is supported + listOfFiles.push_back(iter.path().string()); + } + } + } + } else { + std::cout << "Error accessing folder " << folder.string() << std::endl; + } + return listOfFiles; +} + std::string CreateFolderWithTimestamp() { std::string file_save_path = GetTimestamp(); namespace fs = std::filesystem; @@ -365,10 +362,16 @@ std::string CreateFolderWithTimestamp() { std::string GetTimestamp() { std::string timestamp{}; + std::stringstream mon_s, day_s, hour_s, min_s, sec_s; +#ifdef _WIN32 struct tm ltm; time_t now = time(0); localtime_s(<m, &now); - std::stringstream mon_s, day_s, hour_s, min_s, sec_s; +#else + time_t now = time(&now); + struct tm ltm; + localtime_r(&now, <m); +#endif mon_s << std::setw(2) << std::setfill('0') << (ltm.tm_mon + 1); day_s << std::setw(2) << std::setfill('0') << ltm.tm_mday; hour_s << std::setw(2) << std::setfill('0') << ltm.tm_hour; @@ -383,14 +386,10 @@ void GetPlatformInformation() { amd::cvml::SupportedPlatformInformation info{}; amd::cvml::Context::GetSupportedPlatformInformation(&info); - for (size_t i = 0; i < info.supported_platform_count; i++) { - std::cout << "supported APU devide-id: 0x" << std::hex << info.platform[i].device_id << std::dec - << std::endl; - std::cout << "required minimal-vulkan-driver-version: 0x" << std::hex - << info.platform[i].required_gpu_minimal_vulkan_driver_version << std::dec - << 
std::endl; + if (info.supported_platform_count > 0) { + std::cout << "Required minimam Vulkan driver version: 0x" << std::hex + << info.platform[0].required_gpu_minimal_vulkan_driver_version << std::endl; } - std::cout << "supported_platform_count=" << info.supported_platform_count << std::endl; } bool ParseArguments(int argc, char** const argv, std::string* input_str, std::string* output_file, @@ -415,7 +414,7 @@ bool ParseArguments(int argc, char** const argv, std::string* input_str, std::st (void)e; } if (arg_help == nullptr) { - std::cout << "Usage: " << app_name << ".exe" + std::cout << "Usage: " << app_name << " [-i input] [-o file]\n" " -i\tSpecify an input image/video file or camera device index\n" " -o\tSpecify output image/video file name\n"; @@ -469,9 +468,9 @@ void PutRectangle(cv::Mat* image, const cv::Rect& rect, const cv::Scalar& color) } } -void PutText(cv::Mat* image, const std::string& display_text, const int text_row, - cv::Scalar text_color, const int center_x, const int text_height, - const bool fill_background, cv::Scalar background_color) { +int PutText(cv::Mat* image, const std::string& display_text, const int text_row, + cv::Scalar text_color, const int override_x, const int text_height, + const bool fill_background, cv::Scalar background_color) { static int TEXT_HEIGHT = 30; // hard coded text height, because getTextSize isn't reliable static int TEXT_BOX_OFFSET = 5; // offset for background box static int TEXT_PADDING = 3; // space between rows of text @@ -480,7 +479,7 @@ void PutText(cv::Mat* image, const std::string& display_text, const int text_row if (image == nullptr || text_row < 0) { // silently return - return; + return -1; } double text_scale = TEXT_SCALE; @@ -496,7 +495,7 @@ void PutText(cv::Mat* image, const std::string& display_text, const int text_row } // cppcheck-suppress knownConditionTrueFalse - int text_font = text_scale > 1.0 ? cv::FONT_HERSHEY_COMPLEX : cv::FONT_HERSHEY_DUPLEX; + int text_font = text_scale > 1.0 ? 
cv::FONT_HERSHEY_DUPLEX : cv::FONT_HERSHEY_PLAIN; int text_box_offset = static_cast(TEXT_BOX_OFFSET * text_scale + 0.5); int text_padding = static_cast(TEXT_PADDING * text_scale + 0.5); int text_thickness = static_cast(TEXT_THICKNESS * text_scale); @@ -505,14 +504,18 @@ void PutText(cv::Mat* image, const std::string& display_text, const int text_row auto text_size = cv::getTextSize(display_text, text_font, text_scale, text_thickness, nullptr); // constant left starting point for english text - int origin_x = TEXT_PADDING; + int org_x = TEXT_PADDING; - // handle text centering - if (center_x != 0) { - origin_x += center_x - text_size.width / 2; + if (override_x > 0) { + if (override_x & PUTTEXT::OVERRIDE_ABSOLUTE) { + org_x = override_x & ~(PUTTEXT::OVERRIDE_ABSOLUTE); + } else { + // handle text centering + org_x = override_x - text_size.width / 2; + } } - cv::Point2i origin = cv::Point2i(origin_x, (text_h + text_padding) * (text_row + 1)); + cv::Point2i origin = cv::Point2i(org_x, (text_h + text_padding) * (text_row + 1)); if (fill_background) { // draw rectangle on frame for each text @@ -523,6 +526,7 @@ void PutText(cv::Mat* image, const std::string& display_text, const int text_row // actually display the text cv::putText(*image, display_text, origin, text_font, text_scale, text_color, text_thickness); + return org_x + text_size.width; } } // namespace utils diff --git a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/README.md b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/README.md index a2487ed7..b7f7e3a4 100644 --- a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/README.md +++ b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/README.md @@ -18,7 +18,7 @@ set PYTHONHOME=\envs\ set PYTHONPATH=\envs\ ``` -4. Move ``vaip_config.json`` from ``path\to\ryzen-ai-sw-1.0\voe-4.0-win_amd64\`` to ``path\to\Ryzen-AI-Library-Public-Release\windows`` +4. 
Move ``vaip_config.json`` from ``path\to\ryzen-ai-sw-1.0\voe-4.0-win_amd64\`` to ``path\to\Ryzen-AI-Library-Public-Release\windows\onnx\vai`` 5. Navigate to the samples directory and run depth-estimation application: ``` cvml-samples-depth-estimation.exe -i path\to\input -o path\to\output @@ -38,7 +38,7 @@ pip install numpy pip install voe-0.1.0-cp39-cp39-win_amd64.whl ``` -3. Move ``vaip_config.json`` from ``path\to\ryzen-ai-sw-1.0\voe-4.0-win_amd64\`` to ``path\to\Ryzen-AI-Library-Public-Release\windows`` +3. Move ``vaip_config.json`` from ``path\to\ryzen-ai-sw-1.0\voe-4.0-win_amd64\`` to ``path\to\Ryzen-AI-Library-Public-Release\windows\onnx\vai`` 4. Run RyzenAI sample diff --git a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/cvml-sample-depth-estimation.exe b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/cvml-sample-depth-estimation.exe index 0a5c98bc..97601de0 100644 Binary files a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/cvml-sample-depth-estimation.exe and b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/cvml-sample-depth-estimation.exe differ diff --git a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/main.cpp b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/main.cpp index d3adcef6..831eb86f 100644 --- a/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/main.cpp +++ b/example/Ryzen-AI-Library/samples/cvml-sample-depth-estimation/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. 
*/ #include #include @@ -22,7 +22,6 @@ class DepthEstimationSample : public amd::cvml::sample::utils::RunFeatureClass { std::string input_str_{}; ///< frame source: image or video or camera std::string output_file_{}; ///< Output file path/name bool use_fp16_; ///< depth output type - amd::cvml::DepthEstimation::DepthModelType de_model_{}; ///< Depth model to use /** * Post process depth map for opencv visualization. @@ -64,7 +63,7 @@ cv::Mat DepthEstimationSample::Feature(const cv::Mat& frame_rgb) { depth_estimation_->GenerateDepthMap(input_frame_amd_image, &output_img); if (!depth_map_generated) { std::cout << "Failed to generate depth map" << std::endl; - throw std::exception("Failed to generate depth map!"); + throw std::runtime_error("Failed to generate depth map!"); } return DepthEstimationCvmlToOpenCV(&output_img); } @@ -77,7 +76,7 @@ cv::Mat DepthEstimationSample::DepthEstimationCvmlToOpenCV(const Image* depth_ma float* depth_map_or_p = reinterpret_cast(reinterpret_cast(depth_map->GetBuffer())); if (depth_map_or_p == nullptr) { - throw std::exception("Failed to get depth map data!"); + throw std::runtime_error("Failed to get depth map data!"); } if (use_fp16_) { cv::Mat depth_map_or_mat_raw2 = @@ -98,48 +97,6 @@ cv::Mat DepthEstimationSample::DepthEstimationCvmlToOpenCV(const Image* depth_ma return frame_out; } -void PrintHelpMessage() { - std::cout << "Usage: cvml-sample-depth-estimation.exe [-i input image/video] [-o output " - "image/video] [-m depth model] [-h]" - << std::endl; - std::cout << " -i\tSpecify an input image/video file or camera device index" << std::endl; - std::cout << " -o\tSpecify output image/video file name" << std::endl; - std::cout << " -m\tspecify depth estimation model. e.g. . Optional. 
Fast " - "is the default" - << std::endl; - std::cout << " -h\tshow usage" << std::endl; - - std::cout << "Example 1: cvml-sample-depth-estimation.exe -i image.jpg" << std::endl; - std::cout << "Example 2: cvml-sample-depth-estimation.exe -i image.jpg -m precise" << std::endl; -} - -bool ParseArguments(int argc, char** argv, DepthEstimationSample* local_data) { - std::string de_model_str; - for (int i = 1; i < argc; i++) { - if (std::string(argv[i]) == "-i" && ((i + 1) < argc)) { - local_data->input_str_ = argv[i + 1]; - } else if (std::string(argv[i]) == "-o" && ((i + 1) < argc)) { - local_data->output_file_ = argv[i + 1]; - } else if (std::string(argv[i]) == "-m" && ((i + 1) < argc)) { - de_model_str = argv[i + 1]; - } else if (std::string(argv[i]) == "-h") { - PrintHelpMessage(); - return false; - } - } - - // choose depth model - if (de_model_str == "precise") { - local_data->de_model_ = amd::cvml::DepthEstimation::DepthModelType::Precise; - std::cout << "Running with precise Depth Estimation model" << std::endl; - } else { // default - local_data->de_model_ = amd::cvml::DepthEstimation::DepthModelType::Fast; - std::cout << "Running with fast Depth Estimation model" << std::endl; - } - - return true; -} - /** * Main entry point of the sample application. 
* @@ -154,7 +111,8 @@ int main(int argc, char** argv) { de_sample.side_by_side_ = true; // parse command line arguments - if (!ParseArguments(argc, argv, &de_sample)) { + if (!amd::cvml::sample::utils::ParseArguments(argc, argv, &de_sample.input_str_, + &de_sample.output_file_)) { return -1; } @@ -168,7 +126,7 @@ int main(int argc, char** argv) { context->SetInferenceBackend(amd::cvml::Context::InferenceBackend::AUTO); // initialize depth estimation class - amd::cvml::DepthEstimation depth_estimation(context, de_sample.de_model_); + amd::cvml::DepthEstimation depth_estimation(context); // execute main sample application loop with the created feature de_sample.depth_estimation_ = &depth_estimation; diff --git a/example/Ryzen-AI-Library/windows/amdblitter.dll b/example/Ryzen-AI-Library/windows/amdblitter.dll deleted file mode 100644 index 0c14956b..00000000 --- a/example/Ryzen-AI-Library/windows/amdblitter.dll +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe8c2df7c1ad31bac2a73d59b8ef3df3b73d414c0d71dde5fec0fc093cb5a721 -size 8950784 diff --git a/example/Ryzen-AI-Library/windows/common-sample-utils.lib b/example/Ryzen-AI-Library/windows/common-sample-utils.lib index d5c4343d..e50cba7c 100644 Binary files a/example/Ryzen-AI-Library/windows/common-sample-utils.lib and b/example/Ryzen-AI-Library/windows/common-sample-utils.lib differ diff --git a/example/Ryzen-AI-Library/windows/cvml-depth-estimation.dll b/example/Ryzen-AI-Library/windows/cvml-depth-estimation.dll index ac3f6c59..41cbf572 100644 --- a/example/Ryzen-AI-Library/windows/cvml-depth-estimation.dll +++ b/example/Ryzen-AI-Library/windows/cvml-depth-estimation.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9dda0a9191ebcd19ec8831b0b15bffeec339467e62b2349a87f2f342de46235b -size 54267904 +oid sha256:80b530ecfa97203b8b1d0a6d390bc7e72f1ae9decde840741e7d51f837e24d2b +size 67993808 diff --git a/example/Ryzen-AI-Library/windows/cvml-depth-estimation.lib 
b/example/Ryzen-AI-Library/windows/cvml-depth-estimation.lib index 0a848547..66dc540f 100644 Binary files a/example/Ryzen-AI-Library/windows/cvml-depth-estimation.lib and b/example/Ryzen-AI-Library/windows/cvml-depth-estimation.lib differ diff --git a/example/Ryzen-AI-Library/windows/cvml-sdk.dll b/example/Ryzen-AI-Library/windows/cvml-sdk.dll index dbd65e4a..2fd95b3a 100644 --- a/example/Ryzen-AI-Library/windows/cvml-sdk.dll +++ b/example/Ryzen-AI-Library/windows/cvml-sdk.dll @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4dbcd9001699cdc142ef9a0cf6d99c3a9953e44e80e4ef675e01b7b7200e347a -size 307712 +oid sha256:2b482748e079d91e6d8654380631eaa39646078a46df1b4d1a1a3bee5effdd4b +size 2659024 diff --git a/example/Ryzen-AI-Library/windows/cvml-sdk.lib b/example/Ryzen-AI-Library/windows/cvml-sdk.lib index ef9e6367..a1243680 100644 Binary files a/example/Ryzen-AI-Library/windows/cvml-sdk.lib and b/example/Ryzen-AI-Library/windows/cvml-sdk.lib differ diff --git a/example/Ryzen-AI-Library/windows/onnx/cpu/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/cpu/onnxruntime.dll new file mode 100644 index 00000000..43ce5088 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/cpu/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fdfcb4747207a0577fd3b5f0f83c8288fcfa84e9c3cf344e06160c8ec10fc8 +size 10766040 diff --git a/example/Ryzen-AI-Library/windows/onnx/dml/DirectML.dll b/example/Ryzen-AI-Library/windows/onnx/dml/DirectML.dll new file mode 100644 index 00000000..aaec0eb9 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/dml/DirectML.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc44fc08f1edb3cafca24de5361af11ce7ebd327bda2caa7936c632591f93393 +size 12159424 diff --git a/example/Ryzen-AI-Library/windows/onnx/dml/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/dml/onnxruntime.dll new file mode 100644 index 00000000..5b29312a --- /dev/null +++ 
b/example/Ryzen-AI-Library/windows/onnx/dml/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0275aab0ce420e60a9bc3e2e1ac65a6b1c5236d05645d4d0fb05cf8c63edff12 +size 12633816 diff --git a/example/Ryzen-AI-Library/windows/onnx/dml19/DirectML.dll b/example/Ryzen-AI-Library/windows/onnx/dml19/DirectML.dll new file mode 100644 index 00000000..33209a26 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/dml19/DirectML.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b672b9fe6d8228ae978e74217a9bb1d6605643550be166eab15f1c09eddb86 +size 18521008 diff --git a/example/Ryzen-AI-Library/windows/onnx/dml19/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/dml19/onnxruntime.dll new file mode 100644 index 00000000..d262d070 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/dml19/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65faa0e6499fe569509d9a36089bda8f4e8929845299a4541fea114b82fcea8 +size 17863888 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/1x4.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/1x4.xclbin new file mode 100644 index 00000000..d150105c Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/1x4.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/4x4.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/4x4.xclbin new file mode 100644 index 00000000..fcfd9038 Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/4x4.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime.dll new file mode 100644 index 00000000..904cd921 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:32897f6f4616fdba87db4649f30c5b8c3fc3a889bcfaac35aff20e2e7a3a465b +size 18930896 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_shared.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_shared.dll new file mode 100644 index 00000000..ee74d35b --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_shared.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bab6360d3efd2f90fd3218602b6bc2aa5a80c65381486864a006c616cabda46 +size 22224 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_vitisai.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_vitisai.dll new file mode 100644 index 00000000..6c66d362 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_providers_vitisai.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c628b1d3327eb7ac4b9219c826c305c53c4f8979a322866b3a4e855de96ad895 +size 314064 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitis_ai_custom_ops.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitis_ai_custom_ops.dll new file mode 100644 index 00000000..a27159d1 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitis_ai_custom_ops.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e233cd1a1d86df48a206565c633db9abfb65560b3732ff0ecee13f88637b62 +size 19033808 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitisai_ep.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitisai_ep.dll new file mode 100644 index 00000000..00d78693 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/onnxruntime_vitisai_ep.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:86b694a371624063243021a92c12a4d8edf4da02dcb9aad7e337a010931d86c1 +size 78634192 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/vaip_config_npu_1.json b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/vaip_config_npu_1.json new file mode 100644 index 00000000..36a69a3a --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1/vaip_config_npu_1.json @@ -0,0 +1,1019 @@ +{ + "passes": [ + { + "name": "init", + "plugin": "vaip-pass_init" + }, + { + "name": "fuse_resize_norm", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_resize_norm", + "methodName": "rules" + } + }, + { + "name": "fuse_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_softmax", + "methodName": "rules" + } + }, + { + "name": "fuse_topk", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_topk", + "methodName": "rules" + } + }, + { + "name": "fuse_decode_filter_boxes", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_decode_filter_boxes", + "methodName": "rules" + } + }, + { + "name": "vaip_pass_norm_k", + "plugin": "vaip-pass_norm_k", + "enable_gc": true, + "disabled": false + }, + { + "name": "fuse_NMS", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.fuse_NMS", + "methodName": "rules" + } + }, + { + "name": "fuse_dynamic_dispatch", + "plugin": "vaip-pass_level1_dd", + "passDpuParam": { + "subPass": [ + { + "name": "dd_compiler_pass_transformation", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_generation", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_gen", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_transformation_2", + "plugin": 
"vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_2", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_generation_2", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_gen_2", + "methodName": "rules" + } + } + ] + } + }, + { + "name": "fuse_DPU", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + }, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_split_to_xir", + "plugin": "vaip-pass_convert_split_to_xir_op" + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "convert_pad", + "plugin": "vaip-pass_convert_pad", + "enableGc": true + }, + { + "name": "convert_in_to_gn", + "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", + "enableGc": true + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": "vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "remove_reshape_fix", + "plugin": "vaip-pass_py_ext", + "disabled": true, + 
"pyExt": { + "moduleName": "voe.passes.remove_reshape_fix", + "methodName": "rules" + } + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": "vaip-pass_const_fold_batchnorm_to_scale" + }, + { + "name": "const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_merge_mul", + "enableGc": true + }, + { + "name": "merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_convert_softmax_to_hard_softmax", + "disabled": true + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_merge_fix_fix_transpose", + "enableGc": true, + "disabled": true + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 32 + }, + "opt_level": { + "uintValue": 2 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + }, + "enable_fast_pm": { + "boolValue": true + } + }, + "minimum_num_of_conv": 2 + } + }, + { + "name": "fuse_DPU_MHA", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_MHA", + "plugin": "vaip-pass_convert_MHA", + "enableGc": true + }, + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + }, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_split_to_xir", + "plugin": 
"vaip-pass_convert_split_to_xir_op" + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "convert_pad", + "plugin": "vaip-pass_convert_pad", + "enableGc": true + }, + { + "name": "convert_in_to_gn", + "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", + "enableGc": true + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": "vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": "vaip-pass_const_fold_batchnorm_to_scale" + }, + { + "name": "const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_merge_mul", + "enableGc": true + }, + { + "name": "merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_convert_softmax_to_hard_softmax", + "disabled": true + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_merge_fix_fix_transpose", + "enableGc": true, + 
"disabled": true + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + }, + "enable_fast_pm": { + "boolValue": true + } + }, + "minimum_num_of_conv": 2, + "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin" + } + }, + { + "name": "vaiml_partition", + "plugin": "vaip-pass_vaiml_partition", + "vaiml_config": { + "vaiml_model_path": "vaiml_par_0", + "max_num_partitions": 200, + "device_name": "phx", + "debug": true + } + }, + { + "name": "vaip_pass_dd_merge_dqcastgather", + "plugin": "vaip-pass_dd_merge_dqcastgather", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_qop", + "plugin": "vaip-pass_dd_merge_qop", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_dqop", + "plugin": "vaip-pass_dd_merge_dqop", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_qop_onnx", + "plugin": "vaip-pass_dd_merge_qop_onnx", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_dqop_onnx", + "plugin": "vaip-pass_dd_merge_dqop_onnx", + "enable_gc": true, + "disabled": false + } + ], + "mepTable": [ + { + "modelName": "PSA", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "ca42121518cca903f07262b8f2751a42", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A", + "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", + "md5sumInMemory": "74ded15705d2c958177604029a20a208", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B", + "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", + "md5sumInMemory": "3ee8e6a8f08912a9a92a260b68447bb2", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": 
"PSO0_0", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "452a80b01d29ebc42559d59b42de03ca", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_1", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "4853206b53d2cae2a40aad448d73370c", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_2", + "md5sumOnDisk": "0abe0b0bbc8314b482f0737da3d831ad", + "md5sumInMemory": "ee7f757248851d28061617f87043805f", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_3", + "md5sumOnDisk": "a87968c033291ab04069feaafb5fd7df", + "md5sumInMemory": "19ef4a7ffa9c9ab3871ea3142db4a5db", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_4", + "md5sumOnDisk": "ab9c6f1bb23d04765708622f5e48c0da", + "md5sumInMemory": "4c54574f384ddf99ed01b09bd249ca8b", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "352de20cfd0a050f6083eb661237c6cc", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_MSFT", + "md5sumOnDisk": "F42BC6AC686C72B8CAA38DE28DFAA553", + "md5sumInMemory": "F42BC6AC686C72B8CAA38DE28DFAA553", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA_1", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "ee9a5fc4b79342b98049c4826983f18e", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "27b186167f3a3957b32141846bcf81f8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA_1.0.1_nchw", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", + "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA6.3", + "md5sumOnDisk": "b5e9f87a18d925e43f6e74ef34a299c3", + "md5sumInMemory": 
"bbea0a61a3df92f73df644d39bab7046", + "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_1.0.1_nchw", + "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", + "md5sumInMemory": "6cefe9e4244aa0f3f8dbf21d43789e86", + "md5sumInMemoryWithIo": "e3497d38e57aa72df6aec42833b784a8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B_1.0.1_nchw", + "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", + "md5sumInMemory": "48f349c792e8e2a7562e092968750947", + "md5sumInMemoryWithIo": "dc6c1b8774c65f20fcbaaae86cd05f8c", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1.0.1_nchw", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "35527caf46c38e60ad74204a1d966847", + "md5sumInMemoryWithIo": "4b0f1adedf7f6e7f74b22dd8d1fb22fb", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_nhwc", + "md5sumOnDisk": "777bd69cc9e12b6e7868277e639f97e2", + "md5sumInMemory": "3a9abd050f70b09de3546f1d61e43c74", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_1.1.0_nhwc", + "md5sumOnDisk": "b9eb351ca7af65a2a43f99f41dda7dc2", + "md5sumInMemory": "e3f795258afe1d046f56fad0f8574864", + "md5sumInMemoryWithIo": "3af1b536b95d9eebe3190294e311f57a", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B_1.1.0_nhwc", + "md5sumOnDisk": "fa6d63ec0a7674cb9464020802e5f3ca", + "md5sumInMemory": "862aea9de93c2e711ec842f942d9b8b1", + "md5sumInMemoryWithIo": "b54766ff357e41c4ca538d017e126385", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1.1.0_nhwc", + "md5sumOnDisk": "3a56581e403def2548e50c77808c4174", + "md5sumInMemory": "377a5a02ffbba68bc1fdd25b54f0f18b", + "md5sumInMemoryWithIo": "ac4f50ea2c10c863db4bfbd6ca024f6e", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSF", + "md5sumOnDisk": "d38670a70c72561cb3f718125829b5fa", + "md5sumInMemory": "2fbcab29de9dd547562c46319a225a9c", + 
"md5sumInMemoryWithIo": "5c07dc0856549dabc4d092763e1ce5cf", + "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSFv1.1", + "md5sumOnDisk": "c711cb8798e562011383bae4c5c91ce1", + "md5sumInMemory": "a394ffc0e58a8e841f5ae415c15a63e8", + "md5sumInMemoryWithIo": "037fca5fd1b0c7b195ef410c3e9b88df", + "xclbin": "4x2_psf_v1.1_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSH", + "md5sumOnDisk": "e5fd1d2783dca2bdc9f40e4941e2a9b9", + "md5sumInMemory": "62099384a6af2956912b2d31a99be483", + "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", + "xclbin": "4x2_psh_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSHv1.2", + "md5sumOnDisk": "08678ce4a4eab9eaa2cd8b3dcbdf5697", + "md5sumInMemory": "62099384a6af2956912b2d31a99be483", + "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", + "xclbin": "4x2_psh_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mxpzi", + "md5sumOnDisk": "15d1515c86b40874ac954869798d0e77", + "md5sumInMemory": "546b8eca24af15302c647edb3e575d54", + "md5sumInMemoryWithIo": "9f9d91808166382b325459dfb88494c0", + "xclbin": "4x2_psj_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.1", + "md5sumOnDisk": "72651ec6ae2fe552dd0604527d73c1e5", + "md5sumInMemory": "0e3ff9de7ff3d7eddc7712316ca7ab11", + "md5sumInMemoryWithIo": "5a4479883ad7e7724442977a88a257e0", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.2", + "md5sumOnDisk": "91aa788cdf570ebd5434bd7b8937feb2", + "md5sumInMemory": "6ed69ec59ba231b919877d12f81cabd3", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.3", + "md5sumOnDisk": "028f6808733628e822b86c12d38df4b6", + "md5sumInMemory": 
"7af92f5ac4aefd28d0fd84a836322f5e", + "md5sumInMemoryWithIo": "2dc156817e5201dc51c39e821b9d5ec7", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.4", + "md5sumOnDisk": "96fefa03d63137796293448db34d78e4", + "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", + "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mtea0a", + "md5sumOnDisk": "4e17c61308b9170dda55586dee6c0751", + "md5sumInMemory": "58eb51eccd44a084b185159b67e2c1b6", + "md5sumInMemoryWithIo": "50c2fb23e40a0617f58ebcedbbfac359", + "xclbin": "4x2_psq_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "m7h4xjg", + "md5sumOnDisk": "4e485de54588d95209560c0a29049b68", + "md5sumInMemory": "4c121cc7cd35dc04c30f46a94b2baf7a", + "md5sumInMemoryWithIo": "3223d1f84b9dd740f3a829ad9680469f", + "xclbin": "4x2_psq2_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-320", + "md5sumOnDisk": "4a6a777fc11158c1ca70ebdd6caae3fe", + "md5sumInMemory": "1bd58439b3a31d4e6edce0689e552ccf", + "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-640", + "md5sumOnDisk": "67927fb103d7fcdd3eeca44475eac6df", + "md5sumInMemory": "9313ce55730e051e4d32eb4f9986f1f2", + "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-1280", + "md5sumOnDisk": "dc4cfca432a4e09eb4275b55a485b126", + "md5sumInMemory": "1cf0b88f93d4d5d458e408abc0a4cf5d", + "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": 
"RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-2560", + "md5sumOnDisk": "de50b0722e512613188a39429e70ead7", + "md5sumInMemory": "5dc5872cd47b9da7f64b6855b1035595", + "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-320-v1.1", + "md5sumOnDisk": "e6b7d1656f92ca990c7ec94f8b17813d", + "md5sumInMemory": "d0d6cbd1d89f60347ab6c9453e35507e", + "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-640-v1.1", + "md5sumOnDisk": "7d789256f2d568367974b7b74fe9de71", + "md5sumInMemory": "be1539116a956da222b7b678a369fbf6", + "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-1280-v1.1", + "md5sumOnDisk": "b815ee6d465f51de249c4048aa2515cb", + "md5sumInMemory": "4333bedcd96799e4bc2fba9ec4746617", + "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-2560-v1.1", + "md5sumOnDisk": "5d0c439244e9cf46d45d7ce249c0a3e7", + "md5sumInMemory": "577399f309357fc62e307c38945ce770", + "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mzdk5", + "md5sumOnDisk": "0e94597db08d8e0fcd0e867b3ce2c686", + "md5sumInMemory": "f3b3206d1e984ce729cec3533287227e", + "md5sumInMemoryWithIo": "d61aa9b5c7af655c10b97533814c2244", + "xclbin": "4x4_psr_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PST_v1.1", + "md5sumOnDisk": "92358bd7e8a68ea9c6e9d327423069e3", + "md5sumInMemory": 
"67d78f48fd05ce03e3efb69212243d30", + "md5sumInMemoryWithIo": "a807ae1f05fc42e16d57d59186a414b4", + "target": "RyzenAI_transformer_cxx_pss_pst", + "modelCategory": "PST" + }, + { + "modelName": "PSS_v1.1", + "md5sumOnDisk": "63a4651d48b4281ddf6a6a33ebad5fc7", + "md5sumInMemory": "60142dfa473572b34fbf476c37ebfa1b", + "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", + "target": "RyzenAI_transformer_cxx_pss_pst", + "modelCategory": "PSS" + }, + { + "modelName": "PSS", + "md5sumOnDisk": "5ad2857510b5646376f3f9348591b83e", + "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", + "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PST", + "md5sumOnDisk": "c8c2ab668b56daf7d2228e53c9a4f0db", + "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", + "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PSS_v1.0", + "md5sumOnDisk": "fbf3fd6e6bab35efba46b7e9060f2d62", + "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", + "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", + "target": "RyzenAI_vision_config_3_mha" + }, + { + "modelName": "PSS_nhwc", + "md5sumOnDisk": "1d46fbe6a09e79b36e21d985d937df3b", + "md5sumInMemory": "ccf646813e6e91ff09f9d4216047a6ec", + "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PST_nhwc", + "md5sumOnDisk": "02c8e157824d0c75289f2333b307a5a9", + "md5sumInMemory": "087423cf961c2df293d363abd712d5d7", + "md5sumInMemoryWithIo": "6512311dac77f235e3ef637287389419", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "GT_v1.2", + "md5sumOnDisk": "4daa45a72a36d731279b7c01e4545637", + "md5sumInMemory": "0fd6cc09fe78a6a5e4fb697c0e8670e9", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_00.xclbin" + }, + { + "modelName": "GT_v1.3", + "md5sumOnDisk": 
"d799de8b1e1fa572daad06d7a49a7afe", + "md5sumInMemory": "97bcaa432a2c634a707dfc23bf222032", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_01.xclbin" + }, + { + "modelName": "HT_v1.2", + "md5sumOnDisk": "01fa81ebc4c70ea06c2f4d28c16af389", + "md5sumInMemory": "6809517bbd0a7b44acc31942d410ca3a", + "md5sumInMemoryWithIo": "9731a32b64df8abdead6b6f370a293fc", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_01.xclbin" + } + ], + "target": "RyzenAI_vision_config_1", + "targets": [ + { + "name": "VAIML_config_0", + "pass": [ + "init", + "vaiml_partition", + "vaip_pass_dd_merge_dqcastgather", + "vaip_pass_dd_merge_qop", + "vaip_pass_dd_merge_dqop", + "vaip_pass_dd_merge_qop_onnx", + "vaip_pass_dd_merge_dqop_onnx" + ] + }, + { + "name": "RyzenAI_vision_config_1", + "pass": [ + "init", + "fuse_DPU" + ] + }, + { + "name": "RyzenAI_shell_config_1", + "xclbin": "AMD_AIE2P_2x4x1_Overlay.xclbin", + "share_hw_context": true, + "pass": [ + "init", + "fuse_DPU", + "vaip_pass_dd_merge_qop", + "vaip_pass_dd_merge_dqop", + "vaip_pass_dd_merge_qop_onnx", + "vaip_pass_dd_merge_dqop_onnx" + ], + "target_opts": { + "xcompilerAttrs": { + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 3 + }, + "profile": { + "uintValue": 0 + }, + "enable_fast_pm": { + "boolValue": true + }, + "enable_weights_prefetch": { + "boolValue": true + }, + "enable_cost_model_tiling": { + "boolValue": true + }, + "enable_mergesync": { + "boolValue": true + } + } + }, + "graph_engine_qos_priority": 640 + }, + { + "name": "RyzenAI_vision_config_2", + "xclbin": "1x4.xclbin", + "pass": [ + "init", + "fuse_DPU" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 16 + }, + "opt_level": { + "uintValue": 0 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + } + } + } + }, + { + "name": "RyzenAI_vision_config_3", 
+ "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", + "pass": [ + "init", + "fuse_DPU" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + } + } + } + }, + { + "name": "RyzenAI_vision_config_3_mha", + "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", + "pass": [ + "init", + "fuse_DPU_MHA" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + "advanced_opt": { + "boolValue": true + } + } + } + }, + { + "name": "RyzenAI_transformer_config_2", + "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", + "pass": [ + "init", + "fuse_dynamic_dispatch" + ] + }, + { + "name": "RyzenAI_xcompiler_and_dd_config", + "xclbin": "2x4x2_pss_pst_model_mha_qdq.xclbin", + "pass": [ + "init", + "fuse_DPU_MHA", + "fuse_dynamic_dispatch" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + "advanced_opt": { + "boolValue": true + } + } + } + }, + { + "name": "RyzenAI_transformer_cxx_pss_pst", + "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq.xclbin", + "share_hw_context": true, + "pass": [ + "init", + "fuse_DPU_MHA", + "fuse_dynamic_dispatch_pss_pst" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + "advanced_opt": { + "boolValue": true + } + } + } + } + ], + "enable_cache_file_io_in_mem": true +} diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/1x4.xclbin 
b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/1x4.xclbin new file mode 100644 index 00000000..eb63d209 Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/1x4.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/4x4.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/4x4.xclbin new file mode 100644 index 00000000..3b7d0741 Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/4x4.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime.dll new file mode 100644 index 00000000..ed81825f --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e842bef529f9182dda23cafb045681988ab4e05555a15c9b32847de58c2aeb81 +size 17973976 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_shared.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_shared.dll new file mode 100644 index 00000000..0fef47cd --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_shared.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2798304dd60cfd0dd354badb549fd40790109eb25f5f03cafd23233d1d172351 +size 22744 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_vitisai.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_vitisai.dll new file mode 100644 index 00000000..a931949a --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_providers_vitisai.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe67fc51cee909fbea16ab794d270bf11d6c907f527c838888ae7567022bf51a +size 243928 diff --git 
a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_vitisai_ep.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_vitisai_ep.dll new file mode 100644 index 00000000..06edb713 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/onnxruntime_vitisai_ep.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae118083bf8b500ba1bc216f79743769426b15b9fdc659dffc27157378b04bd +size 64899800 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/vaip_config_npu_1.json b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/vaip_config_npu_1.json new file mode 100644 index 00000000..8523b100 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu1_wdf/vaip_config_npu_1.json @@ -0,0 +1,214 @@ +{ + "passes": [ + { + "name": "init", + "plugin": "vaip-pass_init" + }, + { + "name": "fuse_resize_norm", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_resize_norm", + "methodName": "rules" + } + }, + { + "name": "fuse_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_softmax", + "methodName": "rules" + } + }, + { + "name": "fuse_topk", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_topk", + "methodName": "rules" + } + }, + { + "name": "fuse_decode_filter_boxes", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_decode_filter_boxes", + "methodName": "rules" + } + }, + { + "name": "fuse_NMS", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.fuse_NMS", + "methodName": "rules" + } + }, + { + "name": "fuse_DPU", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + 
}, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_to_xir_op", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.convert_to_xir_op", + "methodName": "rules" + } + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": "vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "remove_reshape_fix", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.remove_reshape_fix", + "methodName": "rules" + } + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.const_fold_batchnorm_to_scale", + "methodName": "rules" + } + }, + { + "name": "const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_py_ext", + "enableGc": true, + "pyExt": { + "moduleName": "voe.passes.merge_mul", + "methodName": "rules" + } + }, + { + "name": 
"merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.convert_softmax_to_hard_softmax", + "methodName": "rules" + } + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_py_ext", + "enableGc": true, + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.merge_fix_fix_transpose", + "methodName": "rules" + } + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 32 + }, + "opt_level": { + "uintValue": 0 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + } + }, + "minimum_num_of_conv": 2 + } + } + ] +} diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay.xclbin new file mode 100644 index 00000000..da632c17 Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG0.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG0.xclbin new file mode 100644 index 00000000..de3fa11f Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG0.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG1.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG1.xclbin new file mode 100644 index 00000000..f847a577 Binary files /dev/null and 
b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_4x4_Overlay_CFG1.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_Nx4_Overlay.xclbin b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_Nx4_Overlay.xclbin new file mode 100644 index 00000000..f04206bb Binary files /dev/null and b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/AMD_AIE2P_Nx4_Overlay.xclbin differ diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime.dll new file mode 100644 index 00000000..904cd921 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32897f6f4616fdba87db4649f30c5b8c3fc3a889bcfaac35aff20e2e7a3a465b +size 18930896 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_shared.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_shared.dll new file mode 100644 index 00000000..ee74d35b --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_shared.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bab6360d3efd2f90fd3218602b6bc2aa5a80c65381486864a006c616cabda46 +size 22224 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_vitisai.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_vitisai.dll new file mode 100644 index 00000000..6c66d362 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_providers_vitisai.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c628b1d3327eb7ac4b9219c826c305c53c4f8979a322866b3a4e855de96ad895 +size 314064 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitis_ai_custom_ops.dll 
b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitis_ai_custom_ops.dll new file mode 100644 index 00000000..a27159d1 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitis_ai_custom_ops.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e233cd1a1d86df48a206565c633db9abfb65560b3732ff0ecee13f88637b62 +size 19033808 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitisai_ep.dll b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitisai_ep.dll new file mode 100644 index 00000000..00d78693 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/onnxruntime_vitisai_ep.dll @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b694a371624063243021a92c12a4d8edf4da02dcb9aad7e337a010931d86c1 +size 78634192 diff --git a/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/vaip_config_npu_2.json b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/vaip_config_npu_2.json new file mode 100644 index 00000000..15e195a5 --- /dev/null +++ b/example/Ryzen-AI-Library/windows/onnx/ryzenAI/npu2/vaip_config_npu_2.json @@ -0,0 +1,1019 @@ +{ + "passes": [ + { + "name": "init", + "plugin": "vaip-pass_init" + }, + { + "name": "fuse_resize_norm", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_resize_norm", + "methodName": "rules" + } + }, + { + "name": "fuse_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_softmax", + "methodName": "rules" + } + }, + { + "name": "fuse_topk", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_topk", + "methodName": "rules" + } + }, + { + "name": "fuse_decode_filter_boxes", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_decode_filter_boxes", + "methodName": "rules" + } + }, + { + "name": 
"vaip_pass_norm_k", + "plugin": "vaip-pass_norm_k", + "enable_gc": true, + "disabled": false + }, + { + "name": "fuse_NMS", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.fuse_NMS", + "methodName": "rules" + } + }, + { + "name": "fuse_dynamic_dispatch", + "plugin": "vaip-pass_level1_dd", + "passDpuParam": { + "subPass": [ + { + "name": "dd_compiler_pass_transformation", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_generation", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_gen", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_transformation_2", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_2", + "methodName": "rules" + } + }, + { + "name": "dd_compiler_pass_generation_2", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.dynamic_dispatch_gen_2", + "methodName": "rules" + } + } + ] + } + }, + { + "name": "fuse_DPU", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + }, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_split_to_xir", + "plugin": "vaip-pass_convert_split_to_xir_op" + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "convert_pad", + "plugin": "vaip-pass_convert_pad", + "enableGc": true + }, + { + "name": "convert_in_to_gn", + "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", + "enableGc": true + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": 
"vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "remove_reshape_fix", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.remove_reshape_fix", + "methodName": "rules" + } + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": "vaip-pass_const_fold_batchnorm_to_scale" + }, + { + "name": "const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_merge_mul", + "enableGc": true + }, + { + "name": "merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_convert_softmax_to_hard_softmax", + "disabled": true + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_merge_fix_fix_transpose", + "enableGc": true, + "disabled": true + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 32 + }, + "opt_level": { + "uintValue": 3 + }, + 
"dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + }, + "enable_fast_pm": { + "boolValue": true + } + }, + "minimum_num_of_conv": 2 + } + }, + { + "name": "fuse_DPU_MHA", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_MHA", + "plugin": "vaip-pass_convert_MHA", + "enableGc": true + }, + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + }, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_split_to_xir", + "plugin": "vaip-pass_convert_split_to_xir_op" + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "convert_pad", + "plugin": "vaip-pass_convert_pad", + "enableGc": true + }, + { + "name": "convert_in_to_gn", + "plugin": "vaip-pass_convert_instancenorm_to_groupnorm", + "enableGc": true + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": "vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": 
"vaip-pass_const_fold_batchnorm_to_scale" + }, + { + "name": "const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_merge_mul", + "enableGc": true + }, + { + "name": "merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_convert_softmax_to_hard_softmax", + "disabled": true + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_merge_fix_fix_transpose", + "enableGc": true, + "disabled": true + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + }, + "enable_fast_pm": { + "boolValue": true + } + }, + "minimum_num_of_conv": 2, + "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin" + } + }, + { + "name": "vaiml_partition", + "plugin": "vaip-pass_vaiml_partition", + "vaiml_config": { + "vaiml_model_path": "vaiml_par_0", + "max_num_partitions": 200, + "device_name": "phx", + "debug": true + } + }, + { + "name": "vaip_pass_dd_merge_dqcastgather", + "plugin": "vaip-pass_dd_merge_dqcastgather", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_qop", + "plugin": "vaip-pass_dd_merge_qop", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_dqop", + "plugin": "vaip-pass_dd_merge_dqop", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_qop_onnx", + "plugin": 
"vaip-pass_dd_merge_qop_onnx", + "enable_gc": true, + "disabled": false + }, + { + "name": "vaip_pass_dd_merge_dqop_onnx", + "plugin": "vaip-pass_dd_merge_dqop_onnx", + "enable_gc": true, + "disabled": false + } + ], + "mepTable": [ + { + "modelName": "PSA", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "ca42121518cca903f07262b8f2751a42", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A", + "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", + "md5sumInMemory": "74ded15705d2c958177604029a20a208", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B", + "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", + "md5sumInMemory": "3ee8e6a8f08912a9a92a260b68447bb2", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_0", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "452a80b01d29ebc42559d59b42de03ca", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_1", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "4853206b53d2cae2a40aad448d73370c", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_2", + "md5sumOnDisk": "0abe0b0bbc8314b482f0737da3d831ad", + "md5sumInMemory": "ee7f757248851d28061617f87043805f", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_3", + "md5sumOnDisk": "a87968c033291ab04069feaafb5fd7df", + "md5sumInMemory": "19ef4a7ffa9c9ab3871ea3142db4a5db", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_4", + "md5sumOnDisk": "ab9c6f1bb23d04765708622f5e48c0da", + "md5sumInMemory": "4c54574f384ddf99ed01b09bd249ca8b", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "352de20cfd0a050f6083eb661237c6cc", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_MSFT", + "md5sumOnDisk": "F42BC6AC686C72B8CAA38DE28DFAA553", + "md5sumInMemory": 
"F42BC6AC686C72B8CAA38DE28DFAA553", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA_1", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "ee9a5fc4b79342b98049c4826983f18e", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "27b186167f3a3957b32141846bcf81f8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA_1.0.1_nchw", + "md5sumOnDisk": "356f009d5cb141881fe4c27d09b1f43b", + "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", + "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSA6.3", + "md5sumOnDisk": "b5e9f87a18d925e43f6e74ef34a299c3", + "md5sumInMemory": "bbea0a61a3df92f73df644d39bab7046", + "md5sumInMemoryWithIo": "92b1357557fa7ceaa73a48661bbe1ef8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_1.0.1_nchw", + "md5sumOnDisk": "17cbedd0695d097f1226797bee711710", + "md5sumInMemory": "6cefe9e4244aa0f3f8dbf21d43789e86", + "md5sumInMemoryWithIo": "e3497d38e57aa72df6aec42833b784a8", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B_1.0.1_nchw", + "md5sumOnDisk": "ee58a2f6e0bbd99e9921731be4b0a15c", + "md5sumInMemory": "48f349c792e8e2a7562e092968750947", + "md5sumInMemoryWithIo": "dc6c1b8774c65f20fcbaaae86cd05f8c", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1.0.1_nchw", + "md5sumOnDisk": "da35c43876d1b21835a08320688a69d2", + "md5sumInMemory": "35527caf46c38e60ad74204a1d966847", + "md5sumInMemoryWithIo": "4b0f1adedf7f6e7f74b22dd8d1fb22fb", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_nhwc", + "md5sumOnDisk": "777bd69cc9e12b6e7868277e639f97e2", + "md5sumInMemory": "3a9abd050f70b09de3546f1d61e43c74", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_A_1.1.0_nhwc", + "md5sumOnDisk": "b9eb351ca7af65a2a43f99f41dda7dc2", + "md5sumInMemory": 
"e3f795258afe1d046f56fad0f8574864", + "md5sumInMemoryWithIo": "3af1b536b95d9eebe3190294e311f57a", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSO0_B_1.1.0_nhwc", + "md5sumOnDisk": "fa6d63ec0a7674cb9464020802e5f3ca", + "md5sumInMemory": "862aea9de93c2e711ec842f942d9b8b1", + "md5sumInMemoryWithIo": "b54766ff357e41c4ca538d017e126385", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "mmjbka_1.1.0_nhwc", + "md5sumOnDisk": "3a56581e403def2548e50c77808c4174", + "md5sumInMemory": "377a5a02ffbba68bc1fdd25b54f0f18b", + "md5sumInMemoryWithIo": "ac4f50ea2c10c863db4bfbd6ca024f6e", + "target": "RyzenAI_shell_config_1" + }, + { + "modelName": "PSF", + "md5sumOnDisk": "d38670a70c72561cb3f718125829b5fa", + "md5sumInMemory": "2fbcab29de9dd547562c46319a225a9c", + "md5sumInMemoryWithIo": "5c07dc0856549dabc4d092763e1ce5cf", + "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSFv1.1", + "md5sumOnDisk": "c711cb8798e562011383bae4c5c91ce1", + "md5sumInMemory": "a394ffc0e58a8e841f5ae415c15a63e8", + "md5sumInMemoryWithIo": "037fca5fd1b0c7b195ef410c3e9b88df", + "xclbin": "4x2_psf_v1.1_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSH", + "md5sumOnDisk": "e5fd1d2783dca2bdc9f40e4941e2a9b9", + "md5sumInMemory": "62099384a6af2956912b2d31a99be483", + "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", + "xclbin": "4x2_psh_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSHv1.2", + "md5sumOnDisk": "08678ce4a4eab9eaa2cd8b3dcbdf5697", + "md5sumInMemory": "62099384a6af2956912b2d31a99be483", + "md5sumInMemoryWithIo": "b3a1041f9de14dae3b52e711d8de0037", + "xclbin": "4x2_psh_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mxpzi", + "md5sumOnDisk": "15d1515c86b40874ac954869798d0e77", + "md5sumInMemory": "546b8eca24af15302c647edb3e575d54", + "md5sumInMemoryWithIo": 
"9f9d91808166382b325459dfb88494c0", + "xclbin": "4x2_psj_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.1", + "md5sumOnDisk": "72651ec6ae2fe552dd0604527d73c1e5", + "md5sumInMemory": "0e3ff9de7ff3d7eddc7712316ca7ab11", + "md5sumInMemoryWithIo": "5a4479883ad7e7724442977a88a257e0", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.2", + "md5sumOnDisk": "91aa788cdf570ebd5434bd7b8937feb2", + "md5sumInMemory": "6ed69ec59ba231b919877d12f81cabd3", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.3", + "md5sumOnDisk": "028f6808733628e822b86c12d38df4b6", + "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", + "md5sumInMemoryWithIo": "2dc156817e5201dc51c39e821b9d5ec7", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PSIv1.4", + "md5sumOnDisk": "96fefa03d63137796293448db34d78e4", + "md5sumInMemory": "7af92f5ac4aefd28d0fd84a836322f5e", + "md5sumInMemoryWithIo": "8f4fbd7e1475b7b470e77449211455f2", + "xclbin": "4x2_psi_integrated_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mtea0a", + "md5sumOnDisk": "4e17c61308b9170dda55586dee6c0751", + "md5sumInMemory": "58eb51eccd44a084b185159b67e2c1b6", + "md5sumInMemoryWithIo": "50c2fb23e40a0617f58ebcedbbfac359", + "xclbin": "4x2_psq_model_a8w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "m7h4xjg", + "md5sumOnDisk": "4e485de54588d95209560c0a29049b68", + "md5sumInMemory": "4c121cc7cd35dc04c30f46a94b2baf7a", + "md5sumInMemoryWithIo": "3223d1f84b9dd740f3a829ad9680469f", + "xclbin": "4x2_psq2_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-320", + "md5sumOnDisk": "4a6a777fc11158c1ca70ebdd6caae3fe", + "md5sumInMemory": 
"1bd58439b3a31d4e6edce0689e552ccf", + "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-640", + "md5sumOnDisk": "67927fb103d7fcdd3eeca44475eac6df", + "md5sumInMemory": "9313ce55730e051e4d32eb4f9986f1f2", + "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-1280", + "md5sumOnDisk": "dc4cfca432a4e09eb4275b55a485b126", + "md5sumInMemory": "1cf0b88f93d4d5d458e408abc0a4cf5d", + "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-2560", + "md5sumOnDisk": "de50b0722e512613188a39429e70ead7", + "md5sumInMemory": "5dc5872cd47b9da7f64b6855b1035595", + "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-320-v1.1", + "md5sumOnDisk": "e6b7d1656f92ca990c7ec94f8b17813d", + "md5sumInMemory": "d0d6cbd1d89f60347ab6c9453e35507e", + "md5sumInMemoryWithIo": "eb0cd98491be811c3acdf79d38981cd6", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-640-v1.1", + "md5sumOnDisk": "7d789256f2d568367974b7b74fe9de71", + "md5sumInMemory": "be1539116a956da222b7b678a369fbf6", + "md5sumInMemoryWithIo": "3bea2158bc5ec0d4ee3954cd73de3986", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-1280-v1.1", + "md5sumOnDisk": "b815ee6d465f51de249c4048aa2515cb", + "md5sumInMemory": "4333bedcd96799e4bc2fba9ec4746617", + "md5sumInMemoryWithIo": "4abc5e4f8292dbe10da0f9eb12b0d1b4", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": 
"RyzenAI_transformer_config_2" + }, + { + "modelName": "mswbjvw-2560-v1.1", + "md5sumOnDisk": "5d0c439244e9cf46d45d7ce249c0a3e7", + "md5sumInMemory": "577399f309357fc62e307c38945ce770", + "md5sumInMemoryWithIo": "8e5b703bb200bc7d7ea7a415e3139385", + "xclbin": "4x2_pso2_model_a16w16_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "mzdk5", + "md5sumOnDisk": "0e94597db08d8e0fcd0e867b3ce2c686", + "md5sumInMemory": "f3b3206d1e984ce729cec3533287227e", + "md5sumInMemoryWithIo": "d61aa9b5c7af655c10b97533814c2244", + "xclbin": "4x4_psr_model_a16w8_qdq.xclbin", + "target": "RyzenAI_transformer_config_2" + }, + { + "modelName": "PST_v1.1", + "md5sumOnDisk": "92358bd7e8a68ea9c6e9d327423069e3", + "md5sumInMemory": "67d78f48fd05ce03e3efb69212243d30", + "md5sumInMemoryWithIo": "a807ae1f05fc42e16d57d59186a414b4", + "target": "RyzenAI_transformer_cxx_pss_pst", + "modelCategory": "PST" + }, + { + "modelName": "PSS_v1.1", + "md5sumOnDisk": "63a4651d48b4281ddf6a6a33ebad5fc7", + "md5sumInMemory": "60142dfa473572b34fbf476c37ebfa1b", + "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", + "target": "RyzenAI_transformer_cxx_pss_pst", + "modelCategory": "PSS" + }, + { + "modelName": "PSS", + "md5sumOnDisk": "5ad2857510b5646376f3f9348591b83e", + "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", + "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PST", + "md5sumOnDisk": "c8c2ab668b56daf7d2228e53c9a4f0db", + "md5sumInMemory": "62d1f9a68e9a1af013852ed3d1564d02", + "md5sumInMemoryWithIo": "4413357faa7c2f1ce6036f869f4d7e14", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PSS_v1.0", + "md5sumOnDisk": "fbf3fd6e6bab35efba46b7e9060f2d62", + "md5sumInMemory": "03daf292839461b5dd16b97c66dee37d", + "md5sumInMemoryWithIo": "d7a01430b6509651f0293c0d5e265a2a", + "target": "RyzenAI_vision_config_3_mha" + }, + { + "modelName": "PSS_nhwc", + "md5sumOnDisk": 
"1d46fbe6a09e79b36e21d985d937df3b", + "md5sumInMemory": "ccf646813e6e91ff09f9d4216047a6ec", + "md5sumInMemoryWithIo": "3c875c1144bcfa0dfd788b05116d589b", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "PST_nhwc", + "md5sumOnDisk": "02c8e157824d0c75289f2333b307a5a9", + "md5sumInMemory": "087423cf961c2df293d363abd712d5d7", + "md5sumInMemoryWithIo": "6512311dac77f235e3ef637287389419", + "target": "RyzenAI_xcompiler_and_dd_config" + }, + { + "modelName": "GT_v1.2", + "md5sumOnDisk": "4daa45a72a36d731279b7c01e4545637", + "md5sumInMemory": "0fd6cc09fe78a6a5e4fb697c0e8670e9", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_00.xclbin" + }, + { + "modelName": "GT_v1.3", + "md5sumOnDisk": "d799de8b1e1fa572daad06d7a49a7afe", + "md5sumInMemory": "97bcaa432a2c634a707dfc23bf222032", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_01.xclbin" + }, + { + "modelName": "HT_v1.2", + "md5sumOnDisk": "01fa81ebc4c70ea06c2f4d28c16af389", + "md5sumInMemory": "6809517bbd0a7b44acc31942d410ca3a", + "md5sumInMemoryWithIo": "9731a32b64df8abdead6b6f370a293fc", + "target": "VAIML_config_0", + "xclbin": "4x4_gt_ht_01.xclbin" + } + ], + "target": "RyzenAI_vision_config_1", + "targets": [ + { + "name": "VAIML_config_0", + "pass": [ + "init", + "vaiml_partition", + "vaip_pass_dd_merge_dqcastgather", + "vaip_pass_dd_merge_qop", + "vaip_pass_dd_merge_dqop", + "vaip_pass_dd_merge_qop_onnx", + "vaip_pass_dd_merge_dqop_onnx" + ] + }, + { + "name": "RyzenAI_vision_config_1", + "pass": [ + "init", + "fuse_DPU" + ] + }, + { + "name": "RyzenAI_shell_config_1", + "xclbin": "AMD_AIE2P_2x4x1_Overlay.xclbin", + "share_hw_context": true, + "pass": [ + "init", + "fuse_DPU", + "vaip_pass_dd_merge_qop", + "vaip_pass_dd_merge_dqop", + "vaip_pass_dd_merge_qop_onnx", + "vaip_pass_dd_merge_dqop_onnx" + ], + "target_opts": { + "xcompilerAttrs": { + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 3 + }, + "profile": { + "uintValue": 0 + }, + "enable_fast_pm": 
{ + "boolValue": true + }, + "enable_weights_prefetch": { + "boolValue": true + }, + "enable_cost_model_tiling": { + "boolValue": true + }, + "enable_mergesync": { + "boolValue": true + } + } + }, + "graph_engine_qos_priority": 640 + }, + { + "name": "RyzenAI_vision_config_2", + "xclbin": "1x4.xclbin", + "pass": [ + "init", + "fuse_DPU" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 16 + }, + "opt_level": { + "uintValue": 0 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + } + } + } + }, + { + "name": "RyzenAI_vision_config_3", + "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", + "pass": [ + "init", + "fuse_DPU" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + } + } + } + }, + { + "name": "RyzenAI_vision_config_3_mha", + "xclbin": "AMD_AIE2P_4x4_Overlay_CFG2.xclbin", + "pass": [ + "init", + "fuse_DPU_MHA" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + "advanced_opt": { + "boolValue": true + } + } + } + }, + { + "name": "RyzenAI_transformer_config_2", + "xclbin": "4x2_psf_model_a8w8_qdq.xclbin", + "pass": [ + "init", + "fuse_dynamic_dispatch" + ] + }, + { + "name": "RyzenAI_xcompiler_and_dd_config", + "xclbin": "2x4x2_pss_pst_model_mha_qdq.xclbin", + "pass": [ + "init", + "fuse_DPU_MHA", + "fuse_dynamic_dispatch" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + 
"advanced_opt": { + "boolValue": true + } + } + } + }, + { + "name": "RyzenAI_transformer_cxx_pss_pst", + "xclbin": "2x4x2_pss_pst_4x2_psq_model_qdq.xclbin", + "share_hw_context": true, + "pass": [ + "init", + "fuse_DPU_MHA", + "fuse_dynamic_dispatch_pss_pst" + ], + "target_opts": { + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 1000 + }, + "opt_level": { + "uintValue": 65536 + }, + "enable_fast_pm": { + "boolValue": true + }, + "advanced_opt": { + "boolValue": true + } + } + } + } + ], + "enable_cache_file_io_in_mem": true +} diff --git a/example/Ryzen-AI-Library/windows/vaip_config.json b/example/Ryzen-AI-Library/windows/vaip_config.json deleted file mode 100644 index a2debc82..00000000 --- a/example/Ryzen-AI-Library/windows/vaip_config.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "_comment" : "# issue 1048", - "name": "convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "_comment" : "test case : yolov5s6", - "name": "manual_partition", - "plugin": "vaip-pass_manual_partition", - "disabled": true, - "manualPartition": { - "fromOps": [ - "1745/duplicated_token_14", - "1764/duplicated_token_10", - 
"1783/duplicated_token_6", - "1802/duplicated_token_2" - ], - "toOps": [ - "2895" - ] - } - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "_comment" : "test case q_operator_resnet50", - "name": "convert_qlinear_to_qdq", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "enableGc": true, - "pyExt": { - "moduleName": "voe.passes.convert_qlinear_to_qdq", - "methodName": "rules" - } - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_to_xir_op", - "plugin": "vaip-pass_py_ext", - "disabled" : false, - "pyExt": { - "moduleName": "voe.passes.convert_to_xir_op", - "methodName": "rules" - } - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "pyExt": { - "moduleName": "voe.passes.merge_fix", - "methodName": "rules" - } - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - "plugin": "vaip-pass_const_add_fix" - }, - { - "_comment" : "test case 41 see issue #611 #626 for more detail", - "name": "merge_duplicated_fix", - "plugin": "vaip-pass_merge_duplicated_fix", - "disabled": true, - "enableGc": true - }, - { - "_comment": "test case 112", - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": 
"voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "_comment" : "test case 5", - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.const_fold_batchnorm_to_scale", - "methodName": "rules" - } - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "_comment" : "test case 112", - "name": "merge_mul", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.merge_mul", - "methodName": "rules" - } - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "name": "graph_output_add_node", - "plugin": "vaip-pass_graph_output_add_node", - "disabled": true - }, - { - "_comment" : "test case 20", - "name": "convert_transpose_add_fix_input_fix_input", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_transpose_add_fix_input_fix_input", - "methodName": "process" - } - }, - { - "_comment" : "test case 100", - "name": "convert_transpose_fix_pad_fix_input", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_transpose_fix_pad_fix_input", - "methodName": "process" - } - }, - { - "_comment" : "test case 100", - "name": "convert_transpose_fix_input", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_transpose_fix_input", - "methodName": "process" - } - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_py_ext", - "disabled" : true, - "pyExt": { - "moduleName": "voe.passes.convert_softmax_to_hard_softmax", - "methodName": "rules" - } - }, - { - "_comment": "test case 
43", - "name": "remove_top_transpose", - "plugin": "vaip-pass_merge_input_transpose", - "disabled": true, - "enableGc": true - }, - { - "_comment": "test case 110", - "name": "remove_bottom_transpose", - "plugin": "vaip-pass_remove_bottom_transpose", - "disabled": true, - "enableGc": true - }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode" : { - "stringValue" : "performance" - }, - "dpu_subgraph_num" : { - "intValue" : 32 - }, - "opt_level" : { - "intValue" : 0 - }, - "dump_subgraph_ops" : { - "boolValue" : false - }, - "profile" : { - "boolValue" : false - }, - "prefetch" : { - "boolValue" : false - }, - "preassign" : { - "boolValue" : false - }, - "disable_std_quant" : { - "boolValue" : false - }, - "concat_skip_code_gen" : { - "boolValue" : false - } - } - } - } - ] -}