diff --git a/libkineto/CMakeLists.txt b/libkineto/CMakeLists.txt index 5ee1aff..1355079 100644 --- a/libkineto/CMakeLists.txt +++ b/libkineto/CMakeLists.txt @@ -26,6 +26,7 @@ set(KINETO_LIBRARY_TYPE "default" CACHE STRING "Type of library (default, static or shared) to build") set_property(CACHE KINETO_LIBRARY_TYPE PROPERTY STRINGS default shared static) option(KINETO_BUILD_TESTS "Build kineto unit tests" ON) +option(KINETO_BUILD_MSPTI_PLUGIN "Build MSPTI dynamic plugin for Kineto" OFF) set(LIBKINETO_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(LIBKINETO_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") @@ -84,6 +85,10 @@ if(NOT DEFINED LIBKINETO_NOAIUPTI) add_subdirectory(src/plugin/aiupti) endif() +if(KINETO_BUILD_MSPTI_PLUGIN) + add_subdirectory(src/plugin/mspti_dynamic) +endif() + # Define file lists if(LIBKINETO_NOCUPTI AND LIBKINETO_NOROCTRACER AND LIBKINETO_NOXPUPTI AND LIBKINETO_NOAIUPTI) get_filelist("get_libkineto_cpu_only_srcs(with_api=False)" LIBKINETO_SRCS) diff --git a/libkineto/src/dynamic_plugin/PluginLoader.h b/libkineto/src/dynamic_plugin/PluginLoader.h index 2a6af5c..7fdb3d6 100644 --- a/libkineto/src/dynamic_plugin/PluginLoader.h +++ b/libkineto/src/dynamic_plugin/PluginLoader.h @@ -13,11 +13,11 @@ namespace libkineto { #ifdef _WIN32 -constexpr const char* kPluginExtension = "dll"; +constexpr const char* kPluginExtension = ".dll"; #elif defined(__linux__) || defined(__APPLE__) -constexpr const char* kPluginExtension = "so"; +constexpr const char* kPluginExtension = ".so"; #else -constexpr const char* kPluginExtension = "DONOTMATCHANYTHING"; +constexpr const char* kPluginExtension = ".DONOTMATCHANYTHING"; #endif class PluginRegistry { diff --git a/libkineto/src/dynamic_plugin/PluginTraceBuilder.h b/libkineto/src/dynamic_plugin/PluginTraceBuilder.h index 0e184c6..51069ca 100644 --- a/libkineto/src/dynamic_plugin/PluginTraceBuilder.h +++ b/libkineto/src/dynamic_plugin/PluginTraceBuilder.h @@ -39,7 +39,7 @@ class PluginTraceBuilder { // Handle versioning // 
Currently expect the exact same version if (pProfileEvent->unpaddedStructSize < - KINETO_PLUGIN_PROFILER_PROCESS_EVENTS_PARAMS_UNPADDED_STRUCT_SIZE) { + KINETO_PLUGIN_PROFILE_EVENT_UNPADDED_STRUCT_SIZE) { LOG(ERROR) << "Profile event has an incompatible version"; return -1; } diff --git a/libkineto/src/plugin/mspti_dynamic/CMakeLists.txt b/libkineto/src/plugin/mspti_dynamic/CMakeLists.txt new file mode 100644 index 0000000..8789692 --- /dev/null +++ b/libkineto/src/plugin/mspti_dynamic/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.16) + +set(MSPTI_ROOT "" CACHE PATH "Path to MSPTI SDK root (contains include/ and lib64/)") + +if("${MSPTI_ROOT}" STREQUAL "") + if(DEFINED ENV{MSPTI_ROOT}) + set(MSPTI_ROOT "$ENV{MSPTI_ROOT}") + elseif(DEFINED ENV{ASCEND_HOME_PATH}) + set(MSPTI_ROOT "$ENV{ASCEND_HOME_PATH}/tools/mspti") + else() + set(MSPTI_ROOT "/root/miniconda3/envs/ascend/Ascend/ascend-toolkit/latest/toolkit/tools/mspti") + endif() +endif() + +set(MSPTI_INCLUDE_DIR "${MSPTI_ROOT}/include") +set(MSPTI_LIBRARY_DIR "${MSPTI_ROOT}/lib64") + +find_path(MSPTI_INCLUDE_PATH mspti.h PATHS "${MSPTI_INCLUDE_DIR}" NO_DEFAULT_PATH) +find_library(MSPTI_LIBRARY mspti PATHS "${MSPTI_LIBRARY_DIR}" NO_DEFAULT_PATH) + +if(NOT MSPTI_INCLUDE_PATH OR NOT MSPTI_LIBRARY) + message(WARNING "MSPTI plugin skipped: cannot find mspti headers/libs under ${MSPTI_ROOT}") + return() +endif() + +add_library(mspti_kineto_plugin SHARED MsptiKinetoDynamicPlugin.cpp) +target_compile_features(mspti_kineto_plugin PRIVATE cxx_std_17) + +target_include_directories(mspti_kineto_plugin PRIVATE + ${LIBKINETO_INCLUDE_DIR} + ${MSPTI_INCLUDE_PATH}) + +target_link_libraries(mspti_kineto_plugin PRIVATE ${MSPTI_LIBRARY}) + +set_target_properties(mspti_kineto_plugin PROPERTIES + OUTPUT_NAME "mspti_kineto_plugin" + POSITION_INDEPENDENT_CODE ON) diff --git a/libkineto/src/plugin/mspti_dynamic/MsptiKinetoDynamicPlugin.cpp b/libkineto/src/plugin/mspti_dynamic/MsptiKinetoDynamicPlugin.cpp new file mode 
100644 index 0000000..bd77ef5 --- /dev/null +++ b/libkineto/src/plugin/mspti_dynamic/MsptiKinetoDynamicPlugin.cpp @@ -0,0 +1,692 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "KinetoDynamicPluginInterface.h" + + +namespace { + +constexpr size_t kAlignment = 8; +constexpr size_t kDefaultBufferSize = 8 * 1024 * 1024; + +struct PluginEvent { + KinetoPlugin_ProfileEventType type{KINETO_PLUGIN_PROFILE_EVENT_TYPE_INVALID}; + int64_t startNs{0}; + int64_t endNs{0}; + int64_t id{0}; + int32_t deviceOrProcess{0}; + int32_t resourceOrThread{0}; + std::string name; + std::vector> metadata; + bool hasFlow{false}; + KinetoPlugin_ProfileEventFlow flow{}; +}; + +class MsptiKinetoPluginSession { + public: + explicit MsptiKinetoPluginSession( + const std::vector& enabledTypes) + : enabledTypes_(enabledTypes) {} + + int start() { + std::lock_guard guard(mu_); + if (started_) { + return 0; + } + + const msptiResult subscribeRes = + msptiSubscribe(&subscriber_, nullptr, nullptr); + if (subscribeRes != MSPTI_SUCCESS) { + return -1; + } + + const msptiResult regRes = + msptiActivityRegisterCallbacks(&requestBuffer, &completeBuffer); + if (regRes != MSPTI_SUCCESS) { + msptiUnsubscribe(subscriber_); + subscriber_ = nullptr; + return -1; + } + + activeSession_ = this; + enableRequestedActivities(); + started_ = true; + return 0; + } + + int stop() { + { + std::lock_guard guard(mu_); + if (!started_) { + return 0; + } + } + + usleep(1000 * 1000); + msptiActivityFlushAll(0); + + { + std::lock_guard guard(mu_); + disableAllActivities(); + msptiUnsubscribe(subscriber_); + subscriber_ = nullptr; + started_ = false; + activeSession_ = nullptr; + } + return 0; + } + + int processEvents(const KinetoPlugin_TraceBuilder* traceBuilder) { + if (traceBuilder == nullptr || traceBuilder->pTraceBuilderHandle == nullptr) { + return -1; + } + + std::vector localEvents; + { + std::lock_guard guard(mu_); + 
localEvents.swap(events_); + } + + for (const auto& event : localEvents) { + KinetoPlugin_ProfileEvent pluginEvent{}; + pluginEvent.unpaddedStructSize = + KINETO_PLUGIN_PROFILE_EVENT_UNPADDED_STRUCT_SIZE; + pluginEvent.eventType = event.type; + pluginEvent.startTimeUtcNs = event.startNs; + pluginEvent.endTimeUtcNs = std::max(event.endNs, event.startNs); + pluginEvent.eventId = event.id; + pluginEvent.deviceId = event.deviceOrProcess; + pluginEvent.resourceId = event.resourceOrThread; + + if (traceBuilder->addEvent(traceBuilder->pTraceBuilderHandle, &pluginEvent) != + 0) { + continue; + } + + traceBuilder->setLastEventName( + traceBuilder->pTraceBuilderHandle, + event.name.empty() ? "mspti" : event.name.c_str()); + + if (event.hasFlow) { + traceBuilder->setLastEventFlow( + traceBuilder->pTraceBuilderHandle, + &event.flow); + } + + for (const auto& [k, v] : event.metadata) { + traceBuilder->addLastEventMetadata( + traceBuilder->pTraceBuilderHandle, + k.c_str(), + v.c_str()); + } + } + + return 0; + } + + int pushExternalCorrelation(uint64_t id) { + return msptiActivityPushExternalCorrelationId( + MSPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0, + id) == MSPTI_SUCCESS + ? 0 + : -1; + } + + int popExternalCorrelation() { + uint64_t lastId = 0; + const msptiResult result = msptiActivityPopExternalCorrelationId( + MSPTI_EXTERNAL_CORRELATION_KIND_CUSTOM0, + &lastId); + return (result == MSPTI_SUCCESS || result == MSPTI_ERROR_QUEUE_EMPTY) ? 0 + : -1; + } + + int pushExternalUserCorrelation(uint64_t id) { + return msptiActivityPushExternalCorrelationId( + MSPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, + id) == MSPTI_SUCCESS + ? 0 + : -1; + } + + int popExternalUserCorrelation() { + uint64_t lastId = 0; + const msptiResult result = msptiActivityPopExternalCorrelationId( + MSPTI_EXTERNAL_CORRELATION_KIND_CUSTOM1, + &lastId); + return (result == MSPTI_SUCCESS || result == MSPTI_ERROR_QUEUE_EMPTY) ? 
0 + : -1; + } + + static void requestBuffer(uint8_t** buffer, size_t* size, size_t* maxNumRecords) { + if (buffer == nullptr || size == nullptr || maxNumRecords == nullptr) { + return; + } + auto* raw = reinterpret_cast(std::malloc(kDefaultBufferSize + kAlignment)); + if (raw == nullptr) { + *buffer = nullptr; + *size = 0; + *maxNumRecords = 0; + return; + } + + uintptr_t ptr = reinterpret_cast(raw); + uintptr_t aligned = (ptr + (kAlignment - 1)) & ~(kAlignment - 1); + auto* alignedPtr = reinterpret_cast(aligned); + + if (activeSession_ != nullptr) { + std::lock_guard guard(activeSession_->mu_); + activeSession_->bufferMap_[alignedPtr] = raw; + } + + *buffer = alignedPtr; + *size = kDefaultBufferSize; + *maxNumRecords = 0; + } + + static void completeBuffer(uint8_t* buffer, size_t, size_t validSize) { + if (activeSession_ == nullptr || buffer == nullptr) { + return; + } + activeSession_->consumeBuffer(buffer, validSize); + } + + private: + void enableRequestedActivities() { + bool enabledApi = false; + bool enabledKernel = false; + bool enabledMemcpy = false; + bool enabledMemset = false; + bool enabledHccl = false; + bool enabledMarker = false; + + + for (const auto type : enabledTypes_) { + switch (type) { + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME: + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_DRIVER: + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_CUDA_RUNTIME: + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_CUDA_DRIVER: + enabledApi = true; + break; + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL: + enabledKernel = true; + break; + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMCPY: + enabledMemcpy = true; + break; + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMSET: + enabledMemset = true; + break; + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM: + enabledHccl = true; + break; + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_USER_ANNOTATION: + case KINETO_PLUGIN_PROFILE_EVENT_TYPE_CPU_INSTANT_EVENT: + enabledMarker = true; + break; + default: + 
break; + } + } + + if (enabledApi) { + msptiResult r = msptiActivityEnable(MSPTI_ACTIVITY_KIND_API); + } + if (enabledKernel) { + msptiResult r = msptiActivityEnable(MSPTI_ACTIVITY_KIND_KERNEL); + } + if (enabledMemcpy) { + msptiResult r = msptiActivityEnable(MSPTI_ACTIVITY_KIND_MEMCPY); + } + if (enabledMemset) { + msptiResult r = msptiActivityEnable(MSPTI_ACTIVITY_KIND_MEMSET); + } + if (enabledHccl) { + msptiActivityEnable(MSPTI_ACTIVITY_KIND_HCCL); + msptiActivityEnable(MSPTI_ACTIVITY_KIND_COMMUNICATION); + } + if (enabledMarker) { + msptiActivityEnable(MSPTI_ACTIVITY_KIND_MARKER); + } + + msptiResult r = msptiActivityEnable(MSPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION); + } + + void disableAllActivities() { + msptiActivityDisable(MSPTI_ACTIVITY_KIND_API); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_KERNEL); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_MEMCPY); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_MEMSET); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_HCCL); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_COMMUNICATION); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_MARKER); + msptiActivityDisable(MSPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION); + } + + void consumeBuffer(uint8_t* buffer, size_t validSize) { + if (validSize == 0) { + freeBuffer(buffer); + return; + } + + std::vector parsed; + msptiActivity* record = nullptr; + while (true) { + const msptiResult status = msptiActivityGetNextRecord(buffer, validSize, &record); + if (status == MSPTI_ERROR_MAX_LIMIT_REACHED) { + break; + } + if (status != MSPTI_SUCCESS || record == nullptr) { + break; + } + + switch (record->kind) { + case MSPTI_ACTIVITY_KIND_API: + parsed.push_back(convertApi(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_KERNEL: + parsed.push_back(convertKernel(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_MEMCPY: + parsed.push_back(convertMemcpy(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_MEMSET: + 
parsed.push_back(convertMemset(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_HCCL: + parsed.push_back(convertHccl(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_COMMUNICATION: + parsed.push_back( + convertCommunication(*reinterpret_cast(record))); + break; + case MSPTI_ACTIVITY_KIND_MARKER: + parsed.push_back(convertMarker(*reinterpret_cast(record))); + break; + default: + break; + } + } + + { + std::lock_guard guard(mu_); + events_.insert(events_.end(), parsed.begin(), parsed.end()); + } + + freeBuffer(buffer); + } + + void freeBuffer(uint8_t* alignedBuffer) { + uint8_t* raw = alignedBuffer; + { + std::lock_guard guard(mu_); + const auto it = bufferMap_.find(alignedBuffer); + if (it != bufferMap_.end()) { + raw = it->second; + bufferMap_.erase(it); + } + } + std::free(raw); + } + + static std::string safeName(const char* name, const char* fallback) { + if (name != nullptr && std::strlen(name) > 0) { + return std::string(name); + } + return std::string(fallback); + } + + static PluginEvent convertApi(const msptiActivityApi& api) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME; + ev.startNs = static_cast(api.start); + ev.endNs = static_cast(api.end); + ev.id = static_cast(api.correlationId); + ev.deviceOrProcess = static_cast(api.pt.processId); + ev.resourceOrThread = static_cast(api.pt.threadId); + ev.name = safeName(api.name, "acl_api"); + + ev.hasFlow = true; + ev.flow.unpaddedStructSize = + KINETO_PLUGIN_PROFILE_EVENT_FLOW_UNPADDED_STRUCT_SIZE; + ev.flow.flowType = KINETO_PLUGIN_PROFILE_EVENT_FLOW_TYPE_ASYNC_CPU_GPU; + ev.flow.flowId = static_cast(api.correlationId); + ev.flow.isStartPoint = true; + return ev; + } + + static PluginEvent convertKernel(const msptiActivityKernel& kernel) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL; + ev.startNs = static_cast(kernel.start); + ev.endNs = static_cast(kernel.end); + ev.id = static_cast(kernel.correlationId); 
+ ev.deviceOrProcess = static_cast(kernel.ds.deviceId); + ev.resourceOrThread = static_cast(kernel.ds.streamId); + ev.name = safeName(kernel.name, "npu_kernel"); + + if (kernel.type != nullptr) { + ev.metadata.emplace_back("kernelType", std::string("\"") + kernel.type + "\""); + } + + ev.hasFlow = true; + ev.flow.unpaddedStructSize = + KINETO_PLUGIN_PROFILE_EVENT_FLOW_UNPADDED_STRUCT_SIZE; + ev.flow.flowType = KINETO_PLUGIN_PROFILE_EVENT_FLOW_TYPE_ASYNC_CPU_GPU; + ev.flow.flowId = static_cast(kernel.correlationId); + ev.flow.isStartPoint = false; + return ev; + } + + static PluginEvent convertMemcpy(const msptiActivityMemcpy& memcpy) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMCPY; + ev.startNs = static_cast(memcpy.start); + ev.endNs = static_cast(memcpy.end); + ev.id = static_cast(memcpy.correlationId); + ev.deviceOrProcess = static_cast(memcpy.deviceId); + ev.resourceOrThread = static_cast(memcpy.streamId); + ev.name = "mspti_memcpy"; + ev.metadata.emplace_back("bytes", std::to_string(memcpy.bytes)); + ev.metadata.emplace_back("copyKind", std::to_string(static_cast(memcpy.copyKind))); + return ev; + } + + static PluginEvent convertMemset(const msptiActivityMemset& memset) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMSET; + ev.startNs = static_cast(memset.start); + ev.endNs = static_cast(memset.end); + ev.id = static_cast(memset.correlationId); + ev.deviceOrProcess = static_cast(memset.deviceId); + ev.resourceOrThread = static_cast(memset.streamId); + ev.name = "mspti_memset"; + ev.metadata.emplace_back("bytes", std::to_string(memset.bytes)); + ev.metadata.emplace_back("value", std::to_string(memset.value)); + return ev; + } + + static PluginEvent convertHccl(const msptiActivityHccl& hccl) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM; + ev.startNs = static_cast(hccl.start); + ev.endNs = static_cast(hccl.end); + ev.id = static_cast(hccl.start); + ev.deviceOrProcess = 
static_cast(hccl.ds.deviceId); + ev.resourceOrThread = static_cast(hccl.ds.streamId); + ev.name = safeName(hccl.name, "hccl"); + if (hccl.commName != nullptr) { + ev.metadata.emplace_back( + "commName", std::string("\"") + hccl.commName + "\""); + } + ev.metadata.emplace_back("bandwidthGBs", std::to_string(hccl.bandWidth)); + return ev; + } + + static PluginEvent convertCommunication(const msptiActivityCommunication& comm) { + PluginEvent ev; + ev.type = KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM; + ev.startNs = static_cast(comm.start); + ev.endNs = static_cast(comm.end); + ev.id = static_cast(comm.correlationId); + ev.deviceOrProcess = static_cast(comm.ds.deviceId); + ev.resourceOrThread = static_cast(comm.ds.streamId); + ev.name = safeName(comm.name, "communication"); + ev.metadata.emplace_back("count", std::to_string(comm.count)); + if (comm.commName != nullptr) { + ev.metadata.emplace_back( + "commName", std::string("\"") + comm.commName + "\""); + } + if (comm.algType != nullptr) { + ev.metadata.emplace_back( + "algType", std::string("\"") + comm.algType + "\""); + } + return ev; + } + + static PluginEvent convertMarker(const msptiActivityMarker& marker) { + PluginEvent ev; + ev.type = (marker.flag & MSPTI_ACTIVITY_FLAG_MARKER_INSTANTANEOUS) + ? 
KINETO_PLUGIN_PROFILE_EVENT_TYPE_CPU_INSTANT_EVENT + : KINETO_PLUGIN_PROFILE_EVENT_TYPE_USER_ANNOTATION; + ev.startNs = static_cast(marker.timestamp); + ev.endNs = static_cast(marker.timestamp); + ev.id = static_cast(marker.id); + if (marker.sourceKind == MSPTI_ACTIVITY_SOURCE_KIND_DEVICE) { + ev.deviceOrProcess = static_cast(marker.objectId.ds.deviceId); + ev.resourceOrThread = static_cast(marker.objectId.ds.streamId); + } else { + ev.deviceOrProcess = static_cast(marker.objectId.pt.processId); + ev.resourceOrThread = static_cast(marker.objectId.pt.threadId); + } + ev.name = safeName(marker.name, "marker"); + if (marker.domain != nullptr) { + ev.metadata.emplace_back( + "domain", std::string("\"") + marker.domain + "\""); + } + return ev; + } + + private: + std::mutex mu_; + bool started_{false}; + msptiSubscriberHandle subscriber_{nullptr}; + std::vector enabledTypes_; + std::vector events_; + std::unordered_map bufferMap_; + + static inline MsptiKinetoPluginSession* activeSession_{nullptr}; +}; + +int msptiProfilerCreate(KinetoPlugin_ProfilerCreate_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_CREATE_PARAMS_UNPADDED_STRUCT_SIZE) { + return -1; + } + + std::vector enabledTypes; + if (params->pEnabledActivityTypes != nullptr && params->enabledActivityTypesMaxLen > 0) { + enabledTypes.assign( + params->pEnabledActivityTypes, + params->pEnabledActivityTypes + params->enabledActivityTypesMaxLen); + } else { + enabledTypes = { + KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMCPY, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMSET, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_USER_ANNOTATION, + }; + } + + auto* session = new MsptiKinetoPluginSession(enabledTypes); + params->pProfilerHandle = reinterpret_cast(session); + return 0; +} + +int 
msptiProfilerDestroy(KinetoPlugin_ProfilerDestroy_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_DESTROY_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + session->stop(); + delete session; + return 0; +} + +int msptiProfilerQuery(KinetoPlugin_ProfilerQuery_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_QUERY_PARAMS_UNPADDED_STRUCT_SIZE) { + return -1; + } + + const char* profilerName = "MSPTI Profiler"; + if (params->pProfilerName != nullptr && params->profilerNameMaxLen > 0) { + std::snprintf( + params->pProfilerName, + params->profilerNameMaxLen + 1, + "%s", + profilerName); + } + + if (params->pSupportedActivityTypes != nullptr && + params->supportedActivityTypesMaxLen > 0) { + const std::vector supportedTypes = { + KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_DRIVER, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMCPY, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_GPU_MEMSET, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_USER_ANNOTATION, + KINETO_PLUGIN_PROFILE_EVENT_TYPE_CPU_INSTANT_EVENT, + }; + + const size_t n = std::min( + params->supportedActivityTypesMaxLen, + supportedTypes.size()); + for (size_t i = 0; i < n; ++i) { + params->pSupportedActivityTypes[i] = supportedTypes[i]; + } + for (size_t i = n; i < params->supportedActivityTypesMaxLen; ++i) { + params->pSupportedActivityTypes[i] = + KINETO_PLUGIN_PROFILE_EVENT_TYPE_INVALID; + } + } + + return 0; +} + +int msptiProfilerStart(KinetoPlugin_ProfilerStart_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_START_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session 
= reinterpret_cast(params->pProfilerHandle); + return session->start(); +} + +int msptiProfilerStop(KinetoPlugin_ProfilerStop_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_STOP_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + return session->stop(); +} + +int msptiProfilerProcessEvents(KinetoPlugin_ProfilerProcessEvents_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_PROCESS_EVENTS_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr || params->pTraceBuilder == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + return session->processEvents(params->pTraceBuilder); +} + +int msptiProfilerPushCorrelationId( + KinetoPlugin_ProfilerPushCorrelationId_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_PUSH_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + return session->pushExternalCorrelation(params->correlationId); +} + +int msptiProfilerPopCorrelationId( + KinetoPlugin_ProfilerPopCorrelationId_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_POP_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + return session->popExternalCorrelation(); +} + +int msptiProfilerPushUserCorrelationId( + KinetoPlugin_ProfilerPushUserCorrelationId_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_PUSH_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = 
reinterpret_cast(params->pProfilerHandle); + return session->pushExternalUserCorrelation(params->userCorrelationId); +} + +int msptiProfilerPopUserCorrelationId( + KinetoPlugin_ProfilerPopUserCorrelationId_Params* params) { + if (params == nullptr || + params->unpaddedStructSize < + KINETO_PLUGIN_PROFILER_POP_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE || + params->pProfilerHandle == nullptr) { + return -1; + } + + auto* session = reinterpret_cast(params->pProfilerHandle); + return session->popExternalUserCorrelation(); +} + +} // namespace + +extern "C" int KinetoPlugin_register(const KinetoPlugin_Registry* pRegistry) { + if (pRegistry == nullptr || + pRegistry->unpaddedStructSize < KINETO_PLUGIN_REGISTRY_UNPADDED_STRUCT_SIZE || + pRegistry->registerProfiler == nullptr || pRegistry->pRegistryHandle == nullptr) { + return -1; + } + + KinetoPlugin_ProfilerInterface profilerInterface{}; + profilerInterface.unpaddedStructSize = + KINETO_PLUGIN_PROFILER_INTERFACE_UNPADDED_STRUCT_SIZE; + profilerInterface.profilerCreate = &msptiProfilerCreate; + profilerInterface.profilerDestroy = &msptiProfilerDestroy; + profilerInterface.profilerQuery = &msptiProfilerQuery; + profilerInterface.profilerStart = &msptiProfilerStart; + profilerInterface.profilerStop = &msptiProfilerStop; + profilerInterface.profilerPushCorrelationId = &msptiProfilerPushCorrelationId; + profilerInterface.profilerPopCorrelationId = &msptiProfilerPopCorrelationId; + profilerInterface.profilerPushUserCorrelationId = &msptiProfilerPushUserCorrelationId; + profilerInterface.profilerPopUserCorrelationId = &msptiProfilerPopUserCorrelationId; + profilerInterface.profilerProcessEvents = &msptiProfilerProcessEvents; + + return pRegistry->registerProfiler( + pRegistry->pRegistryHandle, + &profilerInterface); +} diff --git a/libkineto/src/plugin/mspti_dynamic/README.md b/libkineto/src/plugin/mspti_dynamic/README.md new file mode 100644 index 0000000..122909c --- /dev/null +++ 
b/libkineto/src/plugin/mspti_dynamic/README.md @@ -0,0 +1,35 @@ +# MSPTI Dynamic Plugin for Kineto + +This directory contains a Kineto dynamic plugin implementation based on Huawei MSPTI. + +## Build + +Configure Kineto with the plugin build enabled: + +```bash +cmake -S third_party/kineto/libkineto -B build/libkineto \ + -DKINETO_BUILD_MSPTI_PLUGIN=ON \ + -DMSPTI_ROOT=/root/miniconda3/envs/ascend/Ascend/ascend-toolkit/8.3/toolkit/tools/mspti +cmake --build build/libkineto --target mspti_kineto_plugin -j +``` + +`MSPTI_ROOT` must contain: + +- `include/mspti.h` +- `lib64/libmspti.so` + +## Run with Kineto + +Point the Kineto dynamic plugin loader to the plugin output directory: + +```bash +export KINETO_PLUGIN_LIB_DIR_PATH=/path/to/plugin/output/dir +``` + +If you need to avoid CUPTI conflicts while using this plugin: + +```bash +export KINETO_DISABLE_CUPTI=1 +``` + +Then run your normal PyTorch profiler workflow. diff --git a/libkineto/test/CMakeLists.txt b/libkineto/test/CMakeLists.txt index 6350cc6..99aa480 100644 --- a/libkineto/test/CMakeLists.txt +++ b/libkineto/test/CMakeLists.txt @@ -41,6 +41,15 @@ target_link_libraries(DynamicPluginTest PRIVATE kineto_base kineto_api) gtest_discover_tests(DynamicPluginTest) +if(TARGET mspti_kineto_plugin) +add_executable(MsptiDynamicPluginTest MsptiDynamicPluginTest.cpp) +target_link_libraries(MsptiDynamicPluginTest PRIVATE + gtest_main + kineto_base kineto_api + mspti_kineto_plugin) +gtest_discover_tests(MsptiDynamicPluginTest) +endif() + if(NOT LIBKINETO_NOCUPTI) # CuptiActivityProfilerTest #[[ diff --git a/libkineto/test/DynamicPluginTest.cpp b/libkineto/test/DynamicPluginTest.cpp index da52a2c..c2a7d27 100644 --- a/libkineto/test/DynamicPluginTest.cpp +++ b/libkineto/test/DynamicPluginTest.cpp @@ -511,3 +511,34 @@ TEST_F(DynamicPluginTest, ProfilerValidation) { session2->pushUserCorrelationId(67890); session2->popUserCorrelationId(); } + +// Regression test: PluginTraceBuilder::addEvent() must validate the +// ProfileEvent struct size against 
KINETO_PLUGIN_PROFILE_EVENT_UNPADDED_STRUCT_SIZE, +// not against any other struct's size constant. +TEST_F(DynamicPluginTest, AddEventVersionCheck) { + auto mockInterface = MockPlugin::getInterface(); + PluginProfiler pluginProfiler(mockInterface); + + auto activities = pluginProfiler.availableActivities(); + auto session = pluginProfiler.configure(activities, Config{}); + ASSERT_NE(session, nullptr); + + session->start(); + session->stop(); + + // Process events through the real PluginTraceBuilder + MemoryTraceLogger logger(Config{}); + session->processTrace(logger); + + auto traceBuffer = session->getTraceBuffer(); + ASSERT_NE(traceBuffer, nullptr); + + // The mock plugin creates events with the correct + // KINETO_PLUGIN_PROFILE_EVENT_UNPADDED_STRUCT_SIZE. + // If the version check in PluginTraceBuilder::addEvent() compares against + // the wrong constant, events would be silently rejected and the buffer + // would be empty. + EXPECT_EQ(traceBuffer->activities.size(), 4) + << "Events with correct unpaddedStructSize must not be rejected. " + "This can fail if addEvent() checks against the wrong size constant."; +} diff --git a/libkineto/test/MsptiDynamicPluginTest.cpp b/libkineto/test/MsptiDynamicPluginTest.cpp new file mode 100644 index 0000000..4d6fb48 --- /dev/null +++ b/libkineto/test/MsptiDynamicPluginTest.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+// Unit tests for the MSPTI Kineto dynamic plugin. They drive the plugin only
+// through the C interface it registers via KinetoPlugin_register().
+//
+// NOTE(review): the targets of the system includes below were lost when this
+// patch was extracted. They are reconstructed from what the code uses (TEST
+// macros, std::array, std::string); <cstdint> is a best guess - confirm
+// against the original change.
+#include <gtest/gtest.h>
+
+#include <array>
+#include <cstdint>
+#include <string>
+
+#include "include/KinetoDynamicPluginInterface.h"
+
+extern "C" int KinetoPlugin_register(const KinetoPlugin_Registry* pRegistry);
+
+namespace {
+
+// Test double standing in for the registry: remembers whether the plugin
+// called registerProfiler and keeps a copy of the interface it passed.
+struct CapturedRegistry {
+  bool called{false};
+  KinetoPlugin_ProfilerInterface profiler{};
+};
+
+// registerProfiler callback handed to the plugin. Copies the profiler
+// interface into the CapturedRegistry behind pRegistryHandle.
+// Returns 0 on success and -1 if either argument is null.
+int registerProfilerShim(
+    KinetoPlugin_RegistryHandle* pRegistryHandle,
+    const KinetoPlugin_ProfilerInterface* pProfiler) {
+  if (pRegistryHandle == nullptr || pProfiler == nullptr) {
+    return -1;
+  }
+  // The tests pass a CapturedRegistry disguised as the opaque handle.
+  auto* captured = reinterpret_cast<CapturedRegistry*>(pRegistryHandle);
+  captured->called = true;
+  captured->profiler = *pProfiler;
+  return 0;
+}
+
+// Test double standing in for the trace builder: counts how often each
+// per-event callback is invoked.
+struct DummyTraceBuilderHandle {
+  int addEventCount{0};
+  int setNameCount{0};
+  int setFlowCount{0};
+  int addMetadataCount{0};
+};
+
+// addEvent stub: bumps addEventCount and reports success.
+int addEventStub(
+    KinetoPlugin_TraceBuilderHandle* pTraceBuilderHandle,
+    const KinetoPlugin_ProfileEvent*) {
+  auto* h = reinterpret_cast<DummyTraceBuilderHandle*>(pTraceBuilderHandle);
+  h->addEventCount++;
+  return 0;
+}
+
+// setLastEventName stub: bumps setNameCount and reports success.
+int setLastEventNameStub(
+    KinetoPlugin_TraceBuilderHandle* pTraceBuilderHandle,
+    const char*) {
+  auto* h = reinterpret_cast<DummyTraceBuilderHandle*>(pTraceBuilderHandle);
+  h->setNameCount++;
+  return 0;
+}
+
+// setLastEventFlow stub: bumps setFlowCount and reports success.
+int setLastEventFlowStub(
+    KinetoPlugin_TraceBuilderHandle* pTraceBuilderHandle,
+    const KinetoPlugin_ProfileEventFlow*) {
+  auto* h = reinterpret_cast<DummyTraceBuilderHandle*>(pTraceBuilderHandle);
+  h->setFlowCount++;
+  return 0;
+}
+
+// addLastEventMetadata stub: bumps addMetadataCount and reports success.
+int addLastEventMetadataStub(
+    KinetoPlugin_TraceBuilderHandle* pTraceBuilderHandle,
+    const char*,
+    const char*) {
+  auto* h = reinterpret_cast<DummyTraceBuilderHandle*>(pTraceBuilderHandle);
+  h->addMetadataCount++;
+  return 0;
+}
+
+// addDeviceInfo stub: ignores its arguments and reports success.
+int addDeviceInfoStub(
+    KinetoPlugin_TraceBuilderHandle*,
+    const KinetoPlugin_ProfileDeviceInfo*) {
+  return 0;
+}
+
+// addResourceInfo stub: ignores its arguments and reports success.
+int addResourceInfoStub(
+    KinetoPlugin_TraceBuilderHandle*,
+    const KinetoPlugin_ProfileResourceInfo*) {
+  return 0;
+}
+
+} // namespace
+
+// Registers the plugin, checks that the mandatory entry points are set, and
+// checks that profilerQuery reports an "MSPTI" name plus the runtime, kernel
+// and collective-communication event types.
+TEST(MsptiDynamicPluginTest, RegisterAndQuery) {
+  CapturedRegistry captured;
+  KinetoPlugin_Registry registry{};
+  registry.unpaddedStructSize = KINETO_PLUGIN_REGISTRY_UNPADDED_STRUCT_SIZE;
+  registry.pRegistryHandle =
+      reinterpret_cast<KinetoPlugin_RegistryHandle*>(&captured);
+  registry.registerProfiler = registerProfilerShim;
+
+  const int registerResult = KinetoPlugin_register(&registry);
+  ASSERT_EQ(registerResult, 0);
+  ASSERT_TRUE(captured.called);
+
+  const auto& profiler = captured.profiler;
+  EXPECT_GE(
+      profiler.unpaddedStructSize,
+      KINETO_PLUGIN_PROFILER_INTERFACE_UNPADDED_STRUCT_SIZE);
+  ASSERT_NE(profiler.profilerCreate, nullptr);
+  ASSERT_NE(profiler.profilerDestroy, nullptr);
+  ASSERT_NE(profiler.profilerQuery, nullptr);
+  ASSERT_NE(profiler.profilerStart, nullptr);
+  ASSERT_NE(profiler.profilerStop, nullptr);
+  ASSERT_NE(profiler.profilerProcessEvents, nullptr);
+
+  // NOTE(review): the template arguments of both arrays were lost when this
+  // patch was extracted. The capacities (128, 32) are placeholders, and the
+  // element type is spelled via decltype so it relies only on a name visible
+  // here - restore the original values.
+  std::array<char, 128> profilerName{};
+  std::array<decltype(KINETO_PLUGIN_PROFILE_EVENT_TYPE_INVALID), 32>
+      supportedTypes{};
+  supportedTypes.fill(KINETO_PLUGIN_PROFILE_EVENT_TYPE_INVALID);
+
+  // The query is issued with a null profiler handle.
+  KinetoPlugin_ProfilerQuery_Params queryParams{};
+  queryParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_QUERY_PARAMS_UNPADDED_STRUCT_SIZE;
+  queryParams.pProfilerHandle = nullptr;
+  queryParams.pProfilerName = profilerName.data();
+  // One byte is held back so the zero-initialised buffer stays terminated.
+  queryParams.profilerNameMaxLen = profilerName.size() - 1;
+  queryParams.pSupportedActivityTypes = supportedTypes.data();
+  queryParams.supportedActivityTypesMaxLen = supportedTypes.size();
+
+  const int queryResult = profiler.profilerQuery(&queryParams);
+  ASSERT_EQ(queryResult, 0);
+
+  std::string name{profilerName.data()};
+  EXPECT_NE(name.find("MSPTI"), std::string::npos);
+
+  bool hasRuntime = false;
+  bool hasKernel = false;
+  bool hasComm = false;
+  for (const auto t : supportedTypes) {
+    if (t == KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME) {
+      hasRuntime = true;
+    } else if (t == KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL) {
+      hasKernel = true;
+    } else if (t == KINETO_PLUGIN_PROFILE_EVENT_TYPE_COLLECTIVE_COMM) {
+      hasComm = true;
+    }
+  }
+  EXPECT_TRUE(hasRuntime);
+  EXPECT_TRUE(hasKernel);
+  EXPECT_TRUE(hasComm);
+}
+
+// Runs create -> start -> stop -> processEvents -> destroy against a stub
+// trace builder and expects every step to return 0.
+TEST(MsptiDynamicPluginTest, LifecycleAndProcessEvents) {
+  CapturedRegistry captured;
+  KinetoPlugin_Registry registry{};
+  registry.unpaddedStructSize = KINETO_PLUGIN_REGISTRY_UNPADDED_STRUCT_SIZE;
+  registry.pRegistryHandle =
+      reinterpret_cast<KinetoPlugin_RegistryHandle*>(&captured);
+  registry.registerProfiler = registerProfilerShim;
+
+  ASSERT_EQ(KinetoPlugin_register(&registry), 0);
+  ASSERT_TRUE(captured.called);
+
+  const auto& profiler = captured.profiler;
+  // Fail the test, rather than crash the binary, if an entry point used
+  // below was never registered.
+  ASSERT_NE(profiler.profilerCreate, nullptr);
+  ASSERT_NE(profiler.profilerDestroy, nullptr);
+  ASSERT_NE(profiler.profilerStart, nullptr);
+  ASSERT_NE(profiler.profilerStop, nullptr);
+  ASSERT_NE(profiler.profilerProcessEvents, nullptr);
+
+  // Element type and size are deduced from the initializer (C++17).
+  // NOTE(review): explicit template arguments may have been stripped here.
+  std::array enabledTypes = {
+      KINETO_PLUGIN_PROFILE_EVENT_TYPE_PRIVATEUSE1_RUNTIME,
+      KINETO_PLUGIN_PROFILE_EVENT_TYPE_CONCURRENT_KERNEL,
+  };
+
+  KinetoPlugin_ProfilerCreate_Params createParams{};
+  createParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_CREATE_PARAMS_UNPADDED_STRUCT_SIZE;
+  createParams.pEnabledActivityTypes = enabledTypes.data();
+  createParams.enabledActivityTypesMaxLen = enabledTypes.size();
+
+  ASSERT_EQ(profiler.profilerCreate(&createParams), 0);
+  ASSERT_NE(createParams.pProfilerHandle, nullptr);
+
+  KinetoPlugin_ProfilerStart_Params startParams{};
+  startParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_START_PARAMS_UNPADDED_STRUCT_SIZE;
+  startParams.pProfilerHandle = createParams.pProfilerHandle;
+  EXPECT_EQ(profiler.profilerStart(&startParams), 0);
+
+  KinetoPlugin_ProfilerStop_Params stopParams{};
+  stopParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_STOP_PARAMS_UNPADDED_STRUCT_SIZE;
+  stopParams.pProfilerHandle = createParams.pProfilerHandle;
+  EXPECT_EQ(profiler.profilerStop(&stopParams), 0);
+
+  // Trace builder whose callbacks are all local stubs.
+  DummyTraceBuilderHandle tbHandle;
+  KinetoPlugin_TraceBuilder traceBuilder{};
+  traceBuilder.unpaddedStructSize =
+      KINETO_PLUGIN_TRACE_BUILDER_UNPADDED_STRUCT_SIZE;
+  traceBuilder.pTraceBuilderHandle =
+      reinterpret_cast<KinetoPlugin_TraceBuilderHandle*>(&tbHandle);
+  traceBuilder.addEvent = addEventStub;
+  traceBuilder.setLastEventName = setLastEventNameStub;
+  traceBuilder.setLastEventFlow = setLastEventFlowStub;
+  traceBuilder.addLastEventMetadata = addLastEventMetadataStub;
+  traceBuilder.addDeviceInfo = addDeviceInfoStub;
+  traceBuilder.addResourceInfo = addResourceInfoStub;
+
+  KinetoPlugin_ProfilerProcessEvents_Params processParams{};
+  processParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_PROCESS_EVENTS_PARAMS_UNPADDED_STRUCT_SIZE;
+  processParams.pProfilerHandle = createParams.pProfilerHandle;
+  processParams.pTraceBuilder = &traceBuilder;
+
+  EXPECT_EQ(profiler.profilerProcessEvents(&processParams), 0);
+
+  KinetoPlugin_ProfilerDestroy_Params destroyParams{};
+  destroyParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_DESTROY_PARAMS_UNPADDED_STRUCT_SIZE;
+  destroyParams.pProfilerHandle = createParams.pProfilerHandle;
+
+  EXPECT_EQ(profiler.profilerDestroy(&destroyParams), 0);
+}
+
+// Pushes and pops a user correlation id on a profiler created with no
+// enabled activity types, then checks that both calls return -1 when the
+// declared struct size is one byte too small.
+TEST(MsptiDynamicPluginTest, UserCorrelationApi) {
+  CapturedRegistry captured;
+  KinetoPlugin_Registry registry{};
+  registry.unpaddedStructSize = KINETO_PLUGIN_REGISTRY_UNPADDED_STRUCT_SIZE;
+  registry.pRegistryHandle =
+      reinterpret_cast<KinetoPlugin_RegistryHandle*>(&captured);
+  registry.registerProfiler = registerProfilerShim;
+
+  ASSERT_EQ(KinetoPlugin_register(&registry), 0);
+  ASSERT_TRUE(captured.called);
+
+  const auto& profiler = captured.profiler;
+  ASSERT_NE(profiler.profilerCreate, nullptr);
+  ASSERT_NE(profiler.profilerDestroy, nullptr);
+  ASSERT_NE(profiler.profilerPushUserCorrelationId, nullptr);
+  ASSERT_NE(profiler.profilerPopUserCorrelationId, nullptr);
+
+  KinetoPlugin_ProfilerCreate_Params createParams{};
+  createParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_CREATE_PARAMS_UNPADDED_STRUCT_SIZE;
+  createParams.pEnabledActivityTypes = nullptr;
+  createParams.enabledActivityTypesMaxLen = 0;
+
+  ASSERT_EQ(profiler.profilerCreate(&createParams), 0);
+  ASSERT_NE(createParams.pProfilerHandle, nullptr);
+
+  KinetoPlugin_ProfilerPushUserCorrelationId_Params pushParams{};
+  pushParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_PUSH_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE;
+  pushParams.userCorrelationId = 0x1234ULL;
+  pushParams.pProfilerHandle = createParams.pProfilerHandle;
+  EXPECT_EQ(profiler.profilerPushUserCorrelationId(&pushParams), 0);
+
+  KinetoPlugin_ProfilerPopUserCorrelationId_Params popParams{};
+  popParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_POP_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE;
+  popParams.pProfilerHandle = createParams.pProfilerHandle;
+  EXPECT_EQ(profiler.profilerPopUserCorrelationId(&popParams), 0);
+
+  // Same parameters, but with a declared size one byte short of the expected
+  // one: the plugin must reject the call.
+  KinetoPlugin_ProfilerPushUserCorrelationId_Params invalidPushParams =
+      pushParams;
+  invalidPushParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_PUSH_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE -
+      1;
+  EXPECT_EQ(profiler.profilerPushUserCorrelationId(&invalidPushParams), -1);
+
+  KinetoPlugin_ProfilerPopUserCorrelationId_Params invalidPopParams = popParams;
+  invalidPopParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_POP_USER_CORRELATION_ID_PARAMS_UNPADDED_STRUCT_SIZE -
+      1;
+  EXPECT_EQ(profiler.profilerPopUserCorrelationId(&invalidPopParams), -1);
+
+  KinetoPlugin_ProfilerDestroy_Params destroyParams{};
+  destroyParams.unpaddedStructSize =
+      KINETO_PLUGIN_PROFILER_DESTROY_PARAMS_UNPADDED_STRUCT_SIZE;
+  destroyParams.pProfilerHandle = createParams.pProfilerHandle;
+  EXPECT_EQ(profiler.profilerDestroy(&destroyParams), 0);
+}