From 125e1b17d8335351f6bee11f1d4514afaf603a72 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Tue, 21 Sep 2021 06:19:02 -0400 Subject: [PATCH 1/8] allow fine-grained control on several APIs Deleted a line that makes rocprof abort when using fine grained control one several APIs at a time. The parsing function in libtracer_tool.so that parses the input XML file that specifies the functions for which the user wants to activate the tracing callbacks seems to handle correctly cases where fine-grained control is used on several APIs. This line can be safely deleted. --- test/tool/tool.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 4bdce5dd..3773d6df 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -1108,7 +1108,6 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) // Getting traces const auto traces_list = xml->GetNodes("top.trace"); - if (traces_list.size() > 1) fatal("ROCProfiler: only one trace supported at a time"); const unsigned feature_count = metrics_vec.size() + traces_list.size(); rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; From 2b0aab57b1b2cd0006de368d7d2272b75e1d6c8c Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Wed, 22 Sep 2021 07:44:35 -0400 Subject: [PATCH 2/8] Isolate flushing instructions for metrics Isolated flushing instructions into a wrapping function. The file descriptor used for flushing is the one that is globally defined and not the one stored in the context_entry_t object as it is always the one that is defined globally that is used. If using a globally defined file descriptor is problematic (however it is the norm in the libtracer_tool.so library in ROCTracer) rather than passing it through arguments, the wrapping flushing function can be modified to take a void pointer. The file descriptor can then be passed through this pointer and be used in the default flushing function. --- test/tool/tool.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 3773d6df..6cac8f20 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -337,19 +337,21 @@ unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +void metric_flush_cb(const char *name, uint64_t result){ + fprintf(result_file_handle, " %s ", name); + fprintf(result_file_handle, "(%lu)\n", result); +} // Output profiling results for input features void output_results(const context_entry_t* entry, const char* label) { - FILE* file = entry->file_handle; const rocprofiler_feature_t* features = entry->features; const unsigned feature_count = entry->feature_count; for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; - fprintf(file, " %s ", p->name); switch (p->data.kind) { // Output metrics results case ROCPROFILER_DATA_KIND_INT64: - fprintf(file, "(%lu)\n", p->data.result_int64); + metric_flush_cb(p->name, p->data.result_int64); break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); From 5ccee80b5b32f7cd6c01b68134f652f9c454df4a Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Wed, 22 Sep 2021 08:16:59 -0400 Subject: [PATCH 3/8] Isolate flushing instructions for kernel events Isolated flushing instructions into a wrapping function. The file descriptor used for flushing is the one that is globally defined and not the one stored in the context_entry_t object as it is always the one that is defined globally that is used. If using a globally defined file descriptor is problematic (however it is the norm in the libtracer_tool.so library in ROCTracer) rather than passing it through arguments, the wrapping flushing function can be modified to take a void pointer. The file descriptor can then be passed through this pointer and be used in the default flushing function. --- test/tool/tool.cpp | 66 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 6cac8f20..8c9267e8 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -373,6 +373,53 @@ void output_group(const context_entry_t* entry, const char* label) { } } +void kernel_flush_cb( uint32_t dispatch, + uint32_t gpu_id, + uint32_t queue_id, + uint64_t queue_index, + uint32_t pid, + uint32_t tid, + uint32_t grid_size, + uint32_t workgroup_size, + uint32_t lds_size, + uint32_t scratch_size, + uint32_t vgpr, + uint32_t sgpr, + uint32_t fbarrier_count, + uint64_t signal_handle, + uint64_t object, + const char* kernel_name, + bool record, + uint64_t dispatch_time, + uint64_t begin, + uint64_t end, + uint64_t complete){ + fprintf(result_file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", + dispatch, + gpu_id, + queue_id, + queue_index, + pid, + tid, + grid_size, + workgroup_size, + lds_size, + scratch_size, + vgpr, + sgpr, + fbarrier_count, + signal_handle, + object, + kernel_name); + if (record) fprintf(result_file_handle, ", time(%lu,%lu,%lu,%lu)", + dispatch_time, + begin, + end, + complete); + fprintf(result_file_handle, "\n"); + fflush(result_file_handle); +} + // Dump stored context entry bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { hsa_status_t status = HSA_STATUS_ERROR; @@ -391,12 +438,9 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { const uint32_t index = entry->index; if (index != UINT32_MAX) { - FILE* file_handle = entry->file_handle; const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent); - - fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", - index, + kernel_flush_cb(index, agent_info->dev_index, entry->data.queue_id, entry->data.queue_index, @@ -411,14 +455,12 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { entry->kernel_properties.fbarrier_count, entry->kernel_properties.signal.handle, entry->kernel_properties.object, - nik_name.c_str()); - if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", - record->dispatch, - record->begin, - record->end, - record->complete); - fprintf(file_handle, "\n"); - fflush(file_handle); + nik_name.c_str(), + record != NULL ? true : false, + record != NULL ? record->dispatch: 0, + record != NULL ? record->begin : 0, + record != NULL ? record->end : 0, + record != NULL ? record->complete : 0); } if (record && to_clean) { delete record; From 65161e2763be13042b1f99dd2e74f10e623efeb2 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Thu, 23 Sep 2021 06:41:41 -0400 Subject: [PATCH 4/8] Update metrics flushing function signature The signature of the function that flush metrics has been updated in order to have a standard signature for all the flushing functions. A new data structure that is passed as the only one argument for the function has also been defined --- test/tool/tool.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 8c9267e8..374c10d7 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -337,9 +337,15 @@ unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } -void metric_flush_cb(const char *name, uint64_t result){ - fprintf(result_file_handle, " %s ", name); - fprintf(result_file_handle, "(%lu)\n", result); +struct metric_trace_entry_t { + uint32_t dispatch; + const char* name; + uint64_t result; +}; + +void metric_flush_cb(metric_trace_entry_t *entry){ + fprintf(result_file_handle, " %s ", entry->name); + fprintf(result_file_handle, "(%lu)\n", entry->result); } // Output profiling results for input features void output_results(const context_entry_t* entry, const char* label) { @@ -350,8 +356,10 @@ void output_results(const context_entry_t* entry, const char* label) { const rocprofiler_feature_t* p = &features[i]; switch (p->data.kind) { // Output metrics results - case ROCPROFILER_DATA_KIND_INT64: - metric_flush_cb(p->name, p->data.result_int64); + case ROCPROFILER_DATA_KIND_INT64: { + metric_trace_entry_t metric_trace_entry = {entry->index, p->name, p->data.result_int64}; + metric_flush_cb(&metric_trace_entry); + } break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); From 24bf30f1fb9c9215cffb318e8e998b228009da46 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Thu, 23 Sep 2021 06:42:15 -0400 Subject: [PATCH 5/8] Update kernel flushing function signature As for the metrics, the kernel flushing function signature has been updated and a new data structure has been defined --- test/tool/tool.cpp | 95 ++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 374c10d7..06d11c13 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -381,49 +381,53 @@ void output_group(const context_entry_t* entry, const char* label) { } } -void kernel_flush_cb( uint32_t dispatch, - uint32_t gpu_id, - uint32_t queue_id, - uint64_t queue_index, - uint32_t pid, - uint32_t tid, - uint32_t grid_size, - uint32_t workgroup_size, - uint32_t lds_size, - uint32_t scratch_size, - uint32_t vgpr, - uint32_t sgpr, - uint32_t fbarrier_count, - uint64_t signal_handle, - uint64_t object, - const char* kernel_name, - bool record, - uint64_t dispatch_time, - uint64_t begin, - uint64_t end, - uint64_t complete){ +struct kernel_trace_entry_t { + uint32_t dispatch; + uint32_t gpu_id; + uint32_t queue_id; + uint64_t queue_index; + uint32_t pid; + uint32_t tid; + uint32_t grid_size; + uint32_t workgroup_size; + uint32_t lds_size; + uint32_t scratch_size; + uint32_t vgpr; + uint32_t sgpr; + uint32_t fbarrier_count; + uint64_t signal_handle; + uint64_t object; + const char* kernel_name; + bool record; + uint64_t dispatch_time; + uint64_t begin; + uint64_t end; + uint64_t complete; +}; + +void kernel_flush_cb(kernel_trace_entry_t* entry){ fprintf(result_file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", - dispatch, - gpu_id, - queue_id, - queue_index, - pid, - tid, - grid_size, - workgroup_size, - lds_size, - scratch_size, - vgpr, - sgpr, - fbarrier_count, - signal_handle, - object, - kernel_name); - if (record) fprintf(result_file_handle, ", time(%lu,%lu,%lu,%lu)", - dispatch_time, - begin, - end, - complete); + entry->dispatch, + entry->gpu_id, + entry->queue_id, + entry->queue_index, + entry->pid, + entry->tid, + entry->grid_size, + entry->workgroup_size, + entry->lds_size, + entry->scratch_size, + entry->vgpr, + entry->sgpr, + entry->fbarrier_count, + entry->signal_handle, + entry->object, + entry->kernel_name); + if (entry->record) fprintf(result_file_handle, ", time(%lu,%lu,%lu,%lu)", + entry->dispatch_time, + entry->begin, + entry->end, + entry->complete); fprintf(result_file_handle, "\n"); fflush(result_file_handle); } @@ -448,10 +452,10 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { if (index != UINT32_MAX) { const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent); - kernel_flush_cb(index, + kernel_trace_entry_t kernel_trace_entry = {entry->index, agent_info->dev_index, entry->data.queue_id, - entry->data.queue_index, + entry->data.queue_index, my_pid, entry->data.thread_id, entry->kernel_properties.grid_size, @@ -468,7 +472,8 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { record != NULL ? record->dispatch: 0, record != NULL ? record->begin : 0, record != NULL ? record->end : 0, - record != NULL ? record->complete : 0); + record != NULL ? record->complete : 0}; + kernel_flush_cb(&kernel_trace_entry); } if (record && to_clean) { delete record; From 769987e75bd2b5b59bb980a81ee906598233fd23 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Thu, 23 Sep 2021 10:53:23 -0400 Subject: [PATCH 6/8] Move metric_trace_entry_t to a new header Definition of metric_trace_entry_t has been moved to a new header. This new header will contain the definitions of all the _trace_entry_t types used to flush the payloads of the events. The purpose of this modification is to have only one header that needs to be included by developers that want to implement new plugins with the interface proposal. --- inc/rocprofiler_trace_entries.h | 12 ++++++++++++ test/tool/tool.cpp | 7 +------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 inc/rocprofiler_trace_entries.h diff --git a/inc/rocprofiler_trace_entries.h b/inc/rocprofiler_trace_entries.h new file mode 100644 index 00000000..d94bc51a --- /dev/null +++ b/inc/rocprofiler_trace_entries.h @@ -0,0 +1,12 @@ +#ifndef INC_ROCTRACER_TRACE_ENTRIES_H_ +#define INC_ROCTRACER_TRACE_ENTRIES_H_ + +#include + +struct metric_trace_entry_t { + uint32_t dispatch; + const char* name; + uint64_t result; +}; + +#endif \ No newline at end of file diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 06d11c13..7afc1400 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -49,6 +49,7 @@ THE SOFTWARE. #include #include "inc/rocprofiler.h" +#include "inc/rocprofiler_trace_entries.h" #include "util/hsa_rsrc_factory.h" #include "util/xml.h" @@ -337,12 +338,6 @@ unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } -struct metric_trace_entry_t { - uint32_t dispatch; - const char* name; - uint64_t result; -}; - void metric_flush_cb(metric_trace_entry_t *entry){ fprintf(result_file_handle, " %s ", entry->name); fprintf(result_file_handle, "(%lu)\n", entry->result); From 1cfc727a4e2e03003bff4fa0bc7347cbcd930f57 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Thu, 23 Sep 2021 10:54:55 -0400 Subject: [PATCH 7/8] Update CMakeLists for new header Added the new header in list of headers. It will allow the new header to be copied to /opt/rocm/include/rocprofiler and /opt/rocm/rocprofiler/include when installing rocprofiler --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c34f7cc9..144bc965 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,10 +145,12 @@ install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_trace_entries.h DESTINATION ${DEST_NAME}/include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_trace_entries.h DESTINATION include/${DEST_NAME} ) # rpl_run.sh tblextr.py txt2xml.sh install ( FILES From 0782afed3f3cf4808ca32cbe45b8e4bb33050f85 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Thu, 23 Sep 2021 11:03:30 -0400 Subject: [PATCH 8/8] Move kernel_trace_entry_t to rocprofiler_trace_entries.h --- inc/rocprofiler_trace_entries.h | 24 ++++++++++++++++++++++++ test/tool/tool.cpp | 24 ------------------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/inc/rocprofiler_trace_entries.h b/inc/rocprofiler_trace_entries.h index d94bc51a..6611b615 100644 --- a/inc/rocprofiler_trace_entries.h +++ b/inc/rocprofiler_trace_entries.h @@ -9,4 +9,28 @@ struct metric_trace_entry_t { uint64_t result; }; +struct kernel_trace_entry_t { + uint32_t dispatch; + uint32_t gpu_id; + uint32_t queue_id; + uint64_t queue_index; + uint32_t pid; + uint32_t tid; + uint32_t grid_size; + uint32_t workgroup_size; + uint32_t lds_size; + uint32_t scratch_size; + uint32_t vgpr; + uint32_t sgpr; + uint32_t fbarrier_count; + uint64_t signal_handle; + uint64_t object; + const char* kernel_name; + bool record; + uint64_t dispatch_time; + uint64_t begin; + uint64_t end; + uint64_t complete; +}; + #endif \ No newline at end of file diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 7afc1400..ca9f506f 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -376,30 +376,6 @@ void output_group(const context_entry_t* entry, const char* label) { } } -struct kernel_trace_entry_t { - uint32_t dispatch; - uint32_t gpu_id; - uint32_t queue_id; - uint64_t queue_index; - uint32_t pid; - uint32_t tid; - uint32_t grid_size; - uint32_t workgroup_size; - uint32_t lds_size; - uint32_t scratch_size; - uint32_t vgpr; - uint32_t sgpr; - uint32_t fbarrier_count; - uint64_t signal_handle; - uint64_t object; - const char* kernel_name; - bool record; - uint64_t dispatch_time; - uint64_t begin; - uint64_t end; - uint64_t complete; -}; - void kernel_flush_cb(kernel_trace_entry_t* entry){ fprintf(result_file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", entry->dispatch,