From 125e1b17d8335351f6bee11f1d4514afaf603a72 Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Tue, 21 Sep 2021 06:19:02 -0400 Subject: [PATCH 1/3] allow fine-grained control on several APIs Deleted a line that makes rocprof abort when using fine grained control one several APIs at a time. The parsing function in libtracer_tool.so that parses the input XML file that specifies the functions for which the user wants to activate the tracing callbacks seems to handle correctly cases where fine-grained control is used on several APIs. This line can be safely deleted. --- test/tool/tool.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 4bdce5dd..3773d6df 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -1108,7 +1108,6 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) // Getting traces const auto traces_list = xml->GetNodes("top.trace"); - if (traces_list.size() > 1) fatal("ROCProfiler: only one trace supported at a time"); const unsigned feature_count = metrics_vec.size() + traces_list.size(); rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; From 2b0aab57b1b2cd0006de368d7d2272b75e1d6c8c Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Wed, 22 Sep 2021 07:44:35 -0400 Subject: [PATCH 2/3] Isolate flushing instructions for metrics Isolated flushing instructions into a wrapping function. The file descriptor used for flushing is the one that is globally defined and not the one stored in the context_entry_t object as it is always the one that is defined globally that is used. If using a globally defined file descriptor is problematic (however it is the norm in the libtracer_tool.so library in ROCTracer) rather than passing it through arguments, the wrapping flushing function can be modified to take a void pointer. The file descriptor can then be passed through this pointer and be used in the default flushing function. --- test/tool/tool.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 3773d6df..6cac8f20 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -337,19 +337,21 @@ unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +void metric_flush_cb(const char *name, uint64_t result){ + fprintf(result_file_handle, " %s ", name); + fprintf(result_file_handle, "(%lu)\n", result); +} // Output profiling results for input features void output_results(const context_entry_t* entry, const char* label) { - FILE* file = entry->file_handle; const rocprofiler_feature_t* features = entry->features; const unsigned feature_count = entry->feature_count; for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; - fprintf(file, " %s ", p->name); switch (p->data.kind) { // Output metrics results case ROCPROFILER_DATA_KIND_INT64: - fprintf(file, "(%lu)\n", p->data.result_int64); + metric_flush_cb(p->name, p->data.result_int64); break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); From 5ccee80b5b32f7cd6c01b68134f652f9c454df4a Mon Sep 17 00:00:00 2001 From: yoann-heitz Date: Wed, 22 Sep 2021 08:16:59 -0400 Subject: [PATCH 3/3] Isolate flushing instructions for kernel events Isolated flushing instructions into a wrapping function. The file descriptor used for flushing is the one that is globally defined and not the one stored in the context_entry_t object as it is always the one that is defined globally that is used. If using a globally defined file descriptor is problematic (however it is the norm in the libtracer_tool.so library in ROCTracer) rather than passing it through arguments, the wrapping flushing function can be modified to take a void pointer. The file descriptor can then be passed through this pointer and be used in the default flushing function. --- test/tool/tool.cpp | 66 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 12 deletions(-) diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 6cac8f20..8c9267e8 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -373,6 +373,53 @@ void output_group(const context_entry_t* entry, const char* label) { } } +void kernel_flush_cb( uint32_t dispatch, + uint32_t gpu_id, + uint32_t queue_id, + uint64_t queue_index, + uint32_t pid, + uint32_t tid, + uint32_t grid_size, + uint32_t workgroup_size, + uint32_t lds_size, + uint32_t scratch_size, + uint32_t vgpr, + uint32_t sgpr, + uint32_t fbarrier_count, + uint64_t signal_handle, + uint64_t object, + const char* kernel_name, + bool record, + uint64_t dispatch_time, + uint64_t begin, + uint64_t end, + uint64_t complete){ + fprintf(result_file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", + dispatch, + gpu_id, + queue_id, + queue_index, + pid, + tid, + grid_size, + workgroup_size, + lds_size, + scratch_size, + vgpr, + sgpr, + fbarrier_count, + signal_handle, + object, + kernel_name); + if (record) fprintf(result_file_handle, ", time(%lu,%lu,%lu,%lu)", + dispatch_time, + begin, + end, + complete); + fprintf(result_file_handle, "\n"); + fflush(result_file_handle); +} + // Dump stored context entry bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { hsa_status_t status = HSA_STATUS_ERROR; @@ -391,12 +438,9 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { const uint32_t index = entry->index; if (index != UINT32_MAX) { - FILE* file_handle = entry->file_handle; const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent); - - fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", - index, + kernel_flush_cb(index, agent_info->dev_index, entry->data.queue_id, entry->data.queue_index, @@ -411,14 +455,12 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { entry->kernel_properties.fbarrier_count, entry->kernel_properties.signal.handle, entry->kernel_properties.object, - nik_name.c_str()); - if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", - record->dispatch, - record->begin, - record->end, - record->complete); - fprintf(file_handle, "\n"); - fflush(file_handle); + nik_name.c_str(), + record != NULL ? true : false, + record != NULL ? record->dispatch: 0, + record != NULL ? record->begin : 0, + record != NULL ? record->end : 0, + record != NULL ? record->complete : 0); } if (record && to_clean) { delete record;