diff --git a/CMakeLists.txt b/CMakeLists.txt index ac21227f..12574d1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,6 +152,7 @@ install ( FILES DESTINATION include/${DEST_NAME} ) # rpl_run.sh tblextr.py txt2xml.sh install ( FILES + ${CMAKE_CURRENT_SOURCE_DIR}/bin/merge_traces.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/rpl_run.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2xml.sh ${CMAKE_CURRENT_SOURCE_DIR}/bin/txt2params.py @@ -203,7 +204,7 @@ else() endif() message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" ) set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" ) -set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev" ) +set ( CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core" ) ## Process the Debian install/remove scripts to update the CPACK variables configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY ) configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY ) @@ -228,12 +229,17 @@ if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" ) string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" ) endif() set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" ) -set ( CPACK_RPM_PACKAGE_DEPENDS "hsa-rocr-dev" ) +set ( CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, rocm-core" ) ## Process the Rpm install/remove scripts to update the CPACK variables configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY ) configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY ) set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" ) set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" ) +# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake +if(NOT ROCM_DEP_ROCMCORE) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? 
?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS}) +endif() include ( CPack ) diff --git a/bin/tblextr.py b/bin/tblextr.py index 66be2739..c58b4fac 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -115,7 +115,10 @@ def parse_res(infile): beg_pattern = re.compile("^dispatch\[(\d*)\], (.*) kernel-name\(\"([^\"]*)\"\)") prop_pattern = re.compile("([\w-]+)\((\w+)\)"); ts_pattern = re.compile(", time\((\d*),(\d*),(\d*),(\d*)\)") - var_pattern = re.compile("^\s*([^\s]*)\s+\((\d*)\)") + # var pattern below matches a variable name and a variable value from a one + # line text in the format of for example "WRITE_SIZE (0.2500000000)" or + # "GRBM_GUI_ACTIVE (27867)" or "TA_TA_BUSY[0] (123)" + var_pattern = re.compile("^\s*([a-zA-Z0-9_]+(?:\[\d+\])?)\s+\((\d+(?:\.\d+)?)\)") dispatch_number = 0 for line in inp.readlines(): diff --git a/src/core/context.h b/src/core/context.h index a8026dd3..f629ef1c 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -55,7 +55,7 @@ inline unsigned align_size(unsigned size, unsigned alignment) { template class MetricArgs : public xml::args_cache_t { public: MetricArgs(const Map& map) : map_(map) {} - bool Lookup(const std::string& name, uint64_t& result) const { + bool Lookup(const std::string& name, double& result) const { rocprofiler_feature_t* info = NULL; auto it = map_.find(name); if (it == map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "var '" << name << "' is not found"); @@ -311,8 +311,8 @@ class Context { if (it == info_map_.end()) EXC_RAISING(HSA_STATUS_ERROR, "metric '" << name << "', rocprofiler info is not found " << this); rocprofiler_feature_t* info = it->second; - info->data.result_int64 = expr->Eval(args); - info->data.kind = ROCPROFILER_DATA_KIND_INT64; + info->data.result_double = expr->Eval(args); + info->data.kind = ROCPROFILER_DATA_KIND_DOUBLE; } } } diff --git a/src/core/gpu_command.cpp b/src/core/gpu_command.cpp index 48e4fba7..e337367b 100644 --- a/src/core/gpu_command.cpp +++ 
b/src/core/gpu_command.cpp @@ -97,13 +97,10 @@ struct gpu_cmd_fncomp_t { }; typedef std::map gpu_cmd_map_t; -typedef std::mutex gpu_cmd_mutex_t; -gpu_cmd_mutex_t gpu_cmd_mutex; - size_t GetGpuCommand(gpu_cmd_op_t op, const rocprofiler::util::AgentInfo* agent_info, packet_t** command_out) { - static gpu_cmd_map_t* map = NULL; + thread_local gpu_cmd_map_t map; // Getting chip-id uint32_t chip_id = 0; @@ -112,9 +109,7 @@ size_t GetGpuCommand(gpu_cmd_op_t op, if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_agent_get_info failed"); // Query/create a command - std::lock_guard lck(gpu_cmd_mutex); - if (map == NULL) map = new gpu_cmd_map_t; - auto ret = map->insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}}); + auto ret = map.insert({gpu_cmd_key_t{op, chip_id}, gpu_cmd_entry_t{}}); gpu_cmd_map_t::iterator it = ret.first; if (ret.second) { it->second.size = CreateGpuCommand(op, agent_info, it->second.command, Profile::LEGACY_SLOT_SIZE_PKT); diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index bbb97e3f..b50bd51d 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -400,42 +400,38 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa ONLOAD_TRACE_BEG(); rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); - bool intercept_mode = false; // Checking environment to enable intercept mode const char* intercept_env = getenv("ROCP_HSA_INTERCEPT"); + + int intercept_env_value = 0; if (intercept_env != NULL) { - switch (atoi(intercept_env)) { - // Intercepting disabled + intercept_env_value = atoi(intercept_env); + + switch (intercept_env_value) { case 0: - intercept_mode = false; - rocprofiler::InterceptQueue::TrackerOn(false); - break; - // Intercepting enabled without timestamping case 1: - intercept_mode = true; + // 0: Intercepting disabled + // 1: Intercepting enabled without timestamping rocprofiler::InterceptQueue::TrackerOn(false); break; - // Intercepting enabled with 
timestamping case 2: - intercept_mode = true; + // Intercepting enabled with timestamping rocprofiler::InterceptQueue::TrackerOn(true); break; default: - ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")"); + ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << "): " << + "valid values are 0 (standalone), 1 (intercepting without timestamp), 2 (intercepting with timestamp)"); return false; } } + // always enable executable tracking + rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table); + // Loading a tool lib and setting of intercept mode const uint32_t intercept_mode_mask = rocprofiler::LoadTool(); - if (intercept_mode_mask & rocprofiler::DISPATCH_INTERCEPT_MODE) { - intercept_mode = true; - } - if (intercept_mode_mask & rocprofiler::CODE_OBJ_TRACKING_MODE) { - if (intercept_mode == false) EXC_RAISING(HSA_STATUS_ERROR, "code objects tracking without intercept mode enabled"); - rocprofiler::util::HsaRsrcFactory::EnableExecutableTracking(table); - } + if (intercept_mode_mask & rocprofiler::MEMCOPY_INTERCEPT_MODE) { hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable"); @@ -453,14 +449,14 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa } // HSA intercepting - if (intercept_mode) { + if (intercept_env_value != 0) { rocprofiler::ProxyQueue::HsaIntercept(table); rocprofiler::InterceptQueue::HsaIntercept(table); } else { rocprofiler::StandaloneIntercept(); } - ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_mode << ")" << + ONLOAD_TRACE("end intercept_mode(" << std::hex << intercept_env_value << ")" << " intercept_mode_mask(" << std::hex << intercept_mode_mask << ")" << std::dec); return true; } diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index 9d980312..0a5a7ec9 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ 
b/src/util/hsa_rsrc_factory.cpp @@ -737,6 +737,12 @@ const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) { void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { std::lock_guard lck(mutex_); + // Prevent infinite recursion + // NOTE(review): bails out when the saved API entries are hsa_executable_freeze / hsa_executable_destroy themselves; presumably the interceptors would otherwise call back into the patched table - confirm + if (hsa_api_.hsa_executable_freeze == hsa_executable_freeze && + hsa_api_.hsa_executable_destroy == hsa_executable_destroy) + return; + executable_tracking_on_ = true; table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; table->core_->hsa_executable_destroy_fn = hsa_executable_destroy_interceptor; diff --git a/src/xml/expr.h b/src/xml/expr.h index 731e25e4..7f754b4c 100644 --- a/src/xml/expr.h +++ b/src/xml/expr.h @@ -29,6 +29,7 @@ THE SOFTWARE. #include #include #include +#include namespace xml { class exception_t : public std::exception { @@ -45,8 +46,8 @@ class div_zero_exception_t : public exception_t { explicit div_zero_exception_t(const std::string& msg) : exception_t("Divide by zero exception " + msg) {} }; -typedef uint64_t args_t; -static const args_t ARGS_MAX = UINT64_MAX; +typedef double args_t; +static const args_t ARGS_MAX = DBL_MAX; typedef std::map args_map_t; class Expr; diff --git a/test/app/intercept_test.cpp b/test/app/intercept_test.cpp index e62bf6ce..bbcdf806 100644 --- a/test/app/intercept_test.cpp +++ b/test/app/intercept_test.cpp @@ -124,6 +124,9 @@ void dump_context_entry(context_entry_t* entry, rocprofiler_feature_t* features, case ROCPROFILER_DATA_KIND_INT64: fprintf(stdout, "= (%lu)\n", p->data.result_int64); break; + case ROCPROFILER_DATA_KIND_DOUBLE: + fprintf(stdout, "= (%lf)\n", p->data.result_double); + break; default: fprintf(stderr, "Undefined data kind(%u)\n", p->data.kind); abort(); diff --git a/test/app/standalone_test.cpp b/test/app/standalone_test.cpp index 34bc05ea..1344e0eb 100644 --- a/test/app/standalone_test.cpp +++ b/test/app/standalone_test.cpp @@ -78,6 +78,9 @@ void print_features(rocprofiler_feature_t* feature, uint32_t feature_count) { case 
ROCPROFILER_DATA_KIND_INT64: std::cout << std::dec << " result64 (" << p->data.result_int64 << ")" << std::endl; break; + case ROCPROFILER_DATA_KIND_DOUBLE: + std::cout << " result64 (" << p->data.result_double << ")" << std::endl; + break; case ROCPROFILER_DATA_KIND_BYTES: { const char* ptr = reinterpret_cast(p->data.result_bytes.ptr); uint64_t size = 0; diff --git a/test/run.sh b/test/run.sh index f4f07166..135d3bb3 100755 --- a/test/run.sh +++ b/test/run.sh @@ -57,7 +57,7 @@ eval_test() { } # paths to ROC profiler and oher libraries -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/home/jenkins/compute-package/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD:$PWD/../../lib:/opt/rocm/lib # enable tools load failure reporting export HSA_TOOLS_REPORT_LOAD_FAILURE=1 diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 4bdce5dd..d24100c6 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -351,6 +351,9 @@ void output_results(const context_entry_t* entry, const char* label) { case ROCPROFILER_DATA_KIND_INT64: fprintf(file, "(%lu)\n", p->data.result_int64); break; + case ROCPROFILER_DATA_KIND_DOUBLE: + fprintf(file, "(%.10lf)\n", p->data.result_double); + break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); abort(); @@ -358,12 +361,13 @@ void output_results(const context_entry_t* entry, const char* label) { } } -// Output group intermeadate profiling results, created internally for complex metrics +// Output group intermediate profiling results, created internally for complex metrics void output_group(const context_entry_t* entry, const char* label) { const rocprofiler_group_t* group = &(entry->group); context_entry_t group_entry = *entry; for (unsigned i = 0; i < group->feature_count; ++i) { - if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64) { + if (group->features[i]->data.kind == ROCPROFILER_DATA_KIND_INT64 || + group->features[i]->data.kind == ROCPROFILER_DATA_KIND_DOUBLE) { 
group_entry.features = group->features[i]; group_entry.feature_count = 1; output_results(&group_entry, label); @@ -1126,6 +1130,10 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) const uint32_t features_found = metrics_vec.size(); + if (!features_found) { + CTX_OUTSTANDING_MAX = 0; + } + // Context array aloocation context_array = new context_array_t;