From b0adef77f276ca821db76ad3540a8f6f8792b44b Mon Sep 17 00:00:00 2001 From: Sherry Yuan Date: Mon, 4 Apr 2022 09:12:15 -0700 Subject: [PATCH] [Device Global] Autodiscovery change Autodiscovery helps bridge the communication between the backend and the runtime. The autodiscovery string will contain the following information for each device global: 1. device global name 2. device global address 3. device global size The autodiscovery will be in the following format: [(,
, ), (...), ...] --- include/acl.h | 11 +++++ src/acl_auto_configure.cpp | 89 ++++++++++++++++++++++++++++++++ test/acl_auto_configure_test.cpp | 63 ++++++++++++++++-------- 3 files changed, 144 insertions(+), 19 deletions(-) diff --git a/include/acl.h b/include/acl.h index a351c938..73215939 100644 --- a/include/acl.h +++ b/include/acl.h @@ -7,6 +7,7 @@ #include #include #include +#include <unordered_map> #include #include @@ -478,6 +479,12 @@ typedef class acl_device_program_info_t *acl_device_program_info; */ #define ACL_MEM_CAPABILITY_P2P (1 << 3) +// Address and size of a device global, read from the autodiscovery string. +struct acl_device_global_mem_def_t { + uint32_t address; + uint32_t size; +}; + // Part of acl_device_def_t where members are populated from the information // in the autodiscovery string. This will get updated every time the device // is programmed with a new device binary as the new binary would contain a @@ -496,6 +503,10 @@ typedef struct acl_device_def_autodiscovery_t { std::array global_mem_defs; std::vector acl_hostpipe_info; + + // Device global definitions, keyed by device global name. + std::unordered_map<std::string, acl_device_global_mem_def_t> + device_global_mem_defs; } acl_device_def_autodiscovery_t; typedef struct acl_device_def_t { diff --git a/src/acl_auto_configure.cpp b/src/acl_auto_configure.cpp index 320264ce..33bc7068 100644 --- a/src/acl_auto_configure.cpp +++ b/src/acl_auto_configure.cpp @@ -99,6 +99,32 @@ static bool read_uint_counters(const std::string &str, return true; } +// Reads the next word in str and converts it into a uint32_t. +// Returns true if a valid integer that fits in 32 bits was read, or false if +// the word is not a number or its value is out of range. +// pos is updated to the position immediately following the parsed word +// even if an error occurs.
+static bool read_uint32_counters(const std::string &str, + std::string::size_type &pos, uint32_t &val, + std::vector &counters) noexcept { + std::string result; + pos = read_word(str, pos, result); + decrement_section_counters(counters); + try { + // std::stoul returns unsigned long, which can be wider than 32 bits; + // reject values that would otherwise be silently truncated. + const unsigned long parsed = std::stoul(result); + if (parsed != static_cast<uint32_t>(parsed)) { + return false; + } + val = static_cast<uint32_t>(parsed); + } catch (const std::exception &e) { + UNREFERENCED_PARAMETER(e); + return false; + } + return true; +} + // Reads the next word in str and converts it into an unsigned. // Returns true if a valid integer was read or false if an error occurred. // pos is updated to the position immediately following the parsed word @@ -493,6 +519,69 @@ bool acl_load_device_def_from_str(const std::string &config_str, counters); } + // Read device global information. + unsigned int num_device_global = 0; + if (result && counters.back() > 0) { + result = + read_uint_counters(config_str, curr_pos, num_device_global, counters); + + // read total number of fields in device global + unsigned int total_fields_device_global = 0; + if (result) { + result = read_uint_counters(config_str, curr_pos, + total_fields_device_global, counters); + } + + for (auto i = 0U; result && (i < num_device_global); + i++) { // device_global_memories + counters.emplace_back(total_fields_device_global); + + // read device global name + std::string device_global_name; + if (result && counters.back() > 0) { + result = read_string_counters(config_str, curr_pos, device_global_name, + counters); + } + + // read device global address + uint32_t dev_global_addr = 0; // Default + if (result && counters.back() > 0) { + result = read_uint32_counters(config_str, curr_pos, dev_global_addr, + counters); + } + // read device global size + uint32_t dev_global_size = 0; // Default + if (result && counters.back() > 0) { + result = read_uint32_counters(config_str, curr_pos, dev_global_size, + counters); + } + + acl_device_global_mem_def_t dev_global_def = {dev_global_addr, + dev_global_size}; + bool ok = devdef.device_global_mem_defs
+ .insert({device_global_name, dev_global_def}) + .second; + if (!ok) { + // Device global name already exists in the map, but it should have been + // unique. + err_ss << "Device global name should be unique. " << device_global_name + << " is repeated.\n"; + result = false; + } + + // forward compatibility: bypassing remaining fields at the end of device + // global memory + while (result && counters.size() > 0 && + counters.back() > 0) { // total_fields_device_global>0 + std::string tmp; + result = + result && read_string_counters(config_str, curr_pos, tmp, counters); + check_section_counters(counters); + } + counters.pop_back(); // removing total_fields_device_global + } // device_global_memories + } + // forward compatibility: bypassing remaining fields at the end of device // description section while (result && counters.size() > 0 && diff --git a/test/acl_auto_configure_test.cpp b/test/acl_auto_configure_test.cpp index 65f950bd..64bec97c 100644 --- a/test/acl_auto_configure_test.cpp +++ b/test/acl_auto_configure_test.cpp @@ -36,6 +36,7 @@ TEST(auto_configure, simple) { #define VERSIONIDSTRINGIFY(x) #x #define VERSIONIDTOSTR(x) VERSIONIDSTRINGIFY(x) #define DEVICE_FIELDS " 23" +#define DEVICE_FIELDS_DEV_GLOBAL " 30" #define DEVICE_FIELDS_OLD " 18" #define BOARDNAME "de4_gen2x4_swdimm" #define BOARDNAME2 "pcie385_a7" @@ -96,24 +97,31 @@ TEST(auto_configure, simple) { #define IS_SYCL_COMPILE " 1" #define IS_NOT_SYCL_COMPILE " 0" +// Device global autodiscovery entries +#define NUM_DEV_GLOBAL " 2" +#define NUM_DEV_GLOBAL_FIELD " 3" // containing dev_global_name, address, size +#define DEV_GLOBAL_1 \ + " kernel15_dev_global 4096 2048" // in format of dev_global_name, address, size +#define DEV_GLOBAL_2 " kernel15_dev_global2 2048 1024" + int parsed; std::string err_str; - ACL_LOCKED( - parsed = acl_load_device_def_from_str( - std::string( - VERSIONIDTOSTR(ACL_AUTO_CONFIGURE_VERSIONID) - DEVICE_FIELDS RANDOM_HASH - " " BOARDNAME IS_NOT_BIG_ENDIAN MEM HOSTPIPE
KERNEL_ARG_INFO_NONE - " 1 82 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON - KERNEL_WORKGROUP_VARIANT KERNEL_WORKITEM_VARIANT - KERNEL_NUM_VECTOR_LANES1 KERNEL_PROFILE_SCANCHAIN_LENGTH - ARGS_LOCAL_GLOBAL_LONG_PROF KERNEL_PRINTF_FORMATSTRINGS - LD_1024 KERNEL_REQD_WORK_GROUP_SIZE_NONE - KERNEL_MAX_WORK_GROUP_SIZE_NONE - KERNEL_MAX_GLOBAL_WORK_DIM_NONE - KERNEL_USES_GLOBAL_WORK_OFFSET_ENABLED - IS_SYCL_COMPILE), - m_device_def.autodiscovery_def, err_str)); + std::string autodiscovery = std::string( + VERSIONIDTOSTR(ACL_AUTO_CONFIGURE_VERSIONID) + DEVICE_FIELDS_DEV_GLOBAL RANDOM_HASH + " " BOARDNAME IS_NOT_BIG_ENDIAN MEM HOSTPIPE KERNEL_ARG_INFO_NONE + NUM_DEV_GLOBAL NUM_DEV_GLOBAL_FIELD DEV_GLOBAL_1 DEV_GLOBAL_2 + " 1 82 foo" KERNEL_CRA KERNEL_FAST_LAUNCH_DEPTH KERNEL_PERF_MON + KERNEL_WORKGROUP_VARIANT KERNEL_WORKITEM_VARIANT + KERNEL_NUM_VECTOR_LANES1 KERNEL_PROFILE_SCANCHAIN_LENGTH + ARGS_LOCAL_GLOBAL_LONG_PROF KERNEL_PRINTF_FORMATSTRINGS + LD_1024 KERNEL_REQD_WORK_GROUP_SIZE_NONE + KERNEL_MAX_WORK_GROUP_SIZE_NONE + KERNEL_MAX_GLOBAL_WORK_DIM_NONE + KERNEL_USES_GLOBAL_WORK_OFFSET_ENABLED + IS_SYCL_COMPILE); + ACL_LOCKED(parsed = acl_load_device_def_from_str( + autodiscovery, m_device_def.autodiscovery_def, err_str)); CHECK_EQUAL(1, parsed); CHECK_EQUAL(1, m_device_def.autodiscovery_def.num_global_mem_systems); @@ -261,6 +269,23 @@ TEST(auto_configure, simple) { (int)m_device_def.autodiscovery_def.accel[0].max_work_group_size); CHECK_EQUAL(1, (int)m_device_def.autodiscovery_def.accel[0].is_sycl_compile); + // Checks for device global entries.
+ CHECK_EQUAL(2, m_device_def.autodiscovery_def.device_global_mem_defs.size()); + const auto kernel15_dev_global = + m_device_def.autodiscovery_def.device_global_mem_defs.find( + "kernel15_dev_global"); + const auto kernel15_dev_global2 = + m_device_def.autodiscovery_def.device_global_mem_defs.find( + "kernel15_dev_global2"); + CHECK(kernel15_dev_global != + m_device_def.autodiscovery_def.device_global_mem_defs.end()); + CHECK(kernel15_dev_global2 != + m_device_def.autodiscovery_def.device_global_mem_defs.end()); + CHECK_EQUAL(4096, kernel15_dev_global->second.address); + CHECK_EQUAL(2048, kernel15_dev_global->second.size); + CHECK_EQUAL(2048, kernel15_dev_global2->second.address); + CHECK_EQUAL(1024, kernel15_dev_global2->second.size); + // Check a second parsing. // It should allocate a new string for the name. ACL_LOCKED( @@ -460,8 +485,8 @@ TEST(auto_configure, many_ok_forward_compatibility) { ACL_AUTO_CONFIGURE_VERSIONID) " 28 " "sample40byterandomhash000000000000000000 " "a10gx 0 1 15 DDR 2 1 6 0 2147483648 100 " - "100 100 100 200 200 200 200 0 0 0 0 400 " - "400 400 400 400 47 " + "100 100 100 200 200 200 200 0 0 0 0 2 " + "1 name1 1 name2 47 " "40 external_sort_stage_0 0 128 1 0 0 1 0 " "1 0 1 10 0 0 4 1 0 0 500 500 500 500 0 0 " "0 0 1 1 1 3 1 1 1 3 1 800 800 800 800 800 " @@ -1175,7 +1200,7 @@ TEST(auto_configure, hostpipe) { "200 " "2 9 host_to_dev 1 0 32 32768 300 300 300 " "300 dev_to_host 0 1 32 32768 300 300 300 " - "300 400 400 400 400 400 0 " + "300 400 1 3 name3 400 0 " "1 29 foo 0 128 1 0 0 1 0 1 0 0 0 0 0 0 1 " "1 1 3 1 1 1 3 1 800 800 800 800 800 900 " "900"