From ae0ac61e853360ca18ae5443ad323f4c63d9a77e Mon Sep 17 00:00:00 2001 From: AJ Date: Mon, 25 Nov 2024 12:53:47 -0800 Subject: [PATCH 01/18] feat: update --- CMakeLists.txt | 4 +- include/am_super/system_status_class.h | 95 ++++++++ include/am_super/transform_status_class.h | 26 ++ src/am_super/am_super.cpp | 19 ++ src/am_super/system_status_class.cpp | 282 ++++++++++++++++++++++ 5 files changed, 425 insertions(+), 1 deletion(-) create mode 100644 include/am_super/system_status_class.h create mode 100644 include/am_super/transform_status_class.h create mode 100644 src/am_super/system_status_class.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c5334b0..7711119 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,6 +30,7 @@ set(dependencies # find dependencies find_package(ament_cmake REQUIRED) +find_library(PROCPS_LIBRARY procps REQUIRED) foreach(Dependency IN ITEMS ${dependencies}) find_package(${Dependency} REQUIRED) @@ -51,7 +52,8 @@ file(GLOB SUPER_MEDIATOR_FILES src/am_super/*_mediator.cpp ) -add_executable(am_super ${am_super_cpp_files}) +add_executable(am_super ${am_super_cpp_files} src/am_super/system_status_class.cpp) +target_link_libraries(am_super ${PROCPS_LIBRARY}) ament_target_dependencies(am_super ${dependencies}) install(DIRECTORY include/ diff --git a/include/am_super/system_status_class.h b/include/am_super/system_status_class.h new file mode 100644 index 0000000..90034f0 --- /dev/null +++ b/include/am_super/system_status_class.h @@ -0,0 +1,95 @@ +#ifndef AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_ +#define AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_ + +#include +#include + +#include +#include +#include +#include +#include + +namespace am +{ + +struct MemoryInfo +{ + unsigned long total; + unsigned long free; + unsigned long used; + unsigned long available; +}; + +struct GpuInfo +{ + std::string gpu_name; + int temp; + int mem_used; + int mem_free; + int util_percent; +}; + +struct CpuInfo +{ + unsigned long long user; + unsigned long long nice; + unsigned long long system; + unsigned long long idle; + unsigned long long iowait; + unsigned long long irq; + unsigned long long softirq; + unsigned long long steal; + unsigned long long total; +}; + +class SystemStatus +{ +public: + SystemStatus(); + + ~SystemStatus(); + + am::MemoryInfo& getMemoryInfo(); + + am::CpuInfo getCPUInfo(); + + void getGPUInfo(std::vector &gpu_infos); + + void getCPUInfo(std::vector &infos); + + double calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo &ci_old); + + double getUpTime(); + + void updateInfos(); + + void print(); + +private: + + int getCPUCoresCount(); + + am::CpuInfo parseCpuLine(const std::string &line); + + int cpu_cnt_= -1; + + double cpu_usage_; + + double uptime_seconds_; + + bool is_first_time_ {true}; + + std::vector cpu_loads_; + + am::MemoryInfo mi; + + std::vector cpu_infos_; + + std::vector cpu_infos_old_; + + std::vector gpu_infos_; +}; +} + +#endif /*AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_*/ \ No newline at end of file diff --git a/include/am_super/transform_status_class.h b/include/am_super/transform_status_class.h new file mode 100644 index 0000000..9493d4f --- /dev/null +++ b/include/am_super/transform_status_class.h @@ -0,0 +1,26 @@ +#ifndef AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_ +#define AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_ + +#include + +namespace am +{ + +class TransformStatus +{ +public: + + TransformStatus(); + + ~TransformStatus(); + + +private: + rclcpp::TimerBase::SharedPtr check_timer_; + + void checkTimerCB(); +}; + +} + +#endif /*AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_*/ diff --git a/src/am_super/am_super.cpp b/src/am_super/am_super.cpp index 1105089..ccc18ec 100644 --- a/src/am_super/am_super.cpp +++ b/src/am_super/am_super.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -207,6 +208,9 @@ class AMSuper rclcpp::Subscription::SharedPtr diagnostics_sub; rclcpp::Subscription::SharedPtr current_enu_sub; + rclcpp::TimerBase::SharedPtr system_check_timer_; + + rclcpp::Subscription::SharedPtr log_control_sub_; BagLogger::BagLoggerLevel log_level_; @@ -222,6 +226,8 @@ class AMSuper /** The current state of the system. */ SuperNodeMediator::Supervisor supervisor_; + std::shared_ptr system_status_; + /** * amount of time in seconds without hearing from a node that will cause it to go offline */ @@ -241,6 +247,8 @@ class AMSuper { ROS_INFO_STREAM( am::Node::node->get_name()); + system_status_ = std::make_shared(); + life_cycle_node_ = std::static_pointer_cast(am::Node::node); am::getParam("node_timeout_s", node_timeout_s_, 3.1); @@ -342,6 +350,9 @@ class AMSuper current_enu_sub = am::Node::node->create_subscription(am_topics::CTRL_VX_VEHICLE_CURRENTENU, am::getSensorQoS(1), std::bind(&AMSuper::currentENUCB, this, std::placeholders::_1)); + + system_check_timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&AMSuper::statusTimerCB, this)); + } ~AMSuper() @@ -353,6 +364,14 @@ class AMSuper } private: + + void statusTimerCB() + { + system_status_->updateInfos(); + system_status_->print(); + } + + /** * process LifeCycleState messages from nodes * diff --git a/src/am_super/system_status_class.cpp b/src/am_super/system_status_class.cpp new file mode 100644 index 0000000..e76b874 --- /dev/null +++ b/src/am_super/system_status_class.cpp @@ -0,0 +1,282 @@ +#include + +namespace am +{ +SystemStatus::SystemStatus() +{ + cpu_cnt_ = getCPUCoresCount(); +} + +SystemStatus::~SystemStatus() +{ + +} + +int SystemStatus::getCPUCoresCount() +{ + std::ifstream file("/proc/stat"); + if (!file.is_open()) { + ROS_ERROR("Unable to open /proc/stat"); + } + + int coreCount = 0; + std::string line; + + while (std::getline(file, line)) + { + if (line.compare(0, 3, "cpu") == 0 && line[3] >= '0' && line[3] <= '9') + { + coreCount++; + } + } + + file.close(); + return coreCount; +} + +am::CpuInfo SystemStatus::parseCpuLine(const std::string& line) +{ + am::CpuInfo info = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::istringstream iss(line); + std::string cpuLabel; // e.g., "cpu0", "cpu1", etc. + iss >> cpuLabel >> info.user >> info.nice >> info.system >> info.idle + >> info.iowait >> info.irq >> info.softirq >> info.steal; + + info.total = info.user + info.nice + info.system + info.idle + + info.iowait + info.irq + info.softirq + info.steal; + return info; +} + +void SystemStatus::updateInfos() +{ + getMemoryInfo(); + if(cpu_cnt_ < 0) + { + cpu_cnt_ = getCPUCoresCount(); + if(cpu_cnt_ < 0) + { + ROS_ERROR("Cannot get CPU count"); + return; + } + } + + cpu_infos_.resize(cpu_cnt_); + cpu_infos_old_.resize(cpu_cnt_); + cpu_loads_.resize(cpu_cnt_); + + getCPUInfo(cpu_infos_); + + if(is_first_time_) + { + getCPUInfo(cpu_infos_old_); + is_first_time_ = false; + } + + for(int i = 0; i < cpu_infos_.size(); i++) + { + cpu_loads_[i] = calculateCpuLoad(cpu_infos_[i], cpu_infos_old_[i]); + } + + uptime_seconds_ = getUpTime(); + + cpu_infos_old_ = cpu_infos_; + + getGPUInfo(gpu_infos_); +} + + +am::MemoryInfo& SystemStatus::getMemoryInfo() +{ + mi = {0, 0, 0}; + std::ifstream file("/proc/meminfo"); + if (!file.is_open()) { + ROS_ERROR("Error: Unable to open /proc/meminfo"); + return mi; + } + + std::string line; + while (std::getline(file, line)) { + std::istringstream iss(line); + std::string key; + unsigned long value; + std::string unit; + + iss >> key >> value >> unit; + + if (key == "MemTotal:") { + mi.total = value; // in kB + } else if (key == "MemFree:") { + mi.free = value; // in kB + } else if (key == "Buffers:" || key == "Cached:") { + mi.used += value; // Add buffers and cached to used + }else if(key == "MemAvailable:"){ + mi.available = value; + } + + } + + // Calculate used memory + mi.used = mi.total - mi.free; + file.close(); + return mi; +} + +void SystemStatus::getGPUInfo(std::vector &gpu_infos) +{ + gpu_infos.clear(); + // Execute the nvidia-smi command and read the output directly + const std::string command = "nvidia-smi --query-gpu=name,utilization.gpu,temperature.gpu,memory.used,memory.free --format=csv,nounits,noheader"; + FILE* pipe = popen(command.c_str(), "r"); + if (!pipe) + { + ROS_ERROR("Error: Unable to execute nvidia-smi. Ensure it's installed and available in PATH."); + return; + } + + char buffer[128]; + std::ostringstream result; + while (fgets(buffer, sizeof(buffer), pipe) != nullptr) { + result << buffer; + } + pclose(pipe); + + // Parse the command output + std::istringstream iss(result.str()); + std::string line; + while (std::getline(iss, line)) + { + + ROS_INFO(GREEN "%s" COLOR_RESET, line.c_str()); + std::istringstream lineStream(line); + am::GpuInfo gpu_info; + // Parse memory used and free values + std::string gpuName; + int gpuUtilization, gpuTemperature, memoryUsed, memoryFree; + + // Using ',' to split the values + std::getline(lineStream, gpuName, ','); // Get the GPU name + + // Extracting the other values (removing leading/trailing spaces) + lineStream >> gpuUtilization; + lineStream.ignore(); // Ignore the space after the utilization + lineStream >> gpuTemperature; + lineStream.ignore(); // Ignore the space after the temperature + lineStream >> memoryUsed; + lineStream.ignore(); // Ignore the space after the memory used + lineStream >> memoryFree; + + gpu_info.gpu_name = gpuName; + gpu_info.util_percent = gpuUtilization; + gpu_info.temp = gpuTemperature; + gpu_info.mem_free = memoryFree; + gpu_info.mem_used = memoryUsed; + + gpu_infos.push_back(gpu_info); + } +} + + +void SystemStatus::getCPUInfo(std::vector &infos) +{ + std::ifstream file("/proc/stat"); + if (!file.is_open()) { + ROS_ERROR("Error: Unable to open /proc/stat"); + return; + } + + std::string line; + int cnt = 0; + while (std::getline(file, line)) + { + if (line.find("cpu") == 0 && line.find("cpu ") != 0) + { + // Only process lines like "cpu0", "cpu1", etc. + infos[cnt] = parseCpuLine(line); + cnt++; + } + } + + file.close(); +} + +am::CpuInfo SystemStatus::getCPUInfo() +{ + if(cpu_cnt_ < 0) + { + ROS_ERROR("Cannot get CPU Core Count: %d", cpu_cnt_); + } + + am::CpuInfo cpu_info = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::ifstream file("/proc/stat"); + if (!file.is_open()) { + ROS_ERROR("Error: Unable to open /proc/stat"); + return cpu_info; + } + + std::string line; + if (std::getline(file, line)) { + std::istringstream iss(line); + std::string cpu; + iss >> cpu >> cpu_info.user >> cpu_info.nice >> cpu_info.system + >> cpu_info.idle >> cpu_info.iowait >> cpu_info.irq + >> cpu_info.softirq >> cpu_info.steal; + + // Total CPU time + cpu_info.total = cpu_info.user + cpu_info.nice + cpu_info.system + + cpu_info.idle + cpu_info.iowait + cpu_info.irq + + cpu_info.softirq + cpu_info.steal; + } + + file.close(); + return cpu_info; +} + +double SystemStatus::calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo &ci_old) +{ + unsigned long long totalDiff = ci.total - ci_old.total; + unsigned long long idleDiff = (ci.idle + ci.iowait) - (ci_old.idle + ci_old.iowait); + + cpu_usage_ = (totalDiff - idleDiff) * 100.0 / totalDiff; + + return cpu_usage_; +} + +double SystemStatus::getUpTime() +{ + std::ifstream file("/proc/uptime"); + if (!file.is_open()) { + ROS_INFO("Error: Unable to open /proc/uptime"); + + return 0.0; + } + + double idleSeconds; + file >> uptime_seconds_ >> idleSeconds; + file.close(); + + return uptime_seconds_; +} + +void SystemStatus::print() +{ + ROS_INFO("MemoryInfo---> Total: %ld MB, Free: %ld MB, Used: %ld MB, Available: %ld MB", (mi.total / 1024), (mi.free / 1024), (mi.used / 1024), (mi.available / 1024)); + + std::string msg = ""; + for(int i = 0; i < cpu_loads_.size(); i++) + { + msg += std::string(" Core[") + std::to_string(i) + "] Usage: " + std::to_string(cpu_loads_[i]); + } + + ROS_INFO("CPUInfo---> Cores: %d , %s", cpu_cnt_, msg.c_str()); + + ROS_INFO("UpTime: %f", uptime_seconds_); + + msg = ""; + for(int i = 0; i < gpu_infos_.size(); i++) + { + msg += gpu_infos_[i].gpu_name + ": Temp[C] = " + std::to_string(gpu_infos_[i].temp) + ", Used[%]: " + std::to_string(gpu_infos_[i].util_percent); + } + + ROS_INFO("%s", msg.c_str()); +} +} \ No newline at end of file From 48919bb3fdc95043abd7034d5a0c3919168608d3 Mon Sep 17 00:00:00 2001 From: AJ Date: Mon, 25 Nov 2024 13:52:06 -0800 Subject: [PATCH 02/18] feat: renamed --- CMakeLists.txt | 2 +- ...status_class.h => resource_status_class.h} | 12 ++++---- src/am_super/am_super.cpp | 10 +++---- ...us_class.cpp => resource_status_class.cpp} | 28 +++++++++---------- 4 files changed, 26 insertions(+), 26 deletions(-) rename include/am_super/{system_status_class.h => resource_status_class.h} (86%) rename src/am_super/{system_status_class.cpp => resource_status_class.cpp} (90%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7711119..73ad011 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ file(GLOB SUPER_MEDIATOR_FILES src/am_super/*_mediator.cpp ) -add_executable(am_super ${am_super_cpp_files} src/am_super/system_status_class.cpp) +add_executable(am_super ${am_super_cpp_files} src/am_super/resource_status_class.cpp) target_link_libraries(am_super ${PROCPS_LIBRARY}) ament_target_dependencies(am_super ${dependencies}) diff --git a/include/am_super/system_status_class.h b/include/am_super/resource_status_class.h similarity index 86% rename from include/am_super/system_status_class.h rename to include/am_super/resource_status_class.h index 90034f0..49f09ec 100644 --- a/include/am_super/system_status_class.h +++ b/include/am_super/resource_status_class.h @@ -1,5 +1,5 @@ -#ifndef AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_ -#define AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_ +#ifndef AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_ +#define AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_ #include #include @@ -43,12 +43,12 @@ struct CpuInfo unsigned long long total; }; -class SystemStatus +class ResourceStatus { public: - SystemStatus(); + ResourceStatus(); - ~SystemStatus(); + ~ResourceStatus(); am::MemoryInfo& getMemoryInfo(); @@ -92,4 +92,4 @@ class SystemStatus }; } -#endif /*AM_SUPER_INCLUDE_SYSTEM_STATUS_CLASS_H_*/ \ No newline at end of file +#endif /*AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_*/ \ No newline at end of file diff --git a/src/am_super/am_super.cpp b/src/am_super/am_super.cpp index ccc18ec..de59c47 100644 --- a/src/am_super/am_super.cpp +++ b/src/am_super/am_super.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -226,7 +226,7 @@ class AMSuper /** The current state of the system. */ SuperNodeMediator::Supervisor supervisor_; - std::shared_ptr system_status_; + std::shared_ptr resource_status_; /** * amount of time in seconds without hearing from a node that will cause it to go offline @@ -247,7 +247,7 @@ class AMSuper { ROS_INFO_STREAM( am::Node::node->get_name()); - system_status_ = std::make_shared(); + resource_status_ = std::make_shared(); life_cycle_node_ = std::static_pointer_cast(am::Node::node); @@ -367,8 +367,8 @@ class AMSuper void statusTimerCB() { - system_status_->updateInfos(); - system_status_->print(); + resource_status_->updateInfos(); + resource_status_->print(); } diff --git a/src/am_super/system_status_class.cpp b/src/am_super/resource_status_class.cpp similarity index 90% rename from src/am_super/system_status_class.cpp rename to src/am_super/resource_status_class.cpp index e76b874..dac57f2 100644 --- a/src/am_super/system_status_class.cpp +++ b/src/am_super/resource_status_class.cpp @@ -1,18 +1,18 @@ -#include +#include namespace am { -SystemStatus::SystemStatus() +ResourceStatus::ResourceStatus() { cpu_cnt_ = getCPUCoresCount(); } -SystemStatus::~SystemStatus() +ResourceStatus::~ResourceStatus() { } -int SystemStatus::getCPUCoresCount() +int ResourceStatus::getCPUCoresCount() { std::ifstream file("/proc/stat"); if (!file.is_open()) { @@ -34,7 +34,7 @@ int SystemStatus::getCPUCoresCount() return coreCount; } -am::CpuInfo SystemStatus::parseCpuLine(const std::string& line) +am::CpuInfo ResourceStatus::parseCpuLine(const std::string& line) { am::CpuInfo info = {0, 0, 0, 0, 0, 0, 0, 0, 0}; std::istringstream iss(line); @@ -47,7 +47,7 @@ am::CpuInfo SystemStatus::parseCpuLine(const std::string& line) return info; } -void SystemStatus::updateInfos() +void ResourceStatus::updateInfos() { getMemoryInfo(); if(cpu_cnt_ < 0) @@ -85,7 +85,7 @@ void SystemStatus::updateInfos() } -am::MemoryInfo& SystemStatus::getMemoryInfo() +am::MemoryInfo& ResourceStatus::getMemoryInfo() { mi = {0, 0, 0}; std::ifstream file("/proc/meminfo"); @@ -121,7 +121,7 @@ am::MemoryInfo& SystemStatus::getMemoryInfo() return mi; } -void SystemStatus::getGPUInfo(std::vector &gpu_infos) +void ResourceStatus::getGPUInfo(std::vector &gpu_infos) { gpu_infos.clear(); // Execute the nvidia-smi command and read the output directly @@ -146,7 +146,7 @@ void SystemStatus::getGPUInfo(std::vector &gpu_infos) while (std::getline(iss, line)) { - ROS_INFO(GREEN "%s" COLOR_RESET, line.c_str()); + //ROS_INFO(GREEN "%s" COLOR_RESET, line.c_str()); std::istringstream lineStream(line); am::GpuInfo gpu_info; // Parse memory used and free values @@ -176,7 +176,7 @@ void SystemStatus::getGPUInfo(std::vector &gpu_infos) } -void SystemStatus::getCPUInfo(std::vector &infos) +void ResourceStatus::getCPUInfo(std::vector &infos) { std::ifstream file("/proc/stat"); if (!file.is_open()) { @@ -199,7 +199,7 @@ void SystemStatus::getCPUInfo(std::vector &infos) file.close(); } -am::CpuInfo SystemStatus::getCPUInfo() +am::CpuInfo ResourceStatus::getCPUInfo() { if(cpu_cnt_ < 0) { @@ -231,7 +231,7 @@ am::CpuInfo SystemStatus::getCPUInfo() return cpu_info; } -double SystemStatus::calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo &ci_old) +double ResourceStatus::calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo &ci_old) { unsigned long long totalDiff = ci.total - ci_old.total; unsigned long long idleDiff = (ci.idle + ci.iowait) - (ci_old.idle + ci_old.iowait); @@ -241,7 +241,7 @@ double SystemStatus::calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo & return cpu_usage_; } -double SystemStatus::getUpTime() +double ResourceStatus::getUpTime() { std::ifstream file("/proc/uptime"); if (!file.is_open()) { @@ -257,7 +257,7 @@ double SystemStatus::getUpTime() return uptime_seconds_; } -void SystemStatus::print() +void ResourceStatus::print() { ROS_INFO("MemoryInfo---> Total: %ld MB, Free: %ld MB, Used: %ld MB, Available: %ld MB", (mi.total / 1024), (mi.free / 1024), (mi.used / 1024), (mi.available / 1024)); From d1357e275e2fda598ce7cee837986553300c0ba2 Mon Sep 17 00:00:00 2001 From: AJ Date: Mon, 25 Nov 2024 13:53:07 -0800 Subject: [PATCH 03/18] feat: update --- include/am_super/transform_status_class.h | 26 ----------------------- 1 file changed, 26 deletions(-) delete mode 100644 include/am_super/transform_status_class.h diff --git a/include/am_super/transform_status_class.h b/include/am_super/transform_status_class.h deleted file mode 100644 index 9493d4f..0000000 --- a/include/am_super/transform_status_class.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_ -#define AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_ - -#include - -namespace am -{ - -class TransformStatus -{ -public: - - TransformStatus(); - - ~TransformStatus(); - - -private: - rclcpp::TimerBase::SharedPtr check_timer_; - - void checkTimerCB(); -}; - -} - -#endif /*AM_SUPER_INCLUDE_TRANSFORM_STATUS_CLASS_H_*/ From 119118a8ae893ecadc59e8450b5591e26f8015ec Mon Sep 17 00:00:00 2001 From: AJ Date: Mon, 25 Nov 2024 14:36:51 -0800 Subject: [PATCH 04/18] feat: update --- include/am_super/resource_status_class.h | 9 +++++++ src/am_super/resource_status_class.cpp | 34 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/include/am_super/resource_status_class.h b/include/am_super/resource_status_class.h index 49f09ec..8d8ac06 100644 --- a/include/am_super/resource_status_class.h +++ b/include/am_super/resource_status_class.h @@ -9,6 +9,8 @@ #include #include #include +#include + namespace am { @@ -89,6 +91,13 @@ class ResourceStatus std::vector cpu_infos_old_; std::vector gpu_infos_; + + + + /*ROS Infrastructure Checking tools*/ + std::shared_ptr transformer_; + rclcpp::TimerBase::SharedPtr timer_; + void timerCB(); }; } diff --git a/src/am_super/resource_status_class.cpp b/src/am_super/resource_status_class.cpp index dac57f2..4d3b1c6 100644 --- a/src/am_super/resource_status_class.cpp +++ b/src/am_super/resource_status_class.cpp @@ -1,9 +1,15 @@ + + #include namespace am { ResourceStatus::ResourceStatus() { + transformer_ = std::make_shared(); + + timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); + cpu_cnt_ = getCPUCoresCount(); } @@ -279,4 +285,32 @@ void ResourceStatus::print() ROS_INFO("%s", msg.c_str()); } + + + +void ResourceStatus::timerCB() +{ + rclcpp::node_interfaces::NodeGraphInterface::SharedPtr node_graph = am::Node::node->get_node_graph_interface(); + + std::vector running_nodes = node_graph->get_node_names(); + + std::unordered_map string_count; + + // Count occurrences of each string + for (const std::string& str : running_nodes) + { + string_count[str]++; + } + + // Collect strings that appear more than once + for (const auto& [str, count] : string_count) + { + if (count > 1) + { + ROS_ERROR("Found a duplicate Node: %s", str.c_str()); + } + } + + +} } \ No newline at end of file From fcd1a62ff66dbaa8108c65c6f021a1fca9c8f13a Mon Sep 17 00:00:00 2001 From: AJ Date: Mon, 25 Nov 2024 14:48:42 -0800 Subject: [PATCH 05/18] feat: udpate --- include/am_super/resource_status_class.h | 3 +-- src/am_super/resource_status_class.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/am_super/resource_status_class.h b/include/am_super/resource_status_class.h index 8d8ac06..52d3119 100644 --- a/include/am_super/resource_status_class.h +++ b/include/am_super/resource_status_class.h @@ -92,10 +92,9 @@ class ResourceStatus std::vector gpu_infos_; - - /*ROS Infrastructure Checking tools*/ std::shared_ptr transformer_; + std::vector> transform_list_; rclcpp::TimerBase::SharedPtr timer_; void timerCB(); }; diff --git a/src/am_super/resource_status_class.cpp b/src/am_super/resource_status_class.cpp index 4d3b1c6..c46f0cb 100644 --- a/src/am_super/resource_status_class.cpp +++ b/src/am_super/resource_status_class.cpp @@ -8,6 +8,10 @@ ResourceStatus::ResourceStatus() { transformer_ = std::make_shared(); + transform_list_.push_back(std::make_pair("base_link","Asset_Frame")); + transform_list_.push_back(std::make_pair("base_link","ouster_FLU")); + transform_list_.push_back(std::make_pair("base_link","Asset_ENU")); + timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); cpu_cnt_ = getCPUCoresCount(); @@ -311,6 +315,16 @@ void ResourceStatus::timerCB() } } + + //Transform check + for(std::pair &tf_str : transform_list_) + { + geometry_msgs::msg::TransformStamped transform; + if(!transformer_->getTransform(tf_str.first, tf_str.second, transform, 1.0, false)) + { + ROS_ERROR("Transform tree is broken: %s, %s", tf_str.first.c_str(), tf_str.second.c_str()); + } + } } } \ No newline at end of file From 10efa3c344385e1b7d789816e02f122507229cad Mon Sep 17 00:00:00 2001 From: AJ Date: Tue, 26 Nov 2024 07:13:53 -0800 Subject: [PATCH 06/18] feat: code refactored --- CMakeLists.txt | 11 +++- .../resource_monitor/resource_monitor_node.h | 31 +++++++++ .../resource_monitor/resource_monitor_stats.h | 38 +++++++++++ .../resource_status_class.h | 26 ++++++-- src/am_super/am_super.cpp | 14 ---- .../resource_monitor_main.cpp | 28 ++++++++ .../resource_monitor_node.cpp | 65 ++++++++++++++++++ .../resource_status_class.cpp | 66 +++++++++++++++++-- 8 files changed, 253 insertions(+), 26 deletions(-) create mode 100644 include/resource_monitor/resource_monitor_node.h create mode 100644 include/resource_monitor/resource_monitor_stats.h rename include/{am_super => resource_monitor}/resource_status_class.h (67%) create mode 100644 src/resource_monitor/resource_monitor_main.cpp create mode 100644 src/resource_monitor/resource_monitor_node.cpp rename src/{am_super => resource_monitor}/resource_status_class.cpp (82%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 73ad011..1d88066 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,6 @@ set(dependencies # find dependencies find_package(ament_cmake REQUIRED) -find_library(PROCPS_LIBRARY procps REQUIRED) foreach(Dependency IN ITEMS ${dependencies}) find_package(${Dependency} REQUIRED) @@ -52,16 +51,22 @@ file(GLOB SUPER_MEDIATOR_FILES src/am_super/*_mediator.cpp ) -add_executable(am_super ${am_super_cpp_files} src/am_super/resource_status_class.cpp) -target_link_libraries(am_super ${PROCPS_LIBRARY}) +add_executable(am_super ${am_super_cpp_files}) +target_link_libraries(am_super) ament_target_dependencies(am_super ${dependencies}) +add_executable(resource_monitor src/resource_monitor/resource_status_class.cpp + src/resource_monitor/resource_monitor_main.cpp + src/resource_monitor/resource_monitor_node.cpp) +ament_target_dependencies(resource_monitor ${dependencies}) + install(DIRECTORY include/ DESTINATION include/ ) install(TARGETS am_super + resource_monitor DESTINATION lib/${PROJECT_NAME} ) diff --git a/include/resource_monitor/resource_monitor_node.h b/include/resource_monitor/resource_monitor_node.h new file mode 100644 index 0000000..7dcf914 --- /dev/null +++ b/include/resource_monitor/resource_monitor_node.h @@ -0,0 +1,31 @@ +#ifndef AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_MONITOR_NODE_H_ +#define AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_MONITOR_NODE_H_ + + +#include + +namespace am +{ +class ResourceMonitorNode : public AMLifeCycle +{ +public: + ResourceMonitorNode(const std::string &node_name); + + ~ResourceMonitorNode(); + + std::shared_ptr resource_status_ = nullptr; + + std::shared_ptr getAMClass(); + + void setAMClass(std::shared_ptr am_class); + + bool configured_ = false; + + // AMLifeCycle overrides + void heartbeatCB() override; + bool onCleanup() override; + bool onConfigure() override; +}; +} + +#endif /*AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_MONITOR_NODE_H_*/ \ No newline at end of file diff --git a/include/resource_monitor/resource_monitor_stats.h b/include/resource_monitor/resource_monitor_stats.h new file mode 100644 index 0000000..8dbbff3 --- /dev/null +++ b/include/resource_monitor/resource_monitor_stats.h @@ -0,0 +1,38 @@ +#ifndef AM_LIDAR_BS_AM_LIDAR_BS_STATS_H_ +#define AM_LIDAR_BS_AM_LIDAR_BS_STATS_H_ + +#include +#include +#include +#include +#include + +namespace am +{ + +class ResourceMonitorStats +{ +public: + AMStatStatus statStatus = AMStatStatus("ss", "AMStatStatus"); + + + AMStat tf_stats = AMStat("tf_s", "Transform Stats", 1, 2, 80, 100); + AMStat node_stats = AMStat("n_s", "Nodes Stats", 1, 2, 80, 100); + AMStat cpu_stats = AMStat("cpu_s", "CPU Stats", 1, 2, 80, 100); + AMStat gpu_stats = AMStat("gpu_s", "GPU Stats", 1, 2, 80, 100); + AMStat ram_stats = AMStat("ram_s", "RAM Stats", 1, 2, 80, 100); + + ResourceMonitorStats(AMStatList &stat_list) + { + stat_list.add(&statStatus); + stat_list.add(&tf_stats); + stat_list.add(&node_stats); + stat_list.add(&gpu_stats); + stat_list.add(&cpu_stats); + stat_list.add(&ram_stats); + } +}; + +}; // namespace + +#endif /* AM_LIDAR_BS_AM_LIDAR_BS_STATS_H_ */ diff --git a/include/am_super/resource_status_class.h b/include/resource_monitor/resource_status_class.h similarity index 67% rename from include/am_super/resource_status_class.h rename to include/resource_monitor/resource_status_class.h index 52d3119..0044f61 100644 --- a/include/am_super/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -1,5 +1,5 @@ -#ifndef AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_ -#define AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_ +#ifndef AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_STATUS_CLASS_H_ +#define AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_STATUS_CLASS_H_ #include #include @@ -10,6 +10,8 @@ #include #include #include +#include +#include namespace am @@ -21,6 +23,7 @@ struct MemoryInfo unsigned long free; unsigned long used; unsigned long available; + int used_percent; }; struct GpuInfo @@ -48,7 +51,7 @@ struct CpuInfo class ResourceStatus { public: - ResourceStatus(); + ResourceStatus(std::shared_ptr stats); ~ResourceStatus(); @@ -68,8 +71,23 @@ class ResourceStatus void print(); + std::shared_ptr getStats(); + + // AMLifeCycle passthrus + bool onConfigure(); + bool onCleanup(); + void heartbeatCB(); + private: + std::shared_ptr stats_; + + rclcpp::Subscription::SharedPtr status_sub_; + rclcpp::Subscription::SharedPtr stat_sub_; + + void statusCB(const std_msgs::msg::Int32::SharedPtr msg); + void statCB(const std_msgs::msg::Int32::SharedPtr msg); + int getCPUCoresCount(); am::CpuInfo parseCpuLine(const std::string &line); @@ -100,4 +118,4 @@ class ResourceStatus }; } -#endif /*AM_SUPER_INCLUDE_RESOURCE_STATUS_CLASS_H_*/ \ No newline at end of file +#endif /*AM_SUPER_INCLUDE_RESOURCE_MONITOR_RESOURCE_STATUS_CLASS_H_*/ \ No newline at end of file diff --git a/src/am_super/am_super.cpp b/src/am_super/am_super.cpp index de59c47..0c975cb 100644 --- a/src/am_super/am_super.cpp +++ b/src/am_super/am_super.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -208,8 +207,6 @@ class AMSuper rclcpp::Subscription::SharedPtr diagnostics_sub; rclcpp::Subscription::SharedPtr current_enu_sub; - rclcpp::TimerBase::SharedPtr system_check_timer_; - rclcpp::Subscription::SharedPtr log_control_sub_; BagLogger::BagLoggerLevel log_level_; @@ -226,8 +223,6 @@ class AMSuper /** The current state of the system. */ SuperNodeMediator::Supervisor supervisor_; - std::shared_ptr resource_status_; - /** * amount of time in seconds without hearing from a node that will cause it to go offline */ @@ -247,8 +242,6 @@ class AMSuper { ROS_INFO_STREAM( am::Node::node->get_name()); - resource_status_ = std::make_shared(); - life_cycle_node_ = std::static_pointer_cast(am::Node::node); am::getParam("node_timeout_s", node_timeout_s_, 3.1); @@ -351,7 +344,6 @@ class AMSuper std::bind(&AMSuper::currentENUCB, this, std::placeholders::_1)); - system_check_timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&AMSuper::statusTimerCB, this)); } @@ -365,12 +357,6 @@ class AMSuper private: - void statusTimerCB() - { - resource_status_->updateInfos(); - resource_status_->print(); - } - /** * process LifeCycleState messages from nodes diff --git a/src/resource_monitor/resource_monitor_main.cpp b/src/resource_monitor/resource_monitor_main.cpp new file mode 100644 index 0000000..9506874 --- /dev/null +++ b/src/resource_monitor/resource_monitor_main.cpp @@ -0,0 +1,28 @@ +#include + +#include +#include + +std::shared_ptr am::Node::node = nullptr; + +int main(int argc, char** argv) +{ + rclcpp::init(argc, argv); + + // create the AMLifeCycle object with stats and assign it to the AMNode singleton + std::shared_ptr am_node = std::make_shared("resource_minitor"); + std::shared_ptr stats = std::make_shared(am_node->stats_list_); + am::Node::node = am_node; + + // create the buisness logic object and give the AMLifecycle class a pointer to it + std::shared_ptr am_class = std::make_shared(stats); + am_node->setAMClass(am_class); + + ROS_INFO_STREAM(am::Node::node->get_name() << ": running..."); + + rclcpp::spin(am::Node::node); + + rclcpp::shutdown(); + + return 0; +} \ No newline at end of file diff --git a/src/resource_monitor/resource_monitor_node.cpp b/src/resource_monitor/resource_monitor_node.cpp new file mode 100644 index 0000000..b56f100 --- /dev/null +++ b/src/resource_monitor/resource_monitor_node.cpp @@ -0,0 +1,65 @@ +#include +#include +#include +#include + +namespace am +{ + +ResourceMonitorNode::ResourceMonitorNode(const std::string & node_name) : AMLifeCycle(node_name) +{ +} + +ResourceMonitorNode::~ResourceMonitorNode() +{ + +} + +void ResourceMonitorNode::setAMClass(std::shared_ptr am_class) +{ + resource_status_= am_class; +} + +std::shared_ptr ResourceMonitorNode::getAMClass() +{ + return resource_status_; +} + +bool ResourceMonitorNode::onConfigure() +{ + if(configured_) + { + return AMLifeCycle::onConfigure(); + } + + ROS_INFO("onConfigure"); + + if(!resource_status_->onConfigure()) + { + ROS_WARN("am_class_->onConfigure() failed"); + resource_status_->onCleanup(); + return false; + } + else + { + configured_ = true; + return AMLifeCycle::onConfigure(); + } +} + +bool ResourceMonitorNode::onCleanup() +{ + ROS_INFO("onCleanup"); + + resource_status_->onCleanup(); + return AMLifeCycle::onCleanup(); +} + +void ResourceMonitorNode::heartbeatCB() +{ + resource_status_->heartbeatCB(); + AMLifeCycle::heartbeatCB(); +} + + +} // namespace \ No newline at end of file diff --git a/src/am_super/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp similarity index 82% rename from src/am_super/resource_status_class.cpp rename to src/resource_monitor/resource_status_class.cpp index c46f0cb..6d61547 100644 --- a/src/am_super/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -1,10 +1,10 @@ -#include +#include namespace am { -ResourceStatus::ResourceStatus() +ResourceStatus::ResourceStatus(std::shared_ptr stats) : stats_(stats) { transformer_ = std::make_shared(); @@ -22,6 +22,41 @@ ResourceStatus::~ResourceStatus() } +std::shared_ptr ResourceStatus::getStats() +{ + return stats_; +} + +bool ResourceStatus::onConfigure() +{ + status_sub_ = am::Node::node->create_subscription(std::string(am::Node::node->get_name()) + "/status", 100, std::bind(&ResourceStatus::statusCB, this, std::placeholders::_1)); + stat_sub_ = am::Node::node->create_subscription(std::string(am::Node::node->get_name()) + "/stat", 100, std::bind(&ResourceStatus::statCB, this, std::placeholders::_1)); + + return true; +} + +bool ResourceStatus::onCleanup() +{ + status_sub_.reset(); + stat_sub_.reset(); + return true; +} + +void ResourceStatus::statusCB(const std_msgs::msg::Int32::SharedPtr msg) +{ + stats_->statStatus = msg->data; +} + +void ResourceStatus::statCB(const std_msgs::msg::Int32::SharedPtr msg) +{ + +} + +void ResourceStatus::heartbeatCB() +{ +} + + int ResourceStatus::getCPUCoresCount() { std::ifstream file("/proc/stat"); @@ -81,11 +116,20 @@ void ResourceStatus::updateInfos() getCPUInfo(cpu_infos_old_); is_first_time_ = false; } - + double avg_load = 0.0; for(int i = 0; i < cpu_infos_.size(); i++) { - cpu_loads_[i] = calculateCpuLoad(cpu_infos_[i], cpu_infos_old_[i]); + double load = calculateCpuLoad(cpu_infos_[i], cpu_infos_old_[i]); + avg_load+=load; + cpu_loads_[i] = load; + } + + if(cpu_cnt_ > 0) + { + avg_load = avg_load/cpu_cnt_; + stats_->cpu_stats = (avg_load > 80.0?100:50); } + uptime_seconds_ = getUpTime(); @@ -127,6 +171,8 @@ am::MemoryInfo& ResourceStatus::getMemoryInfo() // Calculate used memory mi.used = mi.total - mi.free; + mi.used_percent = (mi.used / mi.total) * 100; + stats_->ram_stats = (mi.used_percent > 80?100:50); file.close(); return mi; } @@ -180,6 +226,7 @@ void ResourceStatus::getGPUInfo(std::vector &gpu_infos) gpu_info.temp = gpuTemperature; gpu_info.mem_free = memoryFree; gpu_info.mem_used = memoryUsed; + stats_->gpu_stats = (gpu_info.util_percent>90?100:50); gpu_infos.push_back(gpu_info); } @@ -307,24 +354,33 @@ void ResourceStatus::timerCB() } // Collect strings that appear more than once + bool node_check = true; for (const auto& [str, count] : string_count) { if (count > 1) { ROS_ERROR("Found a duplicate Node: %s", str.c_str()); + node_check = false; } } + stats_->node_stats = (node_check?50:100); //Transform check + bool tf_check = true; for(std::pair &tf_str : transform_list_) { geometry_msgs::msg::TransformStamped transform; if(!transformer_->getTransform(tf_str.first, tf_str.second, transform, 1.0, false)) { - ROS_ERROR("Transform tree is broken: %s, %s", tf_str.first.c_str(), tf_str.second.c_str()); + //ROS_ERROR("Transform tree is broken: %s, %s", tf_str.first.c_str(), tf_str.second.c_str()); + tf_check = false; } } + stats_->tf_stats = (tf_check?50:100); + + //Resource Check + updateInfos(); } } \ No newline at end of file From d19ee87ff50bb74576d88a542e5da18e6b14f713 Mon Sep 17 00:00:00 2001 From: AJ Date: Tue, 26 Nov 2024 09:16:45 -0800 Subject: [PATCH 07/18] feat: update --- include/resource_monitor/resource_monitor_stats.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/resource_monitor/resource_monitor_stats.h b/include/resource_monitor/resource_monitor_stats.h index 8dbbff3..74ab0a4 100644 --- a/include/resource_monitor/resource_monitor_stats.h +++ b/include/resource_monitor/resource_monitor_stats.h @@ -16,11 +16,11 @@ class ResourceMonitorStats AMStatStatus statStatus = AMStatStatus("ss", "AMStatStatus"); - AMStat tf_stats = AMStat("tf_s", "Transform Stats", 1, 2, 80, 100); - AMStat node_stats = AMStat("n_s", "Nodes Stats", 1, 2, 80, 100); - AMStat cpu_stats = AMStat("cpu_s", "CPU Stats", 1, 2, 80, 100); - AMStat gpu_stats = AMStat("gpu_s", "GPU Stats", 1, 2, 80, 100); - AMStat ram_stats = AMStat("ram_s", "RAM Stats", 1, 2, 80, 100); + AMStat tf_stats = AMStat("tf_s", "Transform Stats", 1, 2, 80, 99); + AMStat node_stats = AMStat("n_s", "Nodes Stats", 1, 2, 80, 99); + AMStat cpu_stats = AMStat("cpu_s", "CPU Stats", 1, 2, 80, 99); + AMStat gpu_stats = AMStat("gpu_s", "GPU Stats", 1, 2, 80, 99); + AMStat ram_stats = AMStat("ram_s", "RAM Stats", 1, 2, 80, 99); ResourceMonitorStats(AMStatList &stat_list) { From 8e06d57a8c7057b979cb4a72cf6f9bf2e6d9b06e Mon Sep 17 00:00:00 2001 From: AJ Date: Thu, 5 Dec 2024 09:09:09 -0800 Subject: [PATCH 08/18] feat: ip checks --- .../resource_monitor/resource_monitor_stats.h | 10 ++ .../resource_monitor/resource_status_class.h | 4 + .../resource_status_class.cpp | 101 ++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/include/resource_monitor/resource_monitor_stats.h b/include/resource_monitor/resource_monitor_stats.h index 74ab0a4..2b2519e 100644 --- a/include/resource_monitor/resource_monitor_stats.h +++ b/include/resource_monitor/resource_monitor_stats.h @@ -21,6 +21,11 @@ class ResourceMonitorStats AMStat cpu_stats = AMStat("cpu_s", "CPU Stats", 1, 2, 80, 99); AMStat gpu_stats = AMStat("gpu_s", "GPU Stats", 1, 2, 80, 99); AMStat ram_stats = AMStat("ram_s", "RAM Stats", 1, 2, 80, 99); + AMStat lidar_ip = AMStat("lidar_ip_s", "Lidar IP Stats", 1, 2, 80, 99); + AMStat fl_ip = AMStat("fl_s", "FL IP Stats", 1, 2, 80, 99); + AMStat fr_ip = AMStat("fr_s", "FR IP Stats", 1, 2, 80, 99); + AMStat rl_ip = AMStat("rl_s", "RL IP Stats", 1, 2, 80, 99); + AMStat rr_ip = AMStat("rr_s", "RR IP Stats", 1, 2, 80, 99); ResourceMonitorStats(AMStatList &stat_list) { @@ -30,6 +35,11 @@ class ResourceMonitorStats stat_list.add(&gpu_stats); stat_list.add(&cpu_stats); stat_list.add(&ram_stats); + stat_list.add(&lidar_ip); + stat_list.add(&fl_ip); + stat_list.add(&fr_ip); + stat_list.add(&rl_ip); + stat_list.add(&rr_ip); } }; diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 0044f61..5e74500 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -71,6 +71,8 @@ class ResourceStatus void print(); + bool isReachable(const std::string &ipAddress, int port = 80, int timeoutSec = 1); + std::shared_ptr getStats(); // AMLifeCycle passthrus @@ -110,6 +112,8 @@ class ResourceStatus std::vector gpu_infos_; + std::map ip_addresses_; //IPAddress, Name + /*ROS Infrastructure Checking tools*/ std::shared_ptr transformer_; std::vector> transform_list_; diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 6d61547..e91267f 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -1,6 +1,11 @@ #include +#include +#include // For memset +#include // For socket functions +#include // For inet_addr and sockaddr_in +#include // For close namespace am { @@ -12,6 +17,15 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) transform_list_.push_back(std::make_pair("base_link","ouster_FLU")); transform_list_.push_back(std::make_pair("base_link","Asset_ENU")); + ip_addresses_["192.168.1.55"] = std::string("lidar"); + ip_addresses_["192.168.1.10"] = std::string("front_left"); + ip_addresses_["192.168.1.20"] = std::string("front_right"); + ip_addresses_["192.168.1.30"] = std::string("rear_right"); + ip_addresses_["192.168.1.40"] = std::string("rear_left"); + + + + timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); cpu_cnt_ = getCPUCoresCount(); @@ -256,6 +270,46 @@ void ResourceStatus::getCPUInfo(std::vector &infos) file.close(); } +bool ResourceStatus::isReachable(const std::string &ip_address, int port, int timeoutSec) +{ + /*std::string command = std::string("ping -c 1 ") + ip_address + std::string(" >/dev/null 2>&1"); + + int result = std::system(command.c_str()); + + return result == 0;*/ + + int sockfd; + struct sockaddr_in serverAddr; + + // Create a socket + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) { + std::cerr << "Error: Cannot create socket\n"; + return false; + } + + // Set socket timeout + struct timeval timeout; + timeout.tv_sec = timeoutSec; + timeout.tv_usec = 0; + setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout, sizeof(timeout)); + setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout, sizeof(timeout)); + + // Set up the server address struct + memset(&serverAddr, 0, sizeof(serverAddr)); + serverAddr.sin_family = AF_INET; + serverAddr.sin_port = htons(port); + serverAddr.sin_addr.s_addr = inet_addr(ip_address.c_str()); + + // Attempt to connect + bool reachable = (connect(sockfd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) == 0); + + // Close the socket + close(sockfd); + + return reachable; +} + am::CpuInfo ResourceStatus::getCPUInfo() { if(cpu_cnt_ < 0) @@ -350,6 +404,10 @@ void ResourceStatus::timerCB() // Count occurrences of each string for (const std::string& str : running_nodes) { + if(str.find("plugin_name") != std::string::npos) + { + continue; + } string_count[str]++; } @@ -379,6 +437,49 @@ void ResourceStatus::timerCB() } stats_->tf_stats = (tf_check?50:100); + + //IP Address Check + stats_->lidar_ip = 50; + stats_->fl_ip = 50; + stats_->fr_ip = 50; + stats_->rl_ip = 50; + stats_->rr_ip = 50; + + std::map::iterator it = ip_addresses_.begin(); + for(; it != ip_addresses_.end(); ++it) + { + + if(!isReachable(it->first)) + { + //THE DEVICE CANNOT BE REACHED + if(it->second == "lidar") + { + stats_->lidar_ip = 100; + ROS_ERROR("Lidar is not reachable"); + } + if(it->second == "front_left") + { + stats_->fl_ip = 100; + ROS_ERROR("Front Left Camera is not reachable"); + } + if(it->second == "front_right") + { + stats_->fr_ip = 100; + ROS_ERROR("Front Right Camera is not reachable"); + } + if(it->second == "rear_right") + { + stats_->rr_ip = 100; + ROS_ERROR("Rear Right Camera is not reachable"); + } + if(it->second == "rear_left") + { + stats_->rl_ip = 100; + ROS_ERROR("Rear Left Camera is not reachable"); + } + } + } + //Resource Check updateInfos(); From 42d5c3a74559af0329c1384d4d861a3966b95960 Mon Sep 17 00:00:00 2001 From: AJ Date: Thu, 5 Dec 2024 12:26:58 -0800 Subject: [PATCH 09/18] feat: update --- .../resource_monitor/resource_status_class.h | 13 +- .../resource_status_class.cpp | 187 +++++++++++------- 2 files changed, 131 insertions(+), 69 deletions(-) diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 5e74500..7b9bc56 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -71,13 +71,19 @@ class ResourceStatus void print(); - bool isReachable(const std::string &ipAddress, int port = 80, int timeoutSec = 1); + bool isReachable(const std::string &ipAddress); + + std::unordered_set getActiveIPs(const std::string& subnet = "192.168.1.0/24"); std::shared_ptr getStats(); + std::vector getInetAddresses(); + // AMLifeCycle passthrus bool onConfigure(); + bool onCleanup(); + void heartbeatCB(); private: @@ -85,9 +91,11 @@ class ResourceStatus std::shared_ptr stats_; rclcpp::Subscription::SharedPtr status_sub_; + rclcpp::Subscription::SharedPtr stat_sub_; void statusCB(const std_msgs::msg::Int32::SharedPtr msg); + void statCB(const std_msgs::msg::Int32::SharedPtr msg); int getCPUCoresCount(); @@ -116,8 +124,11 @@ class ResourceStatus /*ROS Infrastructure Checking tools*/ std::shared_ptr transformer_; + std::vector> transform_list_; + rclcpp::TimerBase::SharedPtr timer_; + void timerCB(); }; } diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index e91267f..2ef4db2 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -2,10 +2,9 @@ #include #include -#include // For memset -#include // For socket functions -#include // For inet_addr and sockaddr_in -#include // For close +#include // For popen and fgets +#include // For std::unique_ptr +#include // For std::regex namespace am { @@ -270,44 +269,13 @@ void ResourceStatus::getCPUInfo(std::vector &infos) file.close(); } -bool ResourceStatus::isReachable(const std::string &ip_address, int port, int timeoutSec) +bool ResourceStatus::isReachable(const std::string &ip_address) { - /*std::string command = std::string("ping -c 1 ") + ip_address + std::string(" >/dev/null 2>&1"); + std::string command = std::string("ping -c 1 ") + ip_address + std::string(" >/dev/null 2>&1"); int result = std::system(command.c_str()); - return result == 0;*/ - - int sockfd; - struct sockaddr_in serverAddr; - - // Create a socket - sockfd = socket(AF_INET, SOCK_STREAM, 0); - if (sockfd < 0) { - std::cerr << "Error: Cannot create socket\n"; - return false; - } - - // Set socket timeout - struct timeval timeout; - timeout.tv_sec = timeoutSec; - timeout.tv_usec = 0; - setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout, sizeof(timeout)); - setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout, sizeof(timeout)); - - // Set up the server address struct - memset(&serverAddr, 0, sizeof(serverAddr)); - serverAddr.sin_family = AF_INET; - serverAddr.sin_port = htons(port); - serverAddr.sin_addr.s_addr = inet_addr(ip_address.c_str()); - - // Attempt to connect - bool reachable = (connect(sockfd, (struct sockaddr*)&serverAddr, sizeof(serverAddr)) == 0); - - // Close the socket - close(sockfd); - - return reachable; + return result == 0; } am::CpuInfo ResourceStatus::getCPUInfo() @@ -438,50 +406,133 @@ void ResourceStatus::timerCB() stats_->tf_stats = (tf_check?50:100); + + //todo: this should be static and checked once or should be passed as argument depending on the architecture: for sim env this is false + bool ips_should_exists = false; + std::vector sub_nets_add = getInetAddresses(); + for(const std::string &ip : sub_nets_add) + { + ROS_INFO("subnet: %s", ip.c_str()); + if(ip == "192.168.1.1") + { + ips_should_exists = true; + } + } + //IP Address Check stats_->lidar_ip = 50; stats_->fl_ip = 50; stats_->fr_ip = 50; stats_->rl_ip = 50; stats_->rr_ip = 50; - - std::map::iterator it = ip_addresses_.begin(); - for(; it != ip_addresses_.end(); ++it) + //Only if you have the subnet + if(ips_should_exists) { - - if(!isReachable(it->first)) + std::unordered_set available_ips = getActiveIPs(); + std::map::iterator it = ip_addresses_.begin(); + for(; it != ip_addresses_.end(); ++it) { //THE DEVICE CANNOT BE REACHED - if(it->second == "lidar") - { - stats_->lidar_ip = 100; - ROS_ERROR("Lidar is not reachable"); - } - if(it->second == "front_left") - { - stats_->fl_ip = 100; - ROS_ERROR("Front Left Camera is not reachable"); - } - if(it->second == "front_right") - { - stats_->fr_ip = 100; - ROS_ERROR("Front Right Camera is not reachable"); - } - if(it->second == "rear_right") - { - stats_->rr_ip = 100; - ROS_ERROR("Rear Right Camera is not reachable"); - } - if(it->second == "rear_left") + if(available_ips.find(it->first) == available_ips.end()) { - stats_->rl_ip = 100; - ROS_ERROR("Rear Left Camera is not reachable"); - } + if(it->second == "lidar") + { + stats_->lidar_ip = 100; + ROS_ERROR("Lidar is not reachable"); + } + if(it->second == "front_left") + { + stats_->fl_ip = 100; + ROS_ERROR("Front Left Camera is not reachable"); + } + if(it->second == "front_right") + { + stats_->fr_ip = 100; + ROS_ERROR("Front Right Camera is not reachable"); + } + if(it->second == "rear_right") + { + stats_->rr_ip = 100; + ROS_ERROR("Rear Right Camera is not reachable"); + } + if(it->second == "rear_left") + { + stats_->rl_ip = 100; + ROS_ERROR("Rear Left Camera is not reachable"); + } + } } } - //Resource Check updateInfos(); } + + +// Function to execute the nmap command and capture the output +std::unordered_set ResourceStatus::getActiveIPs(const std::string& subnet) +{ + std::unordered_set activeIPs; + std::string command = "nmap -sn " + subnet; + + // Open a pipe to execute the command and read its output + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + if (!pipe) { + std::cerr << "Error: Failed to run nmap command.\n"; + return activeIPs; + } + + // Read the command output line by line + char buffer[128]; + std::string line; + while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr) + { + line = buffer; + // Check if the line contains "Nmap scan report for", indicating a live IP + if (line.find("Nmap scan report for") != std::string::npos) + { + std::string ip = line.substr(line.find_last_of(' ') + 1); + ip.erase(ip.find('\n')); // Remove the newline character + activeIPs.insert(ip); + } + } + + return activeIPs; +} + + +// Function to execute ifconfig and extract inet addresses +std::vector ResourceStatus::getInetAddresses() +{ + std::vector inetAddresses; + std::string command = "ifconfig"; + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + + if (!pipe) { + std::cerr << "Error: Failed to run ifconfig command.\n"; + return inetAddresses; + } + + char buffer[256]; + std::string output; + + // Read the entire output of ifconfig + while (fgets(buffer, sizeof(buffer), pipe.get()) != nullptr) { + output += buffer; + } + + // Regular expression to match inet (IPv4) addresses + std::regex inetRegex(R"(inet\s+(\d+\.\d+\.\d+\.\d+))"); + std::smatch match; + + // Search for inet addresses in the output + auto begin = output.cbegin(); + auto end = output.cend(); + while (std::regex_search(begin, end, match, inetRegex)) { + inetAddresses.push_back(match[1]); + begin = match.suffix().first; // Move to the next match + } + + return inetAddresses; +} } \ No newline at end of file From 3520e7c95f7f7fc313787d971888d8614f44298d Mon Sep 17 00:00:00 2001 From: AJ Date: Thu, 5 Dec 2024 13:46:31 -0800 Subject: [PATCH 10/18] feat: update --- .../resource_monitor/resource_status_class.h | 8 ++ .../resource_status_class.cpp | 96 ++++++++++++++----- 2 files changed, 81 insertions(+), 23 deletions(-) diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 7b9bc56..5a77e4e 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -73,6 +73,8 @@ class ResourceStatus bool isReachable(const std::string &ipAddress); + void getParams(); + std::unordered_set getActiveIPs(const std::string& subnet = "192.168.1.0/24"); std::shared_ptr getStats(); @@ -130,6 +132,12 @@ class ResourceStatus rclcpp::TimerBase::SharedPtr timer_; void timerCB(); + + void checkNodeNames(); + + void checkTransforms(); + + void checkSensorIPs(); }; } diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 2ef4db2..63acecd 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -12,18 +12,7 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) { transformer_ = std::make_shared(); - transform_list_.push_back(std::make_pair("base_link","Asset_Frame")); - transform_list_.push_back(std::make_pair("base_link","ouster_FLU")); - transform_list_.push_back(std::make_pair("base_link","Asset_ENU")); - - ip_addresses_["192.168.1.55"] = std::string("lidar"); - ip_addresses_["192.168.1.10"] = std::string("front_left"); - ip_addresses_["192.168.1.20"] = std::string("front_right"); - ip_addresses_["192.168.1.30"] = std::string("rear_right"); - ip_addresses_["192.168.1.40"] = std::string("rear_left"); - - - + getParams(); timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); @@ -35,6 +24,50 @@ ResourceStatus::~ResourceStatus() } +void ResourceStatus::getParams() +{ + + //getting the ip sensor parameters + int counter = 0; + am::getParam("ip_sensor_cnt", counter, counter); + for(int i = 0; i < counter; i++) + { + std::string ip_check_str = "ip_sensor_" + std::to_string(i); + std::string ip_address = ""; + std::string sensor_name = ""; + am::getParam(ip_check_str + std::string(".ip_address") , ip_address, ip_address); + am::getParam(ip_check_str + std::string(".name") , sensor_name, sensor_name); + if(ip_address == "" || sensor_name == "") + { + ROS_ERROR("ip sensor %d has configuration issues: ip: %s and name: %s", i, ip_address.c_str(), sensor_name.c_str()); + continue; + } + + ip_addresses_[ip_address] = sensor_name; + ROS_INFO(GREEN "IP Sensor[%s] is configured as %s" COLOR_RESET, ip_address.c_str(), sensor_name.c_str()); + } + + //getting the transform list + counter = 0; + am::getParam("transform_cnt", counter, counter); + for(int i = 0; i < counter; i++) + { + std::string transform_str = "transform_" + std::to_string(i); + std::string src = ""; + std::string target = ""; + am::getParam(transform_str + std::string(".source") , src, src); + am::getParam(transform_str + std::string(".target") , target, target); + + if(src == "" || target == "") + { + ROS_ERROR("transform %d has configuration issues: source: %s and target: %s", i, src.c_str(), target.c_str()); + continue; + } + transform_list_.push_back(std::make_pair(src, target)); + ROS_INFO(GREEN "Transform check is set between source %s and target %s" COLOR_RESET, src.c_str(), target.c_str()); + } +} + std::shared_ptr ResourceStatus::getStats() { return stats_; @@ -43,6 +76,7 @@ std::shared_ptr ResourceStatus::getStats() bool ResourceStatus::onConfigure() { status_sub_ = am::Node::node->create_subscription(std::string(am::Node::node->get_name()) + "/status", 100, std::bind(&ResourceStatus::statusCB, this, std::placeholders::_1)); + stat_sub_ = am::Node::node->create_subscription(std::string(am::Node::node->get_name()) + "/stat", 100, std::bind(&ResourceStatus::statCB, this, std::placeholders::_1)); return true; @@ -359,9 +393,7 @@ void ResourceStatus::print() ROS_INFO("%s", msg.c_str()); } - - -void ResourceStatus::timerCB() +void ResourceStatus::checkNodeNames() { rclcpp::node_interfaces::NodeGraphInterface::SharedPtr node_graph = am::Node::node->get_node_graph_interface(); @@ -390,9 +422,10 @@ void ResourceStatus::timerCB() } } stats_->node_stats = (node_check?50:100); +} - - //Transform check +void ResourceStatus::checkTransforms() +{ bool tf_check = true; for(std::pair &tf_str : transform_list_) { @@ -404,15 +437,16 @@ void ResourceStatus::timerCB() } } stats_->tf_stats = (tf_check?50:100); +} - - +void ResourceStatus::checkSensorIPs() +{ //todo: this should be static and checked once or should be passed as argument depending on the architecture: for sim env this is false bool ips_should_exists = false; std::vector sub_nets_add = getInetAddresses(); for(const std::string &ip : sub_nets_add) { - ROS_INFO("subnet: %s", ip.c_str()); + //ROS_INFO("subnet: %s", ip.c_str()); if(ip == "192.168.1.1") { ips_should_exists = true; @@ -463,9 +497,6 @@ void ResourceStatus::timerCB() } } } - //Resource Check - updateInfos(); - } @@ -535,4 +566,23 @@ std::vector ResourceStatus::getInetAddresses() return inetAddresses; } + +/* + Timer Callback: this is where everything is updated + */ +void ResourceStatus::timerCB() +{ + //Checking the repeated node name + checkNodeNames(); + + //Transform check + checkTransforms(); + + //sensor ip check + checkSensorIPs(); + + //Resource Check + updateInfos(); + +} } \ No newline at end of file From cbf706a9db5cd2bd19da8cb521710459aaefb070 Mon Sep 17 00:00:00 2001 From: AJ Date: Thu, 5 Dec 2024 14:50:42 -0800 Subject: [PATCH 11/18] feat: update --- src/resource_monitor/resource_monitor_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resource_monitor/resource_monitor_main.cpp b/src/resource_monitor/resource_monitor_main.cpp index 9506874..81a4f72 100644 --- a/src/resource_monitor/resource_monitor_main.cpp +++ b/src/resource_monitor/resource_monitor_main.cpp @@ -10,7 +10,7 @@ int main(int argc, char** argv) rclcpp::init(argc, argv); // create the AMLifeCycle object with stats and assign it to the AMNode singleton - std::shared_ptr am_node = std::make_shared("resource_minitor"); + std::shared_ptr am_node = std::make_shared("resource_monitor"); std::shared_ptr stats = std::make_shared(am_node->stats_list_); am::Node::node = am_node; From 4e8d59c50f88f764549e1cbc2280cf4e79e702ad Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 10:23:49 -0800 Subject: [PATCH 12/18] feat: update --- .../resource_monitor/resource_monitor_stats.h | 2 + .../resource_monitor/resource_status_class.h | 21 +++- .../resource_status_class.cpp | 117 +++++++++++++++--- 3 files changed, 118 insertions(+), 22 deletions(-) diff --git a/include/resource_monitor/resource_monitor_stats.h b/include/resource_monitor/resource_monitor_stats.h index 2b2519e..45885f6 100644 --- a/include/resource_monitor/resource_monitor_stats.h +++ b/include/resource_monitor/resource_monitor_stats.h @@ -21,6 +21,7 @@ class ResourceMonitorStats AMStat cpu_stats = AMStat("cpu_s", "CPU Stats", 1, 2, 80, 99); AMStat gpu_stats = AMStat("gpu_s", "GPU Stats", 1, 2, 80, 99); AMStat ram_stats = AMStat("ram_s", "RAM Stats", 1, 2, 80, 99); + AMStat drive_stats = AMStat("drive_s", "Drive Stats", 1, 2, 80, 99); AMStat lidar_ip = AMStat("lidar_ip_s", "Lidar IP Stats", 1, 2, 80, 99); AMStat fl_ip = AMStat("fl_s", "FL IP Stats", 1, 2, 80, 99); AMStat fr_ip = AMStat("fr_s", "FR IP Stats", 1, 2, 80, 99); @@ -35,6 +36,7 @@ class ResourceMonitorStats stat_list.add(&gpu_stats); stat_list.add(&cpu_stats); stat_list.add(&ram_stats); + stat_list.add(&drive_stats); stat_list.add(&lidar_ip); stat_list.add(&fl_ip); stat_list.add(&fr_ip); diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 5a77e4e..479f221 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -12,6 +12,8 @@ #include #include #include +#include // For statvfs +#include // For std::setprecision namespace am @@ -30,9 +32,7 @@ struct GpuInfo { std::string gpu_name; int temp; - int mem_used; - int mem_free; - int util_percent; + int load_percent; }; struct CpuInfo @@ -48,6 +48,13 @@ struct CpuInfo unsigned long long total; }; +struct DiskInfo { + unsigned long long totalSpace; // Total space in bytes + unsigned long long availableSpace; // Available space in bytes (matches `df`) + unsigned long long usedSpace; // Used space in bytes + double percentUsed; // Percentage used +}; + class ResourceStatus { public: @@ -59,10 +66,12 @@ class ResourceStatus am::CpuInfo getCPUInfo(); - void getGPUInfo(std::vector &gpu_infos); + am::GpuInfo getGPUInfo(); void getCPUInfo(std::vector &infos); + DiskInfo getDiskInfo(const std::string& path = "/"); + double calculateCpuLoad(const am::CpuInfo &ci, const am::CpuInfo &ci_old); double getUpTime(); @@ -102,6 +111,8 @@ class ResourceStatus int getCPUCoresCount(); + std::string readFile(const std::string& path); + am::CpuInfo parseCpuLine(const std::string &line); int cpu_cnt_= -1; @@ -120,7 +131,7 @@ class ResourceStatus std::vector cpu_infos_old_; - std::vector gpu_infos_; + am::GpuInfo gpu_info_; std::map ip_addresses_; //IPAddress, Name diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 63acecd..36740d3 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -5,6 +5,7 @@ #include // For popen and fgets #include // For std::unique_ptr #include // For std::regex +#include namespace am { @@ -182,7 +183,26 @@ void ResourceStatus::updateInfos() cpu_infos_old_ = cpu_infos_; - getGPUInfo(gpu_infos_); + gpu_info_ = getGPUInfo(); + stats_->gpu_stats = 50; + ROS_INFO("GPU Load Percent: %d , Temp: %d", gpu_info_.load_percent, gpu_info_.temp); + if(gpu_info_.load_percent > 90) + { + stats_->gpu_stats = 100; + ROS_ERROR("GPU Issues: LOAD Percent: %d, Temp: %d", gpu_info_.load_percent, gpu_info_.temp); + } + + + //check the drive stats + stats_->drive_stats = 50; + am::DiskInfo disk_info = getDiskInfo(); + if(disk_info.percentUsed > 98.0) + { + stats_->drive_stats = 100; + int coef = 1024*1024; + ROS_ERROR("Disk total: %lld MB, available: %lld MB, used: %lld MB, percentage: %f", (disk_info.totalSpace/coef), (disk_info.availableSpace/coef), (disk_info.usedSpace/coef), disk_info.percentUsed); + } + } @@ -224,21 +244,69 @@ am::MemoryInfo& ResourceStatus::getMemoryInfo() return mi; } -void ResourceStatus::getGPUInfo(std::vector &gpu_infos) +// Function to read the content of a file +std::string ResourceStatus::readFile(const std::string& path) { - gpu_infos.clear(); + std::ifstream file(path); + if (!file.is_open()) { + throw std::runtime_error("Error: Unable to open file " + path); + } + + std::string content; + std::getline(file, content); + file.close(); + return content; +} + +am::GpuInfo ResourceStatus::getGPUInfo() +{ + am::GpuInfo gpu_info; + + // "/sys/devices/gpu.0/load" exists only in Jetpack + //in contrast, nvidia-smi only exists in amd64 architure + const std::string loadPath = "/sys/devices/gpu.0/load"; + if (boost::filesystem::exists(loadPath)) + { + std::string loadStr = readFile(loadPath); + gpu_info.load_percent = std::stoi(loadStr) / 1000; // Convert to percentage + + const std::string baseThermalPath = "/sys/class/thermal/"; + const std::string typeSuffix = "/type"; + const std::string tempSuffix = "/temp"; + + for (int i = 0; i < 10; ++i) { // Check up to 10 thermal zones + try { + std::string typePath = baseThermalPath + "thermal_zone" + std::to_string(i) + typeSuffix; + std::string type = readFile(typePath); + if (type.find("GPU") != std::string::npos) + { // Look for the GPU thermal zone + std::string tempPath = baseThermalPath + "thermal_zone" + std::to_string(i) + tempSuffix; + std::string tempStr = readFile(tempPath); + gpu_info.temp = std::stoi(tempStr) / 1000; // Convert millidegrees to degrees Celsius + } + } catch (...) { + // Ignore errors and continue checking other zones + } + } + throw std::runtime_error("Error: GPU thermal zone not found."); + + return gpu_info; + } + + // Execute the nvidia-smi command and read the output directly const std::string command = "nvidia-smi --query-gpu=name,utilization.gpu,temperature.gpu,memory.used,memory.free --format=csv,nounits,noheader"; FILE* pipe = popen(command.c_str(), "r"); if (!pipe) { ROS_ERROR("Error: Unable to execute nvidia-smi. Ensure it's installed and available in PATH."); - return; + return gpu_info; } char buffer[128]; std::ostringstream result; - while (fgets(buffer, sizeof(buffer), pipe) != nullptr) { + while (fgets(buffer, sizeof(buffer), pipe) != nullptr) + { result << buffer; } pclose(pipe); @@ -248,10 +316,8 @@ void ResourceStatus::getGPUInfo(std::vector &gpu_infos) std::string line; while (std::getline(iss, line)) { - //ROS_INFO(GREEN "%s" COLOR_RESET, line.c_str()); std::istringstream lineStream(line); - am::GpuInfo gpu_info; // Parse memory used and free values std::string gpuName; int gpuUtilization, gpuTemperature, memoryUsed, memoryFree; @@ -269,14 +335,15 @@ void ResourceStatus::getGPUInfo(std::vector &gpu_infos) lineStream >> memoryFree; gpu_info.gpu_name = gpuName; - gpu_info.util_percent = gpuUtilization; + gpu_info.load_percent = gpuUtilization; gpu_info.temp = gpuTemperature; - gpu_info.mem_free = memoryFree; - gpu_info.mem_used = memoryUsed; - stats_->gpu_stats = (gpu_info.util_percent>90?100:50); - - gpu_infos.push_back(gpu_info); + + return gpu_info; } + + + + return gpu_info; } @@ -385,10 +452,7 @@ void ResourceStatus::print() ROS_INFO("UpTime: %f", uptime_seconds_); msg = ""; - for(int i = 0; i < gpu_infos_.size(); i++) - { - msg += gpu_infos_[i].gpu_name + ": Temp[C] = " + std::to_string(gpu_infos_[i].temp) + ", Used[%]: " + std::to_string(gpu_infos_[i].util_percent); - } + msg += "GPU: Temp[C] = " + std::to_string(gpu_info_.temp) + ", Used[%]: " + std::to_string(gpu_info_.load_percent); ROS_INFO("%s", msg.c_str()); } @@ -567,6 +631,25 @@ std::vector ResourceStatus::getInetAddresses() return inetAddresses; } + +// Function to get disk usage information +DiskInfo ResourceStatus::getDiskInfo(const std::string& path) +{ + struct statvfs stat; + + if (statvfs(path.c_str(), &stat) != 0) { + throw std::runtime_error("Error: Unable to get disk information for " + path); + } + + DiskInfo info; + info.totalSpace = stat.f_blocks * stat.f_frsize; // Total blocks * block size + info.availableSpace = stat.f_bavail * stat.f_frsize; // Available blocks * block size + info.usedSpace = info.totalSpace - info.availableSpace; + info.percentUsed = (info.usedSpace * 100.0) / info.totalSpace; + + return info; +} + /* Timer Callback: this is where everything is updated */ From c5a8c2bfbb5f9bf6e242c29f81ec5b4670fd9133 Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 10:31:39 -0800 Subject: [PATCH 13/18] feat: update --- src/resource_monitor/resource_status_class.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 36740d3..ee393de 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -277,6 +277,7 @@ am::GpuInfo ResourceStatus::getGPUInfo() for (int i = 0; i < 10; ++i) { // Check up to 10 thermal zones try { std::string typePath = baseThermalPath + "thermal_zone" + std::to_string(i) + typeSuffix; + ROS_INFO("Type file: %s", typePath.c_str()); std::string type = readFile(typePath); if (type.find("GPU") != std::string::npos) { // Look for the GPU thermal zone From af0fa9b5b4ba99d552fd902fd1237d56e71e4ad7 Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 10:35:55 -0800 Subject: [PATCH 14/18] feat: update --- .../resource_status_class.cpp | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index ee393de..f622c61 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -274,22 +274,30 @@ am::GpuInfo ResourceStatus::getGPUInfo() const std::string typeSuffix = "/type"; const std::string tempSuffix = "/temp"; - for (int i = 0; i < 10; ++i) { // Check up to 10 thermal zones + bool found_gpu_file = false; + for (int i = 0; i < 10; ++i) + { // Check up to 10 thermal zones try { - std::string typePath = baseThermalPath + "thermal_zone" + std::to_string(i) + typeSuffix; - ROS_INFO("Type file: %s", typePath.c_str()); - std::string type = readFile(typePath); - if (type.find("GPU") != std::string::npos) - { // Look for the GPU thermal zone - std::string tempPath = baseThermalPath + "thermal_zone" + std::to_string(i) + tempSuffix; - std::string tempStr = readFile(tempPath); - gpu_info.temp = std::stoi(tempStr) / 1000; // Convert millidegrees to degrees Celsius - } - } catch (...) { + std::string typePath = baseThermalPath + "thermal_zone" + std::to_string(i) + typeSuffix; + ROS_INFO("Type file: %s", typePath.c_str()); + if(!boost::filesystem::exists(typePath)) + { + continue; + } + std::string type = readFile(typePath); + if (type.find("GPU") != std::string::npos) + { // Look for the GPU thermal zone + found_gpu_file = true; + std::string tempPath = baseThermalPath + "thermal_zone" + std::to_string(i) + tempSuffix; + std::string tempStr = readFile(tempPath); + gpu_info.temp = std::stoi(tempStr) / 1000; // Convert millidegrees to degrees Celsius + break; + } + } catch (...) + { // Ignore errors and continue checking other zones } } - throw std::runtime_error("Error: GPU thermal zone not found."); return gpu_info; } From bdbfa046d4cdb149c3670254aa5e6b30859b06fd Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 10:54:18 -0800 Subject: [PATCH 15/18] feat: update --- include/resource_monitor/resource_status_class.h | 2 ++ src/resource_monitor/resource_status_class.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 479f221..6f66e6d 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -133,6 +133,8 @@ class ResourceStatus am::GpuInfo gpu_info_; + std::vector sub_nets_add_; + std::map ip_addresses_; //IPAddress, Name /*ROS Infrastructure Checking tools*/ diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index f622c61..8ca3eff 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -13,6 +13,8 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) { transformer_ = std::make_shared(); + sub_nets_add_ = getInetAddresses(); + getParams(); timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); @@ -516,8 +518,8 @@ void ResourceStatus::checkSensorIPs() { //todo: this should be static and checked once or should be passed as argument depending on the architecture: for sim env this is false bool ips_should_exists = false; - std::vector sub_nets_add = getInetAddresses(); - for(const std::string &ip : sub_nets_add) + + for(const std::string &ip : sub_nets_add_) { //ROS_INFO("subnet: %s", ip.c_str()); if(ip == "192.168.1.1") From 282fcb7dacb724b38a6071799d5fa3631e81a0aa Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 10:55:43 -0800 Subject: [PATCH 16/18] feat: update --- src/resource_monitor/resource_status_class.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 8ca3eff..b944b44 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -14,6 +14,7 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) transformer_ = std::make_shared(); sub_nets_add_ = getInetAddresses(); + getParams(); From 51ded3864088a555ac8d5cf25012e9c09ad88e6c Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 13:13:36 -0800 Subject: [PATCH 17/18] feat: using the old ping method with better options --- .../resource_monitor/resource_status_class.h | 2 ++ .../resource_status_class.cpp | 35 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/include/resource_monitor/resource_status_class.h b/include/resource_monitor/resource_status_class.h index 6f66e6d..0f24eb6 100644 --- a/include/resource_monitor/resource_status_class.h +++ b/include/resource_monitor/resource_status_class.h @@ -121,6 +121,8 @@ class ResourceStatus double uptime_seconds_; + bool ip_check_ {false}; + bool is_first_time_ {true}; std::vector cpu_loads_; diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index b944b44..343cda8 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -14,8 +14,16 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) transformer_ = std::make_shared(); sub_nets_add_ = getInetAddresses(); - + for(const std::string &ip : sub_nets_add_) + { + //ROS_INFO("subnet: %s", ip.c_str()); + if(ip == "192.168.1.1") + { + ip_check_ = true; + } + } + getParams(); timer_ = am::Node::node->create_wall_timer(am::toDuration(1.0), std::bind(&ResourceStatus::timerCB, this)); @@ -384,7 +392,7 @@ void ResourceStatus::getCPUInfo(std::vector &infos) bool ResourceStatus::isReachable(const std::string &ip_address) { - std::string command = std::string("ping -c 1 ") + ip_address + std::string(" >/dev/null 2>&1"); + std::string command = std::string("ping -c 1 -W 0.2 ") + ip_address + std::string(" >/dev/null 2>&1"); int result = std::system(command.c_str()); @@ -516,18 +524,7 @@ void ResourceStatus::checkTransforms() } void ResourceStatus::checkSensorIPs() -{ - //todo: this should be static and checked once or should be passed as argument depending on the architecture: for sim env this is false - bool ips_should_exists = false; - - for(const std::string &ip : sub_nets_add_) - { - //ROS_INFO("subnet: %s", ip.c_str()); - if(ip == "192.168.1.1") - { - ips_should_exists = true; - } - } +{ //IP Address Check stats_->lidar_ip = 50; @@ -536,15 +533,15 @@ void ResourceStatus::checkSensorIPs() stats_->rl_ip = 50; stats_->rr_ip = 50; //Only if you have the subnet - if(ips_should_exists) + if(ip_check_) { - std::unordered_set available_ips = getActiveIPs(); std::map::iterator it = ip_addresses_.begin(); for(; it != ip_addresses_.end(); ++it) { - //THE DEVICE CANNOT BE REACHED - if(available_ips.find(it->first) == available_ips.end()) + + if(!isReachable(it->first)) { + //THE DEVICE CANNOT BE REACHED if(it->second == "lidar") { stats_->lidar_ip = 100; @@ -569,7 +566,7 @@ void ResourceStatus::checkSensorIPs() { stats_->rl_ip = 100; ROS_ERROR("Rear Left Camera is not reachable"); - } + } } } } From 6e4373844cde318503e5b7604b5783db239be571 Mon Sep 17 00:00:00 2001 From: AJ Date: Fri, 6 Dec 2024 13:24:01 -0800 Subject: [PATCH 18/18] feat: less trace --- src/resource_monitor/resource_status_class.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/resource_monitor/resource_status_class.cpp b/src/resource_monitor/resource_status_class.cpp index 343cda8..2e94962 100644 --- a/src/resource_monitor/resource_status_class.cpp +++ b/src/resource_monitor/resource_status_class.cpp @@ -21,6 +21,7 @@ ResourceStatus::ResourceStatus(std::shared_ptr stats) if(ip == "192.168.1.1") { ip_check_ = true; + ROS_INFO("Resource Monitor: looking for sensors on 192.168.1.1"); } } @@ -196,7 +197,7 @@ void ResourceStatus::updateInfos() gpu_info_ = getGPUInfo(); stats_->gpu_stats = 50; - ROS_INFO("GPU Load Percent: %d , Temp: %d", gpu_info_.load_percent, gpu_info_.temp); + //ROS_INFO("GPU Load Percent: %d , Temp: %d", gpu_info_.load_percent, gpu_info_.temp); if(gpu_info_.load_percent > 90) { stats_->gpu_stats = 100; @@ -290,7 +291,7 @@ am::GpuInfo ResourceStatus::getGPUInfo() { // Check up to 10 thermal zones try { std::string typePath = baseThermalPath + "thermal_zone" + std::to_string(i) + typeSuffix; - ROS_INFO("Type file: %s", typePath.c_str()); + //ROS_INFO("Type file: %s", typePath.c_str()); if(!boost::filesystem::exists(typePath)) { continue;