diff --git a/include/heidi-kernel/gov_rule.h b/include/heidi-kernel/gov_rule.h index 6e006d3..545c258 100644 --- a/include/heidi-kernel/gov_rule.h +++ b/include/heidi-kernel/gov_rule.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace heidi { namespace gov { @@ -42,6 +43,7 @@ struct CpuPolicy { std::optional nice; std::optional max_pct; std::optional period_us; + std::optional quota_us; }; struct MemPolicy { diff --git a/include/heidi-kernel/group_policy_store.h b/include/heidi-kernel/group_policy_store.h new file mode 100644 index 0000000..c42d5fa --- /dev/null +++ b/include/heidi-kernel/group_policy_store.h @@ -0,0 +1,95 @@ +#pragma once + +#include "heidi-kernel/gov_rule.h" + +#include +#include +#include +#include + +namespace heidi { +namespace gov { + /* Fixed capacities of the two tables inside GroupPolicyStore; when a table is full the entry with the smallest timestamp is evicted to make room. */ +constexpr size_t kMaxGroups = 256; +constexpr size_t kMaxPidGroupMap = 8192; + /* Identity, last-update timestamp and optional limits of one group. NOTE(review): nothing in the visible diff ever assigns cpu_quota_us or cpu_period_us, so they are always empty when apply_cgroup_policy reads them. */ +struct GroupPolicy { + std::array group_id{}; + uint64_t last_update_ns = 0; + + std::optional cpu_max_pct; + std::optional cpu_quota_us; + std::optional cpu_period_us; + + std::optional mem_max_bytes; + std::optional mem_high_bytes; + + std::optional pids_max; + + std::optional default_action; + std::optional apply_deadline_ms; + /* True when at least one of the optional fields above holds a value. */ + bool has_any_policy() const; +}; + /* Fixed-size store of group policies plus a table meant to map pids to group ids; both evict the entry with the oldest timestamp when full. */ +class GroupPolicyStore { +public: /* NOTE(review): EvictReason is not referenced anywhere else in the visible diff. */ + enum class EvictReason : uint8_t { + NONE = 0, + GROUP_EVICTED = 1, + PIDMAP_EVICTED = 2, + }; + /* Counters returned by get_stats(). The store code shown in this diff only ever writes group_evictions and pidmap_evictions (and fills the two counts in get_stats); attach_failures, cgroup_unavailable_count and last_err keep their initial values. */ + struct Stats { + size_t group_count = 0; + size_t pid_group_map_count = 0; + uint64_t group_evictions = 0; + uint64_t pidmap_evictions = 0; + uint64_t attach_failures = 0; + uint64_t cgroup_unavailable_count = 0; + int last_err = 0; + }; + + bool upsert_group(const char* group_id, const GovApplyMsg& msg); + bool map_pid_to_group(int32_t pid, const char* group_id); + const GroupPolicy* get_group(const char* group_id) const; + const char* get_group_for_pid(int32_t pid) const; + Stats get_stats() const; + void clear(); + /* Test hooks: a non-zero test_seq_ replaces the steady clock inside get_time(); a value of 0 means the real clock is used. */ + void set_time_for_test(uint64_t seq) { + test_seq_ = seq; + } + uint64_t get_time_for_test() const { + 
return test_seq_; + } /* Advances the fake clock by kTimeIncrement; does nothing while test_seq_ is 0. */ + void tick(); + + static constexpr uint64_t kTimeIncrement = 1000000000ULL; + +private: + uint64_t get_time() const; + void evict_oldest_group(); + void evict_oldest_pid_entry(); + + struct GroupEntry { + GroupPolicy policy; + bool in_use = false; + }; + /* NOTE(review): PidEntry stores no pid. The lookups in group_policy_store.cpp compare the slot index against the pid instead, which looks unintended. */ + struct PidEntry { + std::array group_id{}; + uint64_t last_seen_ns = 0; + bool in_use = false; + }; + + GroupEntry groups_[kMaxGroups]; + PidEntry pid_map_[kMaxPidGroupMap]; + size_t group_count_ = 0; + size_t pid_map_count_ = 0; + Stats stats_; + uint64_t test_seq_ = 0; +}; + +} // namespace gov +} // namespace heidi diff --git a/include/heidi-kernel/process_governor.h b/include/heidi-kernel/process_governor.h index 6816700..d95dba7 100644 --- a/include/heidi-kernel/process_governor.h +++ b/include/heidi-kernel/process_governor.h @@ -1,6 +1,8 @@ #pragma once +#include "heidi-kernel/cgroup_driver.h" #include "heidi-kernel/gov_rule.h" +#include "heidi-kernel/group_policy_store.h" #include #include @@ -16,6 +18,36 @@ namespace heidi { namespace gov { /* Event codes. Values 4, 5 and 6 are the integer literals that process_governor.cpp passes to event_callback_ in this diff. */ +enum class GovEventType : uint8_t { + APPLY_SUCCESS = 0, + APPLY_FAILURE = 1, + PID_EXIT = 2, + PID_EVICTED = 3, + GROUP_EVICTED = 4, + PIDMAP_EVICTED = 5, + CGROUP_UNAVAILABLE = 6, +}; + /* Maps an event code to the spelling of its enumerator; a value outside the enum falls through the switch and yields UNKNOWN. */ +constexpr inline const char* gov_event_name(GovEventType e) { + switch (e) { + case GovEventType::APPLY_SUCCESS: + return "APPLY_SUCCESS"; + case GovEventType::APPLY_FAILURE: + return "APPLY_FAILURE"; + case GovEventType::PID_EXIT: + return "PID_EXIT"; + case GovEventType::PID_EVICTED: + return "PID_EVICTED"; + case GovEventType::GROUP_EVICTED: + return "GROUP_EVICTED"; + case GovEventType::PIDMAP_EVICTED: + return "PIDMAP_EVICTED"; + case GovEventType::CGROUP_UNAVAILABLE: + return "CGROUP_UNAVAILABLE"; + } + return "UNKNOWN"; +} + struct ApplyResult { bool success = false; int err = 0; @@ -55,6 +87,9 @@ class ProcessGovernor { size_t tracked_pids = 0; uint64_t pid_exit_events = 0; uint64_t evicted_events = 0; + uint64_t group_evictions = 0; + uint64_t 
pidmap_evictions = 0; + uint64_t cgroup_unavailable_events = 0; }; Stats get_stats() const; @@ -68,6 +103,8 @@ class ProcessGovernor { void epoll_loop(); ApplyResult apply_rules(int32_t pid, const GovApplyMsg& msg); + ApplyResult apply_group_policy(int32_t pid, const GovApplyMsg& msg); + ApplyResult apply_cgroup_policy(int32_t pid, const GroupPolicy& group_policy); ApplyResult apply_affinity(int32_t pid, const std::string& affinity); ApplyResult apply_nice(int32_t pid, int8_t nice); @@ -98,6 +135,11 @@ class ProcessGovernor { std::function event_callback_; /* Group-policy state: the policy store, the cgroup driver, and the timestamp plus interval that apply_cgroup_policy uses to rate-limit its driver-unavailable event. */ + GroupPolicyStore group_store_; + CgroupDriver cgroup_driver_; + uint64_t last_cgroup_unavailable_ns_ = 0; + static constexpr uint64_t kCgroupUnavailableRateLimitNs = 1000000000ULL; + Stats stats_; }; diff --git a/src/governor/group_policy_store.cpp b/src/governor/group_policy_store.cpp new file mode 100644 index 0000000..64744ce --- /dev/null +++ b/src/governor/group_policy_store.cpp @@ -0,0 +1,217 @@ +#include "heidi-kernel/group_policy_store.h" + +#include +#include + +namespace heidi { +namespace gov { + +namespace { + /* NOTE(review): this helper is not called anywhere in the visible part of this file; get_time() below repeats the same expression. */ +uint64_t get_current_time_ns() { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); +} + +} // namespace + /* Returns the fake clock when a non-zero test_seq_ is installed, otherwise the steady clock time since its epoch (presumably in nanoseconds; the duration_cast target is not visible here). */ +uint64_t GroupPolicyStore::get_time() const { + if (test_seq_ != 0) { + return test_seq_; + } + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); +} + +void GroupPolicyStore::tick() { + if (test_seq_ != 0) { + test_seq_ += kTimeIncrement; + } +} + +bool GroupPolicy::has_any_policy() const { + return cpu_max_pct.has_value() || cpu_quota_us.has_value() || cpu_period_us.has_value() || + mem_max_bytes.has_value() || mem_high_bytes.has_value() || pids_max.has_value() || + default_action.has_value() || apply_deadline_ms.has_value(); +} + /* Creates or refreshes the group named by msg.group; returns false when msg has no group, the id is longer than kMaxGroupIdLen, or no slot could be obtained. NOTE(review): the group_id parameter is never read; the id always comes from msg.group. */ +bool GroupPolicyStore::upsert_group(const char* group_id, const GovApplyMsg& msg) { + if (!msg.group) { + return false; + } + + const char* gid = 
msg.group->c_str(); + size_t gid_len = msg.group->size(); + if (gid_len > kMaxGroupIdLen) { + return false; + } + /* Pass 1: the group already exists, so refresh its timestamp and overwrite only the fields that msg actually sets. NOTE(review): quota_us and period_us of msg.cpu are not copied. */ + for (size_t i = 0; i < kMaxGroups; ++i) { + if (groups_[i].in_use && + std::strncmp(groups_[i].policy.group_id.data(), gid, kMaxGroupIdLen) == 0) { + groups_[i].policy.last_update_ns = get_time(); + + if (msg.cpu) { + if (msg.cpu->max_pct) + groups_[i].policy.cpu_max_pct = msg.cpu->max_pct; + } + if (msg.mem) { + if (msg.mem->max_bytes) + groups_[i].policy.mem_max_bytes = msg.mem->max_bytes; + if (msg.mem->high_bytes) + groups_[i].policy.mem_high_bytes = msg.mem->high_bytes; + } + if (msg.pids) { + if (msg.pids->max) + groups_[i].policy.pids_max = msg.pids->max; + } + if (msg.action) { + groups_[i].policy.default_action = msg.action; + } + if (msg.timeouts && msg.timeouts->apply_deadline_ms) { + groups_[i].policy.apply_deadline_ms = msg.timeouts->apply_deadline_ms; + } + return true; + } + } + /* Pass 2: claim the first free slot. NOTE(review): only the id and the timestamp are stored here; none of the limits in msg are copied, so a newly created group carries no policy until it is upserted a second time. */ + for (size_t i = 0; i < kMaxGroups; ++i) { + if (!groups_[i].in_use) { + groups_[i] = GroupEntry{}; + std::strncpy(groups_[i].policy.group_id.data(), gid, gid_len); + groups_[i].policy.group_id[gid_len] = '\0'; + groups_[i].policy.last_update_ns = get_time(); + groups_[i].in_use = true; + group_count_++; + return true; + } + } + /* Table full: drop the entry with the oldest timestamp, count the eviction, then repeat the free-slot scan. */ + evict_oldest_group(); + stats_.group_evictions++; + + for (size_t i = 0; i < kMaxGroups; ++i) { + if (!groups_[i].in_use) { + groups_[i] = GroupEntry{}; + std::strncpy(groups_[i].policy.group_id.data(), gid, gid_len); + groups_[i].policy.group_id[gid_len] = '\0'; + groups_[i].policy.last_update_ns = get_time(); + groups_[i].in_use = true; + group_count_++; + return true; + } + } + + return false; +} + /* Records group_id for pid, evicting the oldest entry when the table is full. group_id is copied with strncpy bounded by kMaxGroupIdLen and is not checked for null. NOTE(review): the first loop matches on slot index == pid rather than on a stored pid, so an existing mapping is only found when the pid happens to equal the index of its slot. */ +bool GroupPolicyStore::map_pid_to_group(int32_t pid, const char* group_id) { + for (size_t i = 0; i < kMaxPidGroupMap; ++i) { + if (pid_map_[i].in_use && static_cast(i) == pid) { + std::strncpy(pid_map_[i].group_id.data(), group_id, kMaxGroupIdLen); + pid_map_[i].group_id[kMaxGroupIdLen] = '\0'; + pid_map_[i].last_seen_ns = get_time(); 
+ return true; + } + } + /* No match: take the first free slot; its index has no relation to pid. */ + for (size_t i = 0; i < kMaxPidGroupMap; ++i) { + if (!pid_map_[i].in_use) { + pid_map_[i] = PidEntry{}; + std::strncpy(pid_map_[i].group_id.data(), group_id, kMaxGroupIdLen); + pid_map_[i].group_id[kMaxGroupIdLen] = '\0'; + pid_map_[i].last_seen_ns = get_time(); + pid_map_[i].in_use = true; + pid_map_count_++; + return true; + } + } + /* Table full: free the slot with the oldest last_seen_ns, count the eviction, and retry. */ + evict_oldest_pid_entry(); + stats_.pidmap_evictions++; + + for (size_t i = 0; i < kMaxPidGroupMap; ++i) { + if (!pid_map_[i].in_use) { + pid_map_[i] = PidEntry{}; + std::strncpy(pid_map_[i].group_id.data(), group_id, kMaxGroupIdLen); + pid_map_[i].group_id[kMaxGroupIdLen] = '\0'; + pid_map_[i].last_seen_ns = get_time(); + pid_map_[i].in_use = true; + pid_map_count_++; + return true; + } + } + + return false; +} + /* Linear search by id; returns a pointer into groups_, or nullptr when no in-use entry matches. */ +const GroupPolicy* GroupPolicyStore::get_group(const char* group_id) const { + for (size_t i = 0; i < kMaxGroups; ++i) { + if (groups_[i].in_use && + std::strncmp(groups_[i].policy.group_id.data(), group_id, kMaxGroupIdLen) == 0) { + return &groups_[i].policy; + } + } + return nullptr; +} + /* NOTE(review): same slot index == pid comparison as map_pid_to_group; because i stays below kMaxPidGroupMap, a pid of kMaxPidGroupMap or more can never be found. */ +const char* GroupPolicyStore::get_group_for_pid(int32_t pid) const { + for (size_t i = 0; i < kMaxPidGroupMap; ++i) { + if (pid_map_[i].in_use && static_cast(i) == pid) { + return pid_map_[i].group_id.data(); + } + } + return nullptr; +} + /* Copy of the counters with the current table sizes filled in. */ +GroupPolicyStore::Stats GroupPolicyStore::get_stats() const { + Stats s = stats_; + s.group_count = group_count_; + s.pid_group_map_count = pid_map_count_; + return s; +} + /* Resets both tables and all counters; test_seq_ is left untouched. */ +void GroupPolicyStore::clear() { + for (auto& g : groups_) { + g = GroupEntry{}; + } + for (auto& p : pid_map_) { + p = PidEntry{}; + } + group_count_ = 0; + pid_map_count_ = 0; + stats_ = Stats(); +} + /* Frees the in-use slot with the smallest last_update_ns; on a tie the lowest index wins because the comparison is strict. */ +void GroupPolicyStore::evict_oldest_group() { + uint64_t oldest = UINT64_MAX; + size_t oldest_idx = 0; + for (size_t i = 0; i < kMaxGroups; ++i) { + if (groups_[i].in_use && groups_[i].policy.last_update_ns < oldest) { + oldest = groups_[i].policy.last_update_ns; + oldest_idx = i; + } + } + 
groups_[oldest_idx].in_use = false; /* If no slot was in use, oldest_idx is still 0 and slot 0 is merely cleared again; the guard below keeps the counter from wrapping. */ + if (group_count_ > 0) + group_count_--; +} + /* Same policy as evict_oldest_group, keyed on last_seen_ns of the pid table. */ +void GroupPolicyStore::evict_oldest_pid_entry() { + uint64_t oldest = UINT64_MAX; + size_t oldest_idx = 0; + for (size_t i = 0; i < kMaxPidGroupMap; ++i) { + if (pid_map_[i].in_use && pid_map_[i].last_seen_ns < oldest) { + oldest = pid_map_[i].last_seen_ns; + oldest_idx = i; + } + } + pid_map_[oldest_idx].in_use = false; + if (pid_map_count_ > 0) + pid_map_count_--; +} + +} // namespace gov +} // namespace heidi diff --git a/src/governor/process_governor.cpp b/src/governor/process_governor.cpp index dae0a87..30757c4 100644 --- a/src/governor/process_governor.cpp +++ b/src/governor/process_governor.cpp @@ -205,6 +205,10 @@ ProcessGovernor::Stats ProcessGovernor::get_stats() const { Stats s = stats_; s.rules_count = rules_.size(); s.tracked_pids = rules_.size(); /* NOTE(review): the assignments below overwrite the values just copied from stats_, so the increments of stats_.group_evictions, stats_.pidmap_evictions and stats_.cgroup_unavailable_events made elsewhere in this diff never reach callers; the store code shown in this diff never increments cgroup_unavailable_count. */ + auto store_stats = group_store_.get_stats(); + s.group_evictions = store_stats.group_evictions; + s.pidmap_evictions = store_stats.pidmap_evictions; + s.cgroup_unavailable_events = store_stats.cgroup_unavailable_count; return s; } @@ -394,9 +398,120 @@ void ProcessGovernor::epoll_loop() { } } /* Stores the group policy carried by msg, maps pid to that group, reports evictions through event_callback_, then pushes the stored policy to the cgroup driver. Returns success when msg has no group. */ +ApplyResult ProcessGovernor::apply_group_policy(int32_t pid, const GovApplyMsg& msg) { + ApplyResult result; + + if (msg.group) { + bool inserted = group_store_.upsert_group(msg.group->c_str(), msg); + if (!inserted) { + result.err = ENOMEM; + result.error_detail = "failed to upsert group policy"; + return result; + } + /* NOTE(review): this snapshot is taken after upsert_group(), so a group eviction performed by that call is not detected below; map_pid_to_group() only changes pidmap_evictions. The return value of map_pid_to_group() is also ignored. */ + auto prev_stats = group_store_.get_stats(); + group_store_.map_pid_to_group(pid, msg.group->c_str()); + auto new_stats = group_store_.get_stats(); + + if (new_stats.group_evictions > prev_stats.group_evictions) { + stats_.group_evictions++; + if (event_callback_) { /* 4 equals GovEventType::GROUP_EVICTED; the message carries the pid being applied, not the evicted entry. */ + GovApplyMsg evict_msg; + evict_msg.pid = pid; + event_callback_(4, evict_msg, 0); + } + } + if (new_stats.pidmap_evictions > prev_stats.pidmap_evictions) { + stats_.pidmap_evictions++; + if (event_callback_) { /* 5 equals GovEventType::PIDMAP_EVICTED. */ + GovApplyMsg evict_msg; + evict_msg.pid = 
pid; + event_callback_(5, evict_msg, 0); + } + } + /* Look the group back up through the pid table and apply its stored policy; a missing mapping or a missing group is skipped without an error. */ + const char* group_id = group_store_.get_group_for_pid(pid); + if (group_id) { + const GroupPolicy* group_policy = group_store_.get_group(group_id); + if (group_policy) { + auto r = apply_cgroup_policy(pid, *group_policy); + if (!r.success) { + return r; + } + } + } + } + + result.success = true; + return result; +} + /* Translates a GroupPolicy into CpuPolicy, MemPolicy and PidsPolicy values and hands them to the cgroup driver; driver errors are copied into the returned ApplyResult. */ +ApplyResult ProcessGovernor::apply_cgroup_policy(int32_t pid, const GroupPolicy& group_policy) { + ApplyResult result; + /* Driver unavailable or disabled: reported as success, and event 6 (GovEventType::CGROUP_UNAVAILABLE) is emitted only when more than kCgroupUnavailableRateLimitNs has passed since the previous one. NOTE(review): get_current_time_ns() has to be provided by this translation unit; the helper of that name in group_policy_store.cpp sits in an anonymous namespace. */ + if (!cgroup_driver_.is_available() || !cgroup_driver_.is_enabled()) { + uint64_t now = get_current_time_ns(); + if (now - last_cgroup_unavailable_ns_ > kCgroupUnavailableRateLimitNs) { + last_cgroup_unavailable_ns_ = now; + stats_.cgroup_unavailable_events++; + if (event_callback_) { + GovApplyMsg msg; + msg.pid = pid; + event_callback_(6, msg, 0); + } + } + result.success = true; + return result; + } + + CgroupDriver::ApplyResult cr; + /* Only fields that are set in the group policy are copied; the others keep the default state of the freshly constructed policy structs. */ + CpuPolicy cpu; + if (group_policy.cpu_max_pct) { + cpu.max_pct = group_policy.cpu_max_pct; + } + if (group_policy.cpu_quota_us) { + cpu.quota_us = group_policy.cpu_quota_us; + } + if (group_policy.cpu_period_us) { + cpu.period_us = group_policy.cpu_period_us; + } + + MemPolicy mem; + if (group_policy.mem_max_bytes) { + mem.max_bytes = group_policy.mem_max_bytes; + } + if (group_policy.mem_high_bytes) { + mem.high_bytes = group_policy.mem_high_bytes; + } + + PidsPolicy pids; + if (group_policy.pids_max) { + pids.max = group_policy.pids_max; + } + + cr = cgroup_driver_.apply(pid, cpu, mem, pids); + + if (!cr.success) { + result.err = cr.err; + result.error_detail = cr.error_detail; + return result; + } + + result.success = true; + return result; +} + ApplyResult ProcessGovernor::apply_rules(int32_t pid, const GovApplyMsg& msg) { ApplyResult result; /* The group policy is applied before the per-process rules; a failure there ends apply_rules early with that result. */ + if (msg.group) { + auto r = apply_group_policy(pid, msg); + if (!r.success) { + return r; + } + } + if (msg.cpu) { if (msg.cpu->affinity) { auto r = apply_affinity(pid, *msg.cpu->affinity); diff 
--git a/tests/test_group_policy_store.cpp b/tests/test_group_policy_store.cpp new file mode 100644 index 0000000..edc5166 --- /dev/null +++ b/tests/test_group_policy_store.cpp @@ -0,0 +1,191 @@ +#include "heidi-kernel/group_policy_store.h" + +#include +#include + +namespace heidi { +namespace gov { + /* Fixture: every test gets a fresh store, switches it to the fake clock with set_time_for_test(1) and advances that clock with tick() between insertions so that timestamps are strictly ordered. */ +class GroupPolicyStoreEvictionTest : public ::testing::Test { +protected: + GroupPolicyStore store_; +}; + /* Fills all 256 group slots, inserts one more group and expects the first-inserted group to be the one evicted. */ +TEST_F(GroupPolicyStoreEvictionTest, Insert256GroupsEvictsOldest) { + store_.set_time_for_test(1); + + for (int i = 0; i < 256; ++i) { + GovApplyMsg msg; + msg.group = "group_" + std::to_string(i); + CpuPolicy cpu; + cpu.max_pct = static_cast(i % 100); + msg.cpu = cpu; + bool ok = store_.upsert_group(msg.group->c_str(), msg); + EXPECT_TRUE(ok); + store_.tick(); + } + + auto stats = store_.get_stats(); + EXPECT_EQ(stats.group_count, 256); + /* This is the 257th distinct group; ids group_0 to group_255 were inserted by the loop above. */ + GovApplyMsg msg257; + msg257.group = "group_257"; + CpuPolicy cpu257; + cpu257.max_pct = 50; + msg257.cpu = cpu257; + bool ok = store_.upsert_group(msg257.group->c_str(), msg257); + EXPECT_TRUE(ok); + + stats = store_.get_stats(); + EXPECT_EQ(stats.group_count, 256); + EXPECT_EQ(stats.group_evictions, 1); + + auto* g0 = store_.get_group("group_0"); + EXPECT_EQ(g0, nullptr); + + auto* g257 = store_.get_group("group_257"); + EXPECT_NE(g257, nullptr); +} + /* Re-upserting group_a refreshes its timestamp, so group_b becomes the oldest entry and is the one evicted once the table overflows. */ +TEST_F(GroupPolicyStoreEvictionTest, TouchGroupUpdatesLastSeen) { + store_.set_time_for_test(1); + + GovApplyMsg msg1; + msg1.group = "group_a"; + CpuPolicy cpu1; + cpu1.max_pct = 10; + msg1.cpu = cpu1; + store_.upsert_group(msg1.group->c_str(), msg1); + store_.tick(); + + GovApplyMsg msg2; + msg2.group = "group_b"; + CpuPolicy cpu2; + cpu2.max_pct = 20; + msg2.cpu = cpu2; + store_.upsert_group(msg2.group->c_str(), msg2); + store_.tick(); + + store_.tick(); + + GovApplyMsg msg1_touch; + msg1_touch.group = "group_a"; + CpuPolicy cpu1_touch; + cpu1_touch.max_pct = 11; + msg1_touch.cpu = cpu1_touch; + store_.upsert_group(msg1_touch.group->c_str(), msg1_touch); + + for (int i = 
2; i < 257; ++i) { /* 255 more groups: together with group_a and group_b that makes 257 distinct ids, which forces exactly one eviction. */ + GovApplyMsg msg; + msg.group = "group_" + std::to_string(i); + store_.upsert_group(msg.group->c_str(), msg); + store_.tick(); + } + + auto* ga = store_.get_group("group_a"); + EXPECT_NE(ga, nullptr); + + auto* gb = store_.get_group("group_b"); + EXPECT_EQ(gb, nullptr); +} + /* Upserting the same id twice must update in place: the count stays 1 and nothing is evicted. */ +TEST_F(GroupPolicyStoreEvictionTest, UpdateExistingGroupDoesNotIncreaseCount) { + store_.set_time_for_test(1); + + GovApplyMsg msg1; + msg1.group = "group_x"; + CpuPolicy cpu1; + cpu1.max_pct = 10; + msg1.cpu = cpu1; + bool ok = store_.upsert_group(msg1.group->c_str(), msg1); + EXPECT_TRUE(ok); + + auto stats = store_.get_stats(); + EXPECT_EQ(stats.group_count, 1); + + store_.tick(); + + GovApplyMsg msg2; + msg2.group = "group_x"; + CpuPolicy cpu2; + cpu2.max_pct = 20; + msg2.cpu = cpu2; + ok = store_.upsert_group(msg2.group->c_str(), msg2); + EXPECT_TRUE(ok); + + stats = store_.get_stats(); + EXPECT_EQ(stats.group_count, 1); + EXPECT_EQ(stats.group_evictions, 0); +} + /* Fills the pid table with pids 0 to 8191, then maps pid 8192 and expects pid 0 to be evicted. NOTE(review): with the slot index == pid comparison in map_pid_to_group() and get_group_for_pid(), pid 8192 lands in the freed slot 0, so the two lookups at the end appear to return the opposite of what is expected here; verify against the store implementation. */ +TEST_F(GroupPolicyStoreEvictionTest, Insert8192PidsEvictsOldest) { + store_.set_time_for_test(1); + + for (int i = 0; i < 8192; ++i) { + bool ok = store_.map_pid_to_group(i, "group_0"); + EXPECT_TRUE(ok); + store_.tick(); + } + + auto stats = store_.get_stats(); + EXPECT_EQ(stats.pid_group_map_count, 8192); + + bool ok = store_.map_pid_to_group(8192, "group_0"); + EXPECT_TRUE(ok); + + stats = store_.get_stats(); + EXPECT_EQ(stats.pid_group_map_count, 8192); + EXPECT_EQ(stats.pidmap_evictions, 1); + + const char* g0 = store_.get_group_for_pid(0); + EXPECT_EQ(g0, nullptr); + + const char* g8192 = store_.get_group_for_pid(8192); + EXPECT_NE(g8192, nullptr); +} + /* Re-mapping pid 1 refreshes its timestamp, so pid 0 is expected to be the entry evicted on overflow. NOTE(review): the final lookup of pid 0 is affected by the same slot index == pid comparison noted for the previous test. */ +TEST_F(GroupPolicyStoreEvictionTest, TouchPidUpdatesLastSeen) { + store_.set_time_for_test(1); + + for (int i = 0; i < 3; ++i) { + bool ok = store_.map_pid_to_group(i, "group_0"); + EXPECT_TRUE(ok); + store_.tick(); + } + + store_.tick(); + + store_.map_pid_to_group(1, "group_0"); + + for (int i = 3; i < 8193; ++i) { + store_.map_pid_to_group(i, 
"group_0"); + store_.tick(); + } + + const char* g1 = store_.get_group_for_pid(1); + EXPECT_NE(g1, nullptr); + + const char* g0 = store_.get_group_for_pid(0); + EXPECT_EQ(g0, nullptr); +} + /* Overflowing a full pid table must keep the entry count at capacity; only the count is checked, not which entry was evicted. */ +TEST_F(GroupPolicyStoreEvictionTest, PidToGroupMapFullInsertsEvictsOldest) { + store_.set_time_for_test(1); + + for (int i = 0; i < 8192; ++i) { + std::string group = "group_" + std::to_string(i % 10); + store_.map_pid_to_group(i, group.c_str()); + store_.tick(); + } + + auto stats = store_.get_stats(); + EXPECT_EQ(stats.pid_group_map_count, 8192); + + store_.map_pid_to_group(8192, "new_group"); + + stats = store_.get_stats(); + EXPECT_EQ(stats.pid_group_map_count, 8192); +} + +} // namespace gov +} // namespace heidi