From f618201aa1e9a2475b6b1f66e8afa8c482c4957e Mon Sep 17 00:00:00 2001
From: Finn-Andersonn <148964599+Finn-Andersonn@users.noreply.github.com>
Date: Wed, 4 Feb 2026 15:44:15 +0000
Subject: [PATCH 1/2] Feat: Multinode gRPC

---
Notes (below the "---" marker, so not part of the commit message):
 * Adds the plugins/multinode import and three RPCs (RegisterRestoredIP,
   MonitorIPEvents, SubmitGlobalMap) to service Daemon, and creates
   cedana/plugins/multinode/multinode.proto (package plugins.multinode).
 * MonitorIPEvents returns a stream of IPReportReq, the same message that
   RegisterRestoredIP takes as its request.
 * The new import is appended after the slurm import rather than placed in
   alphabetical position among the plugin imports.
 * NOTE(review): line breaks, indentation (2 spaces assumed) and blank
   context lines in this copy were reconstructed from the hunk counts;
   verify against the tree, or apply with whitespace checks relaxed.

 cedana/daemon/daemon.proto               |  7 +++++
 cedana/plugins/multinode/multinode.proto | 39 ++++++++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 cedana/plugins/multinode/multinode.proto

diff --git a/cedana/daemon/daemon.proto b/cedana/daemon/daemon.proto
index 708db14..6ab8650 100644
--- a/cedana/daemon/daemon.proto
+++ b/cedana/daemon/daemon.proto
@@ -13,6 +13,7 @@ import "plugins/containerd/containerd.proto";
 import "plugins/kata/kata.proto";
 import "plugins/runc/runc.proto";
 import "plugins/slurm/slurm.proto";
+import "plugins/multinode/multinode.proto";
 
 service Daemon {
   ///// C/R /////
@@ -66,6 +67,12 @@ service Daemon {
   rpc ReloadPlugins(Empty) returns (Empty) {}
   // Create a unix socket
   rpc CreateUnixSocket(Empty) returns (SocketResp);
+  // Send IP from cedana-runc to daemon
+  rpc RegisterRestoredIP(plugins.multinode.IPReportReq) returns (plugins.multinode.IPReportResp) {}
+  // Helper listens to calls for events from runc
+  rpc MonitorIPEvents(plugins.multinode.MonitorIPEventsReq) returns (stream plugins.multinode.IPReportReq);
+  // Helper calls this when it gets the map from RabbitMQ
+  rpc SubmitGlobalMap(plugins.multinode.GlobalMapReq) returns (plugins.multinode.GlobalMapResp);
 }
 
 ////////////////////////
diff --git a/cedana/plugins/multinode/multinode.proto b/cedana/plugins/multinode/multinode.proto
new file mode 100644
index 0000000..63eb324
--- /dev/null
+++ b/cedana/plugins/multinode/multinode.proto
@@ -0,0 +1,39 @@
+syntax = "proto3";
+
+package plugins.multinode;
+
+////////////////////////
+///// IP Exchange //////
+////////////////////////
+
+message IPReportReq {
+  string old_ip = 1;
+  string new_ip = 2;
+  string checkpoint_id = 3;
+}
+
+message IPReportResp {
+  bool success = 1;
+  string message = 2;
+}
+
+message MonitorIPEventsReq{
+  // empty
+}
+
+message GlobalMapEntry {
+  string original_ip = 1;
+  string current_ip = 2;
+  string pod_name = 3;
+  string namespace = 4;
+  string job_name = 5;
+}
+
+message GlobalMapReq {
+  string cluster_id = 1;
+  repeated GlobalMapEntry entries = 2;
+}
+
+message GlobalMapResp {
+  bool success = 1;
+}

From 607b8b8ffe82288788228063b155122d12660e7b Mon Sep 17 00:00:00 2001
From: Finn-Andersonn <148964599+Finn-Andersonn@users.noreply.github.com>
Date: Thu, 5 Feb 2026 08:26:47 +0000
Subject: [PATCH 2/2] Chore: add async, process, etc.

---
Notes (below the "---" marker, so not part of the commit message):
 * runc.proto: RWFile.Xattrs read "map Xattrs = 8;" in this copy, which is
   not valid proto3 (a map needs key and value types). It is written here
   as map<string, bytes>. NOTE(review): the value type is an assumption;
   bytes was chosen because it is wire-compatible with string. Confirm
   against the tree; the "index" line for runc.proto was left untouched.
 * query.proto: QueryReq field 2 changes from optional
   plugins.runc.QueryReq to bool Tree, and QueryResp field 2 from optional
   plugins.runc.QueryResp to repeated ProcessState; Runc and K8s move to
   101 and 102. A peer built from the previous schema will misread these
   fields. NOTE(review): confirm every client and the daemon are
   regenerated and released together.
 * k8s.proto: Pod.Root (5) is removed and RootfsOnly moves from 6 to 5;
   QueryReq.Root (1) is removed and the remaining fields shift down by
   one. Same compatibility caveat as above; the field numbers are kept
   exactly as submitted.
 * criu.proto: sk_inet_redirect is numbered 73 while the preceding visible
   field is 71. NOTE(review): confirm 72 is intentionally skipped.
 * NOTE(review): line breaks, indentation (2 spaces assumed) and blank
   context lines in this copy were reconstructed from the hunk counts;
   verify against the tree, or apply with whitespace checks relaxed.

 cedana/daemon/daemon.proto                 |  1 +
 cedana/daemon/process.proto                |  1 +
 cedana/daemon/query.proto                  | 16 ++++++++++++----
 cedana/plugins/containerd/containerd.proto | 11 +++++++++++
 cedana/plugins/k8s/k8s.proto               | 10 ++++------
 cedana/plugins/runc/runc.proto             | 15 +++++++++++++++
 criu/criu/criu.proto                       |  1 +
 7 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/cedana/daemon/daemon.proto b/cedana/daemon/daemon.proto
index 6ab8650..cc0889d 100644
--- a/cedana/daemon/daemon.proto
+++ b/cedana/daemon/daemon.proto
@@ -96,6 +96,7 @@ message DumpReq {
   criu.criu_opts Criu = 7; // CRIU overrides for the dump
   string GPUFreezeType = 8; // type of GPU freeze to use, e.g., IPC, NCCL
   DumpAction Action = 9; // action to perform: dump, freeze only, unfreeze only
+  bool Async = 10; // whether to perform the dump asynchronously
 }
 
 message DumpResp {
diff --git a/cedana/daemon/process.proto b/cedana/daemon/process.proto
index 8b97b99..10b7a1e 100644
--- a/cedana/daemon/process.proto
+++ b/cedana/daemon/process.proto
@@ -74,4 +74,5 @@ message Mount {
   string MountPoint = 6;
   string Options = 7;
   string FSType = 8;
+  string Source = 9;
 }
diff --git a/cedana/daemon/query.proto b/cedana/daemon/query.proto
index 8577b29..e7d0c68 100644
--- a/cedana/daemon/query.proto
+++ b/cedana/daemon/query.proto
@@ -2,20 +2,28 @@ syntax = "proto3";
 
 package daemon;
 
+import "daemon/process.proto";
+import "plugins/containerd/containerd.proto";
 import "plugins/k8s/k8s.proto";
 import "plugins/runc/runc.proto";
 
 message QueryReq {
   string Type = 1;
-  optional plugins.runc.QueryReq Runc = 2;
-  optional plugins.k8s.QueryReq K8s = 3;
+  bool Tree = 2; // Include child processes in the query
+
+  repeated uint32 PIDs = 100;
+  optional plugins.runc.QueryReq Runc = 101;
+  optional plugins.k8s.QueryReq K8s = 102;
+  optional plugins.containerd.QueryReq Containerd = 103;
 
   // Can add more plugin-specific queries here
 }
 
 message QueryResp {
   repeated string Messages = 1;
+  repeated ProcessState States = 2;
 
-  optional plugins.runc.QueryResp Runc = 2;
-  optional plugins.k8s.QueryResp K8s = 3;
+  optional plugins.runc.QueryResp Runc = 101;
+  optional plugins.k8s.QueryResp K8s = 102;
+  optional plugins.containerd.QueryResp Containerd = 103;
 }
diff --git a/cedana/plugins/containerd/containerd.proto b/cedana/plugins/containerd/containerd.proto
index 609c565..49ccbd4 100644
--- a/cedana/plugins/containerd/containerd.proto
+++ b/cedana/plugins/containerd/containerd.proto
@@ -15,6 +15,7 @@ message Containerd {
   repeated string Args = 8;
   bool NoPivot = 9;
   repeated string Env = 10;
+  string Snapshotter = 11;
 
   optional plugins.runc.Runc Runc = 101;
   // Can add additional low-level runtimes here
@@ -25,3 +26,13 @@ message Image {
   string Username = 2;
   string Secret = 3;
 }
+
+message QueryReq {
+  string Address = 1;
+  string Namespace = 2;
+  repeated string IDs = 3;
+}
+
+message QueryResp {
+  repeated Containerd Containers = 1;
+}
diff --git a/cedana/plugins/k8s/k8s.proto b/cedana/plugins/k8s/k8s.proto
index b2fe905..71e02b9 100644
--- a/cedana/plugins/k8s/k8s.proto
+++ b/cedana/plugins/k8s/k8s.proto
@@ -9,18 +9,16 @@ message Pod {
   string Name = 2;
   string Namespace = 3;
   string UID = 4;
-  string Root = 5;
-  bool RootfsOnly = 6;
+  bool RootfsOnly = 5;
 
   repeated plugins.containerd.Containerd Containerd = 101;
   // Can add additional high-level runtimes here
 }
 
 message QueryReq {
-  string Root = 1;
-  string Namespace = 2;
-  string ContainerType = 3; // "container" or "sandbox"
-  repeated string Names = 4;
+  string Namespace = 1;
+  string ContainerType = 2; // "container" or "sandbox"
+  repeated string Names = 3;
 }
 
 message QueryResp {
diff --git a/cedana/plugins/runc/runc.proto b/cedana/plugins/runc/runc.proto
index f420719..74a7622 100644
--- a/cedana/plugins/runc/runc.proto
+++ b/cedana/plugins/runc/runc.proto
@@ -16,6 +16,7 @@ message Runc {
   bool SystemdCgroup = 11;
   bool NoSubreaper = 12;
   int32 PreserveFDs = 13;
+  string NetnsEth0IPv4Addr = 14;
 }
 
 message QueryReq {
@@ -26,3 +27,17 @@ message QueryReq {
 message QueryResp {
   repeated Runc Containers = 1;
 }
+
+message RWFile {
+  string Path = 1;
+  uint32 Mode = 2;
+  uint32 Uid = 3;
+  uint32 Gid = 4;
+  string Type = 5; // "file", "dir", "symlink", "chardev", "blockdev"
+  uint32 DevMajor = 6;
+  uint32 DevMinor = 7;
+  map<string, bytes> Xattrs = 8;
+  string SymlinkTarget = 9;
+  uint64 Mtime = 10;
+  repeated bytes Content = 11; // repeated for chunking large files to not take up ram
+}
diff --git a/criu/criu/criu.proto b/criu/criu/criu.proto
index a633be8..bc0a4ba 100644
--- a/criu/criu/criu.proto
+++ b/criu/criu/criu.proto
@@ -148,6 +148,7 @@ message criu_opts {
   optional bool leave_stopped = 69;
   optional bool display_stats = 70;
   optional bool log_to_stderr = 71;
+  repeated string sk_inet_redirect = 73;
   /* optional bool check_mounts = 128; */
 }
 