Skip to content

Commit 5bf156f

Browse files
committed
fix: auto-detect validator network for challenge containers
Instead of hardcoding 'platform-network', detect the network the validator container is running on and use that for challenge containers. This fixes the issue where docker-compose creates networks with project prefixes (e.g. 'myproject_platform-network') and challenge containers couldn't communicate with the validator.
1 parent 27071de commit 5bf156f

File tree

3 files changed

+114
-19
lines changed

3 files changed

+114
-19
lines changed

crates/challenge-orchestrator/src/docker.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,116 @@ impl DockerClient {
4848
})
4949
}
5050

51+
/// Connect and auto-detect the network from the validator container
52+
/// This ensures challenge containers are on the same network as the validator
53+
pub async fn connect_auto_detect() -> anyhow::Result<Self> {
54+
let docker = Docker::connect_with_local_defaults()?;
55+
docker.ping().await?;
56+
info!("Connected to Docker daemon");
57+
58+
// Try to detect the network from the current container
59+
let network_name = Self::detect_validator_network_static(&docker).await
60+
.unwrap_or_else(|e| {
61+
warn!("Could not detect validator network: {}. Using default 'platform-network'", e);
62+
"platform-network".to_string()
63+
});
64+
65+
info!(network = %network_name, "Using network for challenge containers");
66+
67+
Ok(Self {
68+
docker,
69+
network_name,
70+
})
71+
}
72+
73+
/// Detect the network the validator container is running on
74+
async fn detect_validator_network_static(docker: &Docker) -> anyhow::Result<String> {
75+
// Get our container ID
76+
let container_id = Self::get_container_id_static()?;
77+
78+
// Inspect our container to find its networks
79+
let inspect = docker.inspect_container(&container_id, None).await?;
80+
81+
let networks = inspect
82+
.network_settings
83+
.as_ref()
84+
.and_then(|ns| ns.networks.as_ref())
85+
.ok_or_else(|| anyhow::anyhow!("No network settings found"))?;
86+
87+
// Find a suitable network (prefer non-default networks)
88+
// Priority: user-defined bridge > any bridge > host
89+
let mut best_network: Option<String> = None;
90+
91+
for (name, _settings) in networks {
92+
// Skip host and none networks
93+
if name == "host" || name == "none" {
94+
continue;
95+
}
96+
// Skip the default bridge network (containers can't communicate by name on it)
97+
if name == "bridge" {
98+
if best_network.is_none() {
99+
best_network = Some(name.clone());
100+
}
101+
continue;
102+
}
103+
// Any other network is preferred (user-defined bridge)
104+
best_network = Some(name.clone());
105+
break;
106+
}
107+
108+
best_network.ok_or_else(|| anyhow::anyhow!("No suitable network found for validator container"))
109+
}
110+
111+
/// Static version of get_self_container_id for use before Self is constructed
112+
fn get_container_id_static() -> anyhow::Result<String> {
113+
// Method 1: Check hostname (Docker sets hostname to container ID by default)
114+
if let Ok(hostname) = std::env::var("HOSTNAME") {
115+
// Docker container IDs are 12+ hex characters
116+
if hostname.len() >= 12 && hostname.chars().all(|c| c.is_ascii_hexdigit()) {
117+
return Ok(hostname);
118+
}
119+
}
120+
121+
// Method 2: Parse from cgroup (works on Linux)
122+
if let Ok(cgroup) = std::fs::read_to_string("/proc/self/cgroup") {
123+
for line in cgroup.lines() {
124+
if let Some(docker_pos) = line.rfind("/docker/") {
125+
let id = &line[docker_pos + 8..];
126+
if id.len() >= 12 {
127+
return Ok(id[..12].to_string());
128+
}
129+
}
130+
if let Some(containerd_pos) = line.rfind("cri-containerd-") {
131+
let id = &line[containerd_pos + 15..];
132+
if id.len() >= 12 {
133+
return Ok(id[..12].to_string());
134+
}
135+
}
136+
}
137+
}
138+
139+
// Method 3: Check mountinfo
140+
if std::path::Path::new("/.dockerenv").exists() {
141+
if let Ok(mountinfo) = std::fs::read_to_string("/proc/self/mountinfo") {
142+
for line in mountinfo.lines() {
143+
if line.contains("/docker/containers/") {
144+
if let Some(start) = line.find("/docker/containers/") {
145+
let rest = &line[start + 19..];
146+
if let Some(end) = rest.find('/') {
147+
let id = &rest[..end];
148+
if id.len() >= 12 {
149+
return Ok(id[..12].to_string());
150+
}
151+
}
152+
}
153+
}
154+
}
155+
}
156+
}
157+
158+
anyhow::bail!("Not running in a Docker container or unable to determine container ID")
159+
}
160+
51161
/// Ensure the Docker network exists
52162
pub async fn ensure_network(&self) -> anyhow::Result<()> {
53163
let networks = self.docker.list_networks::<String>(None).await?;

crates/challenge-orchestrator/src/lib.rs

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,27 +54,13 @@ pub const PLATFORM_NETWORK: &str = "platform-network";
5454

5555
impl ChallengeOrchestrator {
5656
pub async fn new(config: OrchestratorConfig) -> anyhow::Result<Self> {
57-
let docker = DockerClient::connect_with_network(PLATFORM_NETWORK).await?;
57+
// Auto-detect the network from the validator container
58+
// This ensures challenge containers are on the same network as the validator
59+
let docker = DockerClient::connect_auto_detect().await?;
5860

59-
// Ensure the Docker network exists
61+
// Ensure the detected network exists (creates it if running outside Docker)
6062
docker.ensure_network().await?;
6163

62-
// Try to connect the current container to the network (if running in Docker)
63-
// This is CRITICAL for communication with challenge containers
64-
match docker.connect_self_to_network().await {
65-
Ok(_) => {
66-
tracing::info!("Validator container connected to {} network", PLATFORM_NETWORK);
67-
}
68-
Err(e) => {
69-
tracing::warn!(
70-
"Could not connect validator to {} network: {}. \
71-
Challenge containers may not be reachable. \
72-
Ensure validator is started with --network {} or add network in docker-compose.",
73-
PLATFORM_NETWORK, e, PLATFORM_NETWORK
74-
);
75-
}
76-
}
77-
7864
let challenges = Arc::new(RwLock::new(HashMap::new()));
7965
let health_monitor = HealthMonitor::new(challenges.clone(), config.health_check_interval);
8066

docker-compose.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,4 @@ volumes:
7272

7373
networks:
7474
platform-network:
75-
name: platform-network
7675
driver: bridge

0 commit comments

Comments
 (0)