Skip to content

Commit bf35a76

Browse files
committed
Add csudo monitor commands for challenge health and logs
- Add monitor subcommands: challenges, validator, logs, health
- Add RPC endpoints: monitor_getChallengeHealth, monitor_getChallengeLogs
- Display challenge container status with table view
1 parent 39e4724 commit bf35a76

File tree

2 files changed

+396
-1
lines changed

2 files changed

+396
-1
lines changed

bins/csudo/src/main.rs

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ enum Commands {
105105
/// Refresh challenges (re-pull images and restart containers on all validators)
106106
#[command(subcommand)]
107107
Refresh(RefreshCommands),
108+
109+
/// Monitor challenges and validators (requires sudo)
110+
#[command(subcommand)]
111+
Monitor(MonitorCommands),
108112
}
109113

110114
#[derive(Subcommand, Debug)]
@@ -223,6 +227,55 @@ enum RefreshCommands {
223227
},
224228
}
225229

230+
#[derive(Subcommand, Debug)]
231+
enum MonitorCommands {
232+
/// Show challenge container status across all validators
233+
Challenges,
234+
/// Show health status of a specific validator's challenges
235+
Validator {
236+
/// Validator hotkey (hex, optional - select from list)
237+
hotkey: Option<String>,
238+
},
239+
/// Get logs from a challenge container
240+
Logs {
241+
/// Challenge name (e.g., "term-challenge")
242+
#[arg(short, long)]
243+
challenge: Option<String>,
244+
/// Number of lines to tail (default: 100)
245+
#[arg(short = 'n', long, default_value = "100")]
246+
lines: u32,
247+
/// Validator RPC URL (optional - use default if not provided)
248+
#[arg(short, long)]
249+
validator_rpc: Option<String>,
250+
},
251+
/// Show overall health status
252+
Health,
253+
}
254+
255+
// ==================== Monitor Data Structures ====================
256+
257+
#[derive(Debug, Clone, serde::Deserialize)]
258+
struct ChallengeContainerStatus {
259+
challenge_id: String,
260+
challenge_name: String,
261+
container_id: Option<String>,
262+
container_name: Option<String>,
263+
status: String,
264+
health: String,
265+
uptime_secs: Option<u64>,
266+
endpoint: Option<String>,
267+
}
268+
269+
#[derive(Debug, Clone, serde::Deserialize)]
270+
struct ValidatorChallengeHealth {
271+
validator_hotkey: String,
272+
validator_ss58: String,
273+
challenges: Vec<ChallengeContainerStatus>,
274+
total_challenges: usize,
275+
healthy_challenges: usize,
276+
unhealthy_challenges: usize,
277+
}
278+
226279
// ==================== State Fetching ====================
227280

228281
#[derive(Debug, Clone, Default)]
@@ -421,6 +474,183 @@ async fn fetch_chain_state(rpc_url: &str) -> Result<ChainStateData> {
421474
Ok(data)
422475
}
423476

477+
// ==================== Monitor Functions ====================
478+
479+
async fn fetch_challenge_health(rpc_url: &str) -> Result<ValidatorChallengeHealth> {
480+
let client = reqwest::Client::new();
481+
let url = format!("{}/rpc", rpc_url.trim_end_matches('/'));
482+
483+
let response = client
484+
.post(&url)
485+
.json(&serde_json::json!({
486+
"jsonrpc": "2.0",
487+
"method": "monitor_getChallengeHealth",
488+
"params": [],
489+
"id": 1
490+
}))
491+
.send()
492+
.await?;
493+
494+
let result: serde_json::Value = response.json().await?;
495+
496+
if let Some(error) = result.get("error") {
497+
anyhow::bail!("RPC Error: {}", error);
498+
}
499+
500+
let health = result
501+
.get("result")
502+
.ok_or_else(|| anyhow::anyhow!("No result"))?;
503+
504+
Ok(serde_json::from_value(health.clone())?)
505+
}
506+
507+
async fn fetch_validator_challenge_health(rpc_url: &str, _hotkey: &str) -> Result<ValidatorChallengeHealth> {
508+
// For now, this just fetches the local validator's health
509+
// In the future, this could query a specific validator via P2P
510+
fetch_challenge_health(rpc_url).await
511+
}
512+
513+
async fn fetch_challenge_logs(rpc_url: &str, challenge_name: &str, lines: u32) -> Result<String> {
514+
let client = reqwest::Client::new();
515+
let url = format!("{}/rpc", rpc_url.trim_end_matches('/'));
516+
517+
let response = client
518+
.post(&url)
519+
.json(&serde_json::json!({
520+
"jsonrpc": "2.0",
521+
"method": "monitor_getChallengeLogs",
522+
"params": {
523+
"challengeName": challenge_name,
524+
"lines": lines
525+
},
526+
"id": 1
527+
}))
528+
.send()
529+
.await?;
530+
531+
let result: serde_json::Value = response.json().await?;
532+
533+
if let Some(error) = result.get("error") {
534+
anyhow::bail!("RPC Error: {}", error);
535+
}
536+
537+
let logs = result
538+
.get("result")
539+
.and_then(|r| r.get("logs"))
540+
.and_then(|l| l.as_str())
541+
.unwrap_or("No logs available");
542+
543+
Ok(logs.to_string())
544+
}
545+
546+
fn display_challenge_health(health: &ValidatorChallengeHealth) {
547+
println!(
548+
" {} {}",
549+
"Validator:".bright_white(),
550+
health.validator_ss58.cyan()
551+
);
552+
println!(
553+
" {} {} / {} healthy",
554+
"Challenges:".bright_white(),
555+
health.healthy_challenges.to_string().green(),
556+
health.total_challenges.to_string().cyan()
557+
);
558+
559+
if health.challenges.is_empty() {
560+
println!("{}", " No challenge containers running.".yellow());
561+
return;
562+
}
563+
564+
let mut table = Table::new();
565+
table
566+
.load_preset(UTF8_FULL)
567+
.set_content_arrangement(ContentArrangement::Dynamic)
568+
.set_header(vec![
569+
Cell::new("Challenge").fg(Color::Cyan),
570+
Cell::new("Container").fg(Color::Cyan),
571+
Cell::new("Status").fg(Color::Cyan),
572+
Cell::new("Health").fg(Color::Cyan),
573+
Cell::new("Uptime").fg(Color::Cyan),
574+
]);
575+
576+
for c in &health.challenges {
577+
let status_color = match c.status.as_str() {
578+
"Running" => Color::Green,
579+
"Starting" => Color::Yellow,
580+
_ => Color::Red,
581+
};
582+
let health_color = match c.health.as_str() {
583+
"Healthy" => Color::Green,
584+
"Starting" => Color::Yellow,
585+
_ => Color::Red,
586+
};
587+
let uptime = c.uptime_secs
588+
.map(|s| format_duration(s))
589+
.unwrap_or_else(|| "-".to_string());
590+
591+
table.add_row(vec![
592+
Cell::new(&c.challenge_name).fg(Color::Green),
593+
Cell::new(c.container_name.as_deref().unwrap_or("-")),
594+
Cell::new(&c.status).fg(status_color),
595+
Cell::new(&c.health).fg(health_color),
596+
Cell::new(uptime),
597+
]);
598+
}
599+
600+
println!("{table}");
601+
}
602+
603+
/// Prints the challenge report for a selected validator.
///
/// Currently identical to [`display_challenge_health`]; kept as a separate entry point for
/// the `monitor validator` subcommand.
fn display_validator_health(health: &ValidatorChallengeHealth) {
    let report = health;
    display_challenge_health(report);
}
606+
607+
fn display_health_summary(health: &ValidatorChallengeHealth) {
608+
let healthy_pct = if health.total_challenges > 0 {
609+
(health.healthy_challenges as f64 / health.total_challenges as f64) * 100.0
610+
} else {
611+
0.0
612+
};
613+
614+
let status_icon = if healthy_pct >= 100.0 {
615+
"✓".green()
616+
} else if healthy_pct >= 50.0 {
617+
"⚠".yellow()
618+
} else {
619+
"✗".red()
620+
};
621+
622+
println!("\n{} Overall Health: {:.0}%", status_icon, healthy_pct);
623+
println!(
624+
" {} Total Challenges: {}",
625+
"📦".to_string(),
626+
health.total_challenges
627+
);
628+
println!(
629+
" {} Healthy: {}",
630+
"✓".green(),
631+
health.healthy_challenges
632+
);
633+
println!(
634+
" {} Unhealthy: {}",
635+
"✗".red(),
636+
health.unhealthy_challenges
637+
);
638+
639+
display_challenge_health(health);
640+
}
641+
642+
/// Renders a duration given in whole seconds using its two most significant units.
///
/// * under a minute: `"42s"`
/// * under an hour: `"5m 7s"`
/// * under a day: `"3h 12m"`
/// * otherwise: `"2d 4h"`
///
/// Smaller units are truncated, not rounded.
fn format_duration(secs: u64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;

    match secs {
        s if s < MINUTE => format!("{}s", s),
        s if s < HOUR => format!("{}m {}s", s / MINUTE, s % MINUTE),
        s if s < DAY => format!("{}h {}m", s / HOUR, (s % HOUR) / MINUTE),
        s => format!("{}d {}h", s / DAY, (s % DAY) / HOUR),
    }
}
653+
424654
// ==================== Display Functions ====================
425655

426656
fn print_banner() {
@@ -1328,6 +1558,80 @@ async fn main() -> Result<()> {
13281558
}
13291559
}
13301560
}
1561+
1562+
// `csudo monitor <subcommand>`: read-only views over challenge containers.
//
// Chain state is fetched only when an interactive picker has to be populated, so an
// explicitly supplied hotkey or challenge name does not depend on that extra RPC call.
Commands::Monitor(cmd) => {
    match cmd {
        MonitorCommands::Challenges => {
            print_section("Challenge Container Status");
            let health = fetch_challenge_health(&args.rpc).await?;
            display_challenge_health(&health);
        }
        MonitorCommands::Validator { hotkey } => {
            let selected_hotkey = match hotkey {
                Some(hk) => hk,
                None => {
                    let state = fetch_chain_state(&args.rpc).await?;

                    if state.validators.is_empty() {
                        println!("{}", "No validators registered.".yellow());
                        return Ok(());
                    }

                    let options: Vec<String> = state
                        .validators
                        .iter()
                        .map(|v| {
                            // `get(..16)` instead of `[..16]`: a hotkey shorter than 16
                            // bytes must be shown in full, not panic.
                            let short = v.hotkey.get(..16).unwrap_or(v.hotkey.as_str());
                            format!("{} ({:.2} TAO)", short, v.stake_tao)
                        })
                        .collect();

                    let selection = FuzzySelect::with_theme(&ColorfulTheme::default())
                        .with_prompt("Select validator to inspect")
                        .items(&options)
                        .interact()?;

                    state.validators[selection].hotkey.clone()
                }
            };

            // The hotkey may come straight from the command line, so it can be shorter
            // than 16 bytes; fall back to the full string instead of panicking.
            let short_hotkey = selected_hotkey
                .get(..16)
                .unwrap_or(selected_hotkey.as_str());
            print_section(&format!("Validator: {}", short_hotkey));
            let health = fetch_validator_challenge_health(&args.rpc, &selected_hotkey).await?;
            display_validator_health(&health);
        }
        MonitorCommands::Logs { challenge, lines, validator_rpc } => {
            let challenge_name = match challenge {
                Some(name) => name,
                None => {
                    let state = fetch_chain_state(&args.rpc).await?;

                    if state.challenges.is_empty() {
                        println!("{}", "No challenges registered.".yellow());
                        return Ok(());
                    }

                    let options: Vec<String> = state
                        .challenges
                        .iter()
                        .map(|c| c.name.clone())
                        .collect();

                    let selection = FuzzySelect::with_theme(&ColorfulTheme::default())
                        .with_prompt("Select challenge to view logs")
                        .items(&options)
                        .interact()?;

                    state.challenges[selection].name.clone()
                }
            };

            // An explicit --validator-rpc overrides the default RPC endpoint.
            let rpc_url = validator_rpc.as_ref().unwrap_or(&args.rpc);
            print_section(&format!("Logs: {} (last {} lines)", challenge_name, lines));
            let logs = fetch_challenge_logs(rpc_url, &challenge_name, lines).await?;
            println!("{}", logs);
        }
        MonitorCommands::Health => {
            print_section("Network Health Overview");
            let health = fetch_challenge_health(&args.rpc).await?;
            display_health_summary(&health);
        }
    }
}
13311635
}
13321636

13331637
Ok(())

0 commit comments

Comments
 (0)