@@ -105,6 +105,10 @@ enum Commands {
105105 /// Refresh challenges (re-pull images and restart containers on all validators)
106106 #[ command( subcommand) ]
107107 Refresh ( RefreshCommands ) ,
108+
109+ /// Monitor challenges and validators (requires sudo)
110+ #[ command( subcommand) ]
111+ Monitor ( MonitorCommands ) ,
108112}
109113
110114#[ derive( Subcommand , Debug ) ]
@@ -223,6 +227,55 @@ enum RefreshCommands {
223227 } ,
224228}
225229
230+ #[ derive( Subcommand , Debug ) ]
231+ enum MonitorCommands {
232+ /// Show challenge container status across all validators
233+ Challenges ,
234+ /// Show health status of a specific validator's challenges
235+ Validator {
236+ /// Validator hotkey (hex, optional - select from list)
237+ hotkey : Option < String > ,
238+ } ,
239+ /// Get logs from a challenge container
240+ Logs {
241+ /// Challenge name (e.g., "term-challenge")
242+ #[ arg( short, long) ]
243+ challenge : Option < String > ,
244+ /// Number of lines to tail (default: 100)
245+ #[ arg( short = 'n' , long, default_value = "100" ) ]
246+ lines : u32 ,
247+ /// Validator RPC URL (optional - use default if not provided)
248+ #[ arg( short, long) ]
249+ validator_rpc : Option < String > ,
250+ } ,
251+ /// Show overall health status
252+ Health ,
253+ }
254+
255+ // ==================== Monitor Data Structures ====================
256+
257+ #[ derive( Debug , Clone , serde:: Deserialize ) ]
258+ struct ChallengeContainerStatus {
259+ challenge_id : String ,
260+ challenge_name : String ,
261+ container_id : Option < String > ,
262+ container_name : Option < String > ,
263+ status : String ,
264+ health : String ,
265+ uptime_secs : Option < u64 > ,
266+ endpoint : Option < String > ,
267+ }
268+
269+ #[ derive( Debug , Clone , serde:: Deserialize ) ]
270+ struct ValidatorChallengeHealth {
271+ validator_hotkey : String ,
272+ validator_ss58 : String ,
273+ challenges : Vec < ChallengeContainerStatus > ,
274+ total_challenges : usize ,
275+ healthy_challenges : usize ,
276+ unhealthy_challenges : usize ,
277+ }
278+
226279// ==================== State Fetching ====================
227280
228281#[ derive( Debug , Clone , Default ) ]
@@ -421,6 +474,183 @@ async fn fetch_chain_state(rpc_url: &str) -> Result<ChainStateData> {
421474 Ok ( data)
422475}
423476
477+ // ==================== Monitor Functions ====================
478+
479+ async fn fetch_challenge_health ( rpc_url : & str ) -> Result < ValidatorChallengeHealth > {
480+ let client = reqwest:: Client :: new ( ) ;
481+ let url = format ! ( "{}/rpc" , rpc_url. trim_end_matches( '/' ) ) ;
482+
483+ let response = client
484+ . post ( & url)
485+ . json ( & serde_json:: json!( {
486+ "jsonrpc" : "2.0" ,
487+ "method" : "monitor_getChallengeHealth" ,
488+ "params" : [ ] ,
489+ "id" : 1
490+ } ) )
491+ . send ( )
492+ . await ?;
493+
494+ let result: serde_json:: Value = response. json ( ) . await ?;
495+
496+ if let Some ( error) = result. get ( "error" ) {
497+ anyhow:: bail!( "RPC Error: {}" , error) ;
498+ }
499+
500+ let health = result
501+ . get ( "result" )
502+ . ok_or_else ( || anyhow:: anyhow!( "No result" ) ) ?;
503+
504+ Ok ( serde_json:: from_value ( health. clone ( ) ) ?)
505+ }
506+
507+ async fn fetch_validator_challenge_health ( rpc_url : & str , _hotkey : & str ) -> Result < ValidatorChallengeHealth > {
508+ // For now, this just fetches the local validator's health
509+ // In the future, this could query a specific validator via P2P
510+ fetch_challenge_health ( rpc_url) . await
511+ }
512+
513+ async fn fetch_challenge_logs ( rpc_url : & str , challenge_name : & str , lines : u32 ) -> Result < String > {
514+ let client = reqwest:: Client :: new ( ) ;
515+ let url = format ! ( "{}/rpc" , rpc_url. trim_end_matches( '/' ) ) ;
516+
517+ let response = client
518+ . post ( & url)
519+ . json ( & serde_json:: json!( {
520+ "jsonrpc" : "2.0" ,
521+ "method" : "monitor_getChallengeLogs" ,
522+ "params" : {
523+ "challengeName" : challenge_name,
524+ "lines" : lines
525+ } ,
526+ "id" : 1
527+ } ) )
528+ . send ( )
529+ . await ?;
530+
531+ let result: serde_json:: Value = response. json ( ) . await ?;
532+
533+ if let Some ( error) = result. get ( "error" ) {
534+ anyhow:: bail!( "RPC Error: {}" , error) ;
535+ }
536+
537+ let logs = result
538+ . get ( "result" )
539+ . and_then ( |r| r. get ( "logs" ) )
540+ . and_then ( |l| l. as_str ( ) )
541+ . unwrap_or ( "No logs available" ) ;
542+
543+ Ok ( logs. to_string ( ) )
544+ }
545+
546+ fn display_challenge_health ( health : & ValidatorChallengeHealth ) {
547+ println ! (
548+ " {} {}" ,
549+ "Validator:" . bright_white( ) ,
550+ health. validator_ss58. cyan( )
551+ ) ;
552+ println ! (
553+ " {} {} / {} healthy" ,
554+ "Challenges:" . bright_white( ) ,
555+ health. healthy_challenges. to_string( ) . green( ) ,
556+ health. total_challenges. to_string( ) . cyan( )
557+ ) ;
558+
559+ if health. challenges . is_empty ( ) {
560+ println ! ( "{}" , " No challenge containers running." . yellow( ) ) ;
561+ return ;
562+ }
563+
564+ let mut table = Table :: new ( ) ;
565+ table
566+ . load_preset ( UTF8_FULL )
567+ . set_content_arrangement ( ContentArrangement :: Dynamic )
568+ . set_header ( vec ! [
569+ Cell :: new( "Challenge" ) . fg( Color :: Cyan ) ,
570+ Cell :: new( "Container" ) . fg( Color :: Cyan ) ,
571+ Cell :: new( "Status" ) . fg( Color :: Cyan ) ,
572+ Cell :: new( "Health" ) . fg( Color :: Cyan ) ,
573+ Cell :: new( "Uptime" ) . fg( Color :: Cyan ) ,
574+ ] ) ;
575+
576+ for c in & health. challenges {
577+ let status_color = match c. status . as_str ( ) {
578+ "Running" => Color :: Green ,
579+ "Starting" => Color :: Yellow ,
580+ _ => Color :: Red ,
581+ } ;
582+ let health_color = match c. health . as_str ( ) {
583+ "Healthy" => Color :: Green ,
584+ "Starting" => Color :: Yellow ,
585+ _ => Color :: Red ,
586+ } ;
587+ let uptime = c. uptime_secs
588+ . map ( |s| format_duration ( s) )
589+ . unwrap_or_else ( || "-" . to_string ( ) ) ;
590+
591+ table. add_row ( vec ! [
592+ Cell :: new( & c. challenge_name) . fg( Color :: Green ) ,
593+ Cell :: new( c. container_name. as_deref( ) . unwrap_or( "-" ) ) ,
594+ Cell :: new( & c. status) . fg( status_color) ,
595+ Cell :: new( & c. health) . fg( health_color) ,
596+ Cell :: new( uptime) ,
597+ ] ) ;
598+ }
599+
600+ println ! ( "{table}" ) ;
601+ }
602+
603+ fn display_validator_health ( health : & ValidatorChallengeHealth ) {
604+ display_challenge_health ( health) ;
605+ }
606+
607+ fn display_health_summary ( health : & ValidatorChallengeHealth ) {
608+ let healthy_pct = if health. total_challenges > 0 {
609+ ( health. healthy_challenges as f64 / health. total_challenges as f64 ) * 100.0
610+ } else {
611+ 0.0
612+ } ;
613+
614+ let status_icon = if healthy_pct >= 100.0 {
615+ "✓" . green ( )
616+ } else if healthy_pct >= 50.0 {
617+ "⚠" . yellow ( )
618+ } else {
619+ "✗" . red ( )
620+ } ;
621+
622+ println ! ( "\n {} Overall Health: {:.0}%" , status_icon, healthy_pct) ;
623+ println ! (
624+ " {} Total Challenges: {}" ,
625+ "📦" . to_string( ) ,
626+ health. total_challenges
627+ ) ;
628+ println ! (
629+ " {} Healthy: {}" ,
630+ "✓" . green( ) ,
631+ health. healthy_challenges
632+ ) ;
633+ println ! (
634+ " {} Unhealthy: {}" ,
635+ "✗" . red( ) ,
636+ health. unhealthy_challenges
637+ ) ;
638+
639+ display_challenge_health ( health) ;
640+ }
641+
/// Formats a duration in seconds using its two most significant units.
///
/// Produces `"Ns"` below one minute, `"Mm Ss"` below one hour, `"Hh Mm"`
/// below one day, and `"Dd Hh"` otherwise.
fn format_duration(secs: u64) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;

    match secs {
        s if s < MINUTE => format!("{s}s"),
        s if s < HOUR => format!("{}m {}s", s / MINUTE, s % MINUTE),
        s if s < DAY => format!("{}h {}m", s / HOUR, (s % HOUR) / MINUTE),
        s => format!("{}d {}h", s / DAY, (s % DAY) / HOUR),
    }
}
653+
424654// ==================== Display Functions ====================
425655
426656fn print_banner ( ) {
@@ -1328,6 +1558,80 @@ async fn main() -> Result<()> {
13281558 }
13291559 }
13301560 }
1561+
1562+ Commands :: Monitor ( cmd) => {
1563+ match cmd {
1564+ MonitorCommands :: Challenges => {
1565+ print_section ( "Challenge Container Status" ) ;
1566+ let health = fetch_challenge_health ( & args. rpc ) . await ?;
1567+ display_challenge_health ( & health) ;
1568+ }
1569+ MonitorCommands :: Validator { hotkey } => {
1570+ let state = fetch_chain_state ( & args. rpc ) . await ?;
1571+
1572+ let selected_hotkey = if let Some ( hk) = hotkey {
1573+ hk
1574+ } else {
1575+ if state. validators . is_empty ( ) {
1576+ println ! ( "{}" , "No validators registered." . yellow( ) ) ;
1577+ return Ok ( ( ) ) ;
1578+ }
1579+
1580+ let options: Vec < String > = state
1581+ . validators
1582+ . iter ( )
1583+ . map ( |v| format ! ( "{} ({:.2} TAO)" , & v. hotkey[ ..16 ] , v. stake_tao) )
1584+ . collect ( ) ;
1585+
1586+ let selection = FuzzySelect :: with_theme ( & ColorfulTheme :: default ( ) )
1587+ . with_prompt ( "Select validator to inspect" )
1588+ . items ( & options)
1589+ . interact ( ) ?;
1590+
1591+ state. validators [ selection] . hotkey . clone ( )
1592+ } ;
1593+
1594+ print_section ( & format ! ( "Validator: {}" , & selected_hotkey[ ..16 ] ) ) ;
1595+ let health = fetch_validator_challenge_health ( & args. rpc , & selected_hotkey) . await ?;
1596+ display_validator_health ( & health) ;
1597+ }
1598+ MonitorCommands :: Logs { challenge, lines, validator_rpc } => {
1599+ let state = fetch_chain_state ( & args. rpc ) . await ?;
1600+
1601+ let challenge_name = if let Some ( name) = challenge {
1602+ name
1603+ } else {
1604+ if state. challenges . is_empty ( ) {
1605+ println ! ( "{}" , "No challenges registered." . yellow( ) ) ;
1606+ return Ok ( ( ) ) ;
1607+ }
1608+
1609+ let options: Vec < String > = state
1610+ . challenges
1611+ . iter ( )
1612+ . map ( |c| c. name . clone ( ) )
1613+ . collect ( ) ;
1614+
1615+ let selection = FuzzySelect :: with_theme ( & ColorfulTheme :: default ( ) )
1616+ . with_prompt ( "Select challenge to view logs" )
1617+ . items ( & options)
1618+ . interact ( ) ?;
1619+
1620+ state. challenges [ selection] . name . clone ( )
1621+ } ;
1622+
1623+ let rpc_url = validator_rpc. as_ref ( ) . unwrap_or ( & args. rpc ) ;
1624+ print_section ( & format ! ( "Logs: {} (last {} lines)" , challenge_name, lines) ) ;
1625+ let logs = fetch_challenge_logs ( rpc_url, & challenge_name, lines) . await ?;
1626+ println ! ( "{}" , logs) ;
1627+ }
1628+ MonitorCommands :: Health => {
1629+ print_section ( "Network Health Overview" ) ;
1630+ let health = fetch_challenge_health ( & args. rpc ) . await ?;
1631+ display_health_summary ( & health) ;
1632+ }
1633+ }
1634+ }
13311635 }
13321636
13331637 Ok ( ( ) )
0 commit comments