Skip to content

Commit 1635a16

Browse files
committed
fix: add 30s delay before crash exit to allow Watchtower updates
- On fatal error: report to Sentry, wait 30s, then exit
- On panic: report to Sentry, wait 30s, then crash
- Prevents rapid restart loops that block container updates
1 parent 00df049 commit 1635a16

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

bins/validator-node/src/main.rs

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,16 @@ fn init_sentry() -> Option<sentry::ClientInitGuard> {
216216
Some(guard)
217217
}
218218

219+
/// Seconds to wait before exiting after a fatal error or panic, so that a rapid
/// restart loop does not block Watchtower from updating the container.
220+
const CRASH_DELAY_SECS: u64 = 30;
221+
219222
#[tokio::main]
220223
async fn main() -> Result<()> {
221224
// Initialize Sentry error monitoring FIRST (before anything else)
222225
// This ensures we capture all errors from the very start
223226
let _sentry_guard = init_sentry();
224227

225-
// Set up panic hook to capture panics in Sentry
228+
// Set up panic hook to capture panics in Sentry with delayed exit
226229
let default_panic = std::panic::take_hook();
227230
std::panic::set_hook(Box::new(move |panic_info| {
228231
// Capture panic in Sentry
@@ -231,10 +234,35 @@ async fn main() -> Result<()> {
231234
sentry::Level::Fatal,
232235
);
233236
// Flush Sentry before crashing
234-
sentry::Hub::current().client().map(|c| c.flush(Some(std::time::Duration::from_secs(2))));
237+
sentry::Hub::current().client().map(|c| c.flush(Some(std::time::Duration::from_secs(5))));
238+
239+
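// IMPORTANT: Block the panicking thread for CRASH_DELAY_SECS before invoking the
// default panic hook, so the crash is delayed and Watchtower can update the container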
240+
eprintln!("[FATAL] Panic detected. Waiting {}s before exit to allow container updates...", CRASH_DELAY_SECS);
241+
std::thread::sleep(std::time::Duration::from_secs(CRASH_DELAY_SECS));
242+
235243
// Call default panic handler
236244
default_panic(panic_info);
237245
}));
246+
247+
// Run validator and handle fatal errors with delayed exit
248+
if let Err(e) = run_validator().await {
249+
error!("Fatal error: {}", e);
250+
sentry::capture_message(&format!("FATAL: {}", e), sentry::Level::Fatal);
251+
if let Some(client) = sentry::Hub::current().client() {
252+
client.flush(Some(std::time::Duration::from_secs(5)));
253+
}
254+
255+
// IMPORTANT: Delay exit to allow Watchtower to update container
256+
error!("Waiting {}s before exit to allow Watchtower updates...", CRASH_DELAY_SECS);
257+
tokio::time::sleep(tokio::time::Duration::from_secs(CRASH_DELAY_SECS)).await;
258+
259+
return Err(e);
260+
}
261+
Ok(())
262+
}
263+
264+
/// Main validator logic
265+
async fn run_validator() -> Result<()> {
238266

239267
// Initialize logging with Sentry integration
240268
let subscriber = tracing_subscriber::fmt()

0 commit comments

Comments
 (0)