Skip to content

Commit c4e5d8f

Browse files
committed
fix: WASM module cache key mismatch breaking periodic sync
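The module cache uses the challenge UUID as its key (e.g. 'f3d209cd-...'), but periodic sync, sync-proposal handling, and evaluation were looking modules up with a '.wasm' suffix ('f3d209cd-....wasm'), causing cache misses and 'Failed to load WASM module' errors. Removed the '.wasm' suffix from all three call sites. Note: this commit also changes startup weight handling: last_weight_submission_epoch is now initialized to 0 instead of the current epoch, and the startup weight timer attempts an on-chain weight submission 90s after boot instead of being a no-op.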
1 parent d3a7575 commit c4e5d8f

File tree

1 file changed

+33
-27
lines changed

1 file changed

+33
-27
lines changed

bins/validator-node/src/main.rs

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -972,26 +972,7 @@ async fn main() -> Result<()> {
972972
let mut wasm_eval_interval = tokio::time::interval(Duration::from_secs(5));
973973
let mut stale_job_interval = tokio::time::interval(Duration::from_secs(120));
974974
let mut weight_check_interval = tokio::time::interval(Duration::from_secs(30));
975-
// Initialize to current epoch if we may have already submitted this epoch
976-
// (e.g., after a restart mid-epoch). This prevents 1010 errors from subtensor
977-
// rejecting duplicate commits. We'll submit on the NEXT epoch boundary.
978-
let mut last_weight_submission_epoch: u64 = if let Some(ref st) = subtensor {
979-
match st.get_current_block().await {
980-
Ok(block) => {
981-
let tempo = 360u64;
982-
let netuid_plus_one = (netuid as u64).saturating_add(1);
983-
let epoch = block.saturating_add(netuid_plus_one) / (tempo + 1);
984-
info!("Initializing last_weight_submission_epoch to current epoch {} (block {}) to avoid duplicate commits after restart", epoch, block);
985-
epoch
986-
}
987-
Err(e) => {
988-
warn!("Could not fetch current block for epoch init: {}", e);
989-
0
990-
}
991-
}
992-
} else {
993-
0
994-
};
975+
let mut last_weight_submission_epoch: u64 = 0;
995976
let startup_rpc_precompute_delay = tokio::time::sleep(Duration::from_secs(70));
996977
tokio::pin!(startup_rpc_precompute_delay);
997978
let mut startup_rpc_precomputed = false;
@@ -1637,12 +1618,37 @@ async fn main() -> Result<()> {
16371618
}
16381619
}
16391620

1640-
// Startup weight timer disabled: we init last_weight_submission_epoch to current
1641-
// epoch to avoid 1010 duplicate commit errors after restart. Weights will be
1642-
// submitted on the next epoch boundary via CommitWindowOpen from block_sync.
1621+
// Submit weights on-chain 90s after boot (after RPC pre-compute at 70s).
16431622
_ = &mut startup_weight_delay, if !startup_weights_submitted => {
16441623
startup_weights_submitted = true;
1645-
info!("Startup weight timer fired (no-op): weights will submit at next epoch boundary");
1624+
let current_block = state_manager.apply(|state| state.bittensor_block);
1625+
if current_block == 0 {
1626+
warn!("Startup weight submission skipped: blockchain not yet synced");
1627+
} else if subtensor.is_none() || subtensor_signer.is_none() {
1628+
warn!("Startup weight submission skipped: subtensor not connected");
1629+
} else if wasm_executor.is_none() {
1630+
warn!("Startup weight submission skipped: WASM executor not ready");
1631+
} else {
1632+
let has_challenges = {
1633+
let cs = chain_state.read();
1634+
cs.wasm_challenge_configs.iter().any(|(_, cfg)| cfg.is_active)
1635+
};
1636+
if !has_challenges {
1637+
warn!("Startup weight submission skipped: no active challenges loaded");
1638+
} else {
1639+
let tempo = 360u64;
1640+
let netuid_plus_one = (netuid as u64).saturating_add(1);
1641+
let epoch = current_block.saturating_add(netuid_plus_one) / (tempo + 1);
1642+
info!("Startup weight submission: epoch {} block {} (90s after boot)", epoch, current_block);
1643+
handle_block_event(
1644+
BlockSyncEvent::CommitWindowOpen { epoch, block: current_block },
1645+
&subtensor, &subtensor_signer, &subtensor_client,
1646+
&state_manager, netuid, version_key, &wasm_executor,
1647+
&keypair, &chain_state, &storage,
1648+
&mut last_weight_submission_epoch,
1649+
).await;
1650+
}
1651+
}
16461652
}
16471653

16481654
// Periodic checkpoint
@@ -1678,7 +1684,7 @@ async fn main() -> Result<()> {
16781684
let current_epoch = current_block / 360;
16791685
for challenge_id in challenges {
16801686
let challenge_id_str = challenge_id.to_string();
1681-
let module_path = format!("{}.wasm", challenge_id_str);
1687+
let module_path = challenge_id_str.clone();
16821688

16831689
if let Some(ref executor) = wasm_executor {
16841690
match executor.execute_sync_with_block(&module_path, current_block, current_epoch) {
@@ -3362,7 +3368,7 @@ async fn handle_network_event(
33623368
}
33633369
P2PMessage::ChallengeSyncProposal(proposal) => {
33643370
let challenge_id_str = proposal.challenge_id.to_string();
3365-
let module_path = format!("{}.wasm", challenge_id_str);
3371+
let module_path = challenge_id_str.clone();
33663372

33673373
// Compute our own sync hash to compare
33683374
let our_hash = if let Some(ref executor) = wasm_executor_ref {
@@ -4021,7 +4027,7 @@ async fn process_wasm_evaluations(
40214027
}
40224028

40234029
for (submission_id, challenge_id, _agent_hash) in pending {
4024-
let module_filename = format!("{}.wasm", challenge_id);
4030+
let module_filename = challenge_id.to_string();
40254031

40264032
if !executor.module_exists(&module_filename) {
40274033
debug!(

0 commit comments

Comments
 (0)