Skip to content

Commit 68a4052

Browse files
authored
fix: comprehensive security and reliability improvements (#4)
* fix(p2p-consensus): improve PBFT consensus security and reliability - Add double-vote prevention in handle_prepare and handle_commit - Validate ViewChange messages match NewView's view number - Fix race conditions in consensus round handling - Replace unwrap with proper error handling in finalize_evaluation - Improve merkle proof generation with is_multiple_of check * fix(storage): add size limits to bincode deserialization for security - Add MAX_ENTRY_SIZE limits to prevent memory exhaustion attacks - Use bincode::options().with_limit() for safe deserialization - Ensure bincode compatibility with serialize defaults - Fix ConflictResolution to use derive(Default) - Apply clippy suggestions for improved code quality * fix(secure-container-runtime): add container security hardening - Add enhanced path traversal protection in policy validation - Improve WebSocket transport security with better authentication - Update container broker with improved error handling - Add additional integration tests for security features * fix(platform-server): secure API endpoints with signature verification - Add signature verification to submit_evaluation endpoint - Add signature verification to all validator API endpoints - Secure CORS configuration and HTTP client error handling - Fix broadcast authentication bypass vulnerability - Add historical_weights pruning and JWT secret warning - Implement Display trait instead of ToString for SubmissionStatus * fix(validator): require authentication and improve security - Require authentication for validator node operations - Update container tests for security compliance - Fix clippy warnings for iterator patterns * chore: fix clippy warnings and apply code formatting - Add allow attributes for await_holding_lock in p2p_client - Apply cargo fmt formatting to all files - Fix CORS configuration formatting in platform server
1 parent eaaf4ad commit 68a4052

File tree

29 files changed

+956
-148
lines changed

29 files changed

+956
-148
lines changed

bins/platform/src/server.rs

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ pub async fn run(args: ServerArgs) -> Result<()> {
257257
get(api::evaluations::get_evaluations),
258258
)
259259
.with_state(state)
260-
.layer(CorsLayer::permissive())
260+
.layer(build_cors_layer())
261261
.layer(TraceLayer::new_for_http());
262262

263263
info!("╔══════════════════════════════════════════════════════════════╗");
@@ -437,10 +437,20 @@ async fn proxy_to_challenge(
437437
}
438438
};
439439

440-
let client = reqwest::Client::builder()
440+
let client = match reqwest::Client::builder()
441441
.timeout(std::time::Duration::from_secs(600))
442442
.build()
443-
.unwrap();
443+
{
444+
Ok(c) => c,
445+
Err(e) => {
446+
error!("Failed to create HTTP client: {}", e);
447+
return (
448+
StatusCode::INTERNAL_SERVER_ERROR,
449+
"Failed to create HTTP client",
450+
)
451+
.into_response();
452+
}
453+
};
444454

445455
let mut req_builder = client.request(method, &url);
446456
for (key, value) in headers.iter() {
@@ -481,6 +491,43 @@ async fn proxy_to_challenge(
481491
}
482492
}
483493

494+
/// Build CORS layer based on environment configuration.
495+
/// In development mode (DEVELOPMENT_MODE env var set), allows any origin.
496+
/// In production, only whitelisted origins are allowed.
497+
fn build_cors_layer() -> CorsLayer {
498+
let allowed_origins = std::env::var("CORS_ALLOWED_ORIGINS")
499+
.unwrap_or_else(|_| "https://platform.network,https://chain.platform.network".to_string());
500+
501+
if allowed_origins == "*" || std::env::var("DEVELOPMENT_MODE").is_ok() {
502+
tracing::warn!("CORS allowing all origins - this should only be used in development!");
503+
CorsLayer::new()
504+
.allow_origin(tower_http::cors::Any)
505+
.allow_methods(tower_http::cors::Any)
506+
.allow_headers(tower_http::cors::Any)
507+
} else {
508+
let origins: Vec<axum::http::HeaderValue> = allowed_origins
509+
.split(',')
510+
.filter_map(|s| s.trim().parse().ok())
511+
.collect();
512+
513+
CorsLayer::new()
514+
.allow_origin(origins)
515+
.allow_methods([
516+
axum::http::Method::GET,
517+
axum::http::Method::POST,
518+
axum::http::Method::PUT,
519+
axum::http::Method::DELETE,
520+
axum::http::Method::OPTIONS,
521+
])
522+
.allow_headers([
523+
axum::http::header::AUTHORIZATION,
524+
axum::http::header::CONTENT_TYPE,
525+
axum::http::header::ACCEPT,
526+
])
527+
.allow_credentials(true)
528+
}
529+
}
530+
484531
/// Start the container broker WebSocket server
485532
///
486533
/// This allows challenges to create sandboxed containers without direct Docker access.
@@ -507,11 +554,25 @@ async fn start_container_broker(port: u16) -> Result<()> {
507554
// WebSocket config with JWT auth
508555
let jwt_secret = std::env::var("BROKER_JWT_SECRET").ok();
509556

557+
// Determine if authentication should be required
558+
let is_dev_mode = std::env::var("DEVELOPMENT_MODE").is_ok();
559+
if is_dev_mode {
560+
warn!("SECURITY WARNING: DEVELOPMENT_MODE is set - WebSocket authentication is DISABLED!");
561+
warn!("DO NOT use DEVELOPMENT_MODE in production environments!");
562+
}
563+
564+
if jwt_secret.is_none() && !is_dev_mode {
565+
warn!("BROKER_JWT_SECRET not set - a random JWT secret will be generated");
566+
warn!("This means JWT tokens will be invalidated on server restart!");
567+
warn!("Set BROKER_JWT_SECRET environment variable for production use");
568+
}
569+
510570
let ws_config = WsConfig {
511571
bind_addr: format!("0.0.0.0:{}", port),
512572
jwt_secret,
513573
allowed_challenges: vec![], // Allow all challenges
514574
max_connections_per_challenge: 10,
575+
allow_unauthenticated: is_dev_mode, // Only allow unauthenticated in dev mode
515576
};
516577

517578
info!(

bins/validator-decentralized/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ async fn persist_state_to_storage(
414414

415415
/// Update validator set from metagraph data
416416
fn update_validator_set_from_metagraph(metagraph: &Metagraph, validator_set: &Arc<ValidatorSet>) {
417-
for (_uid, neuron) in &metagraph.neurons {
417+
for neuron in metagraph.neurons.values() {
418418
let hotkey_bytes: [u8; 32] = neuron.hotkey.clone().into();
419419
let hotkey = Hotkey(hotkey_bytes);
420420
// Get effective stake capped to u64::MAX (neuron.stake is u128)

bins/validator-node/src/main.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ async fn main() -> Result<()> {
478478
jwt_secret: Some(jwt_secret),
479479
allowed_challenges: vec![],
480480
max_connections_per_challenge: 10,
481+
allow_unauthenticated: false, // Require authentication in production
481482
};
482483
let broker_clone = broker.clone();
483484
tokio::spawn(async move {
@@ -951,6 +952,7 @@ fn load_keypair(args: &Args) -> Result<Keypair> {
951952

952953
/// Submit weights for a given epoch
953954
/// This is the core weight submission logic, extracted to be reusable
955+
#[allow(clippy::too_many_arguments)]
954956
async fn submit_weights_for_epoch(
955957
epoch: u64,
956958
platform_client: &Arc<PlatformServerClient>,
@@ -1188,6 +1190,7 @@ async fn submit_weights_for_epoch(
11881190
}
11891191
}
11901192

1193+
#[allow(clippy::too_many_arguments)]
11911194
async fn handle_block_event(
11921195
event: BlockSyncEvent,
11931196
platform_client: &Arc<PlatformServerClient>,

crates/challenge-sdk/src/p2p_client.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ impl P2PChallengeClient {
251251
/// # Returns
252252
///
253253
/// Returns a list of pending submissions or an error on timeout/failure.
254+
#[allow(clippy::await_holding_lock)]
254255
pub async fn get_pending_submissions(
255256
&self,
256257
limit: usize,
@@ -355,6 +356,7 @@ impl P2PChallengeClient {
355356
/// # Returns
356357
///
357358
/// Returns aggregated weights as (hotkey, weight) pairs or an error on timeout.
359+
#[allow(clippy::await_holding_lock)]
358360
pub async fn get_weights(&self, epoch: u64) -> Result<Vec<(String, f64)>, ChallengeError> {
359361
let msg = P2PChallengeMessage::RequestWeights {
360362
challenge_id: self.config.challenge_id.clone(),
@@ -458,6 +460,7 @@ impl P2PChallengeClient {
458460
/// Returns a tuple of (evaluations, final_score) where:
459461
/// - `evaluations` is a list of validator evaluation results
460462
/// - `final_score` is `Some(score)` if consensus was reached, `None` otherwise
463+
#[allow(clippy::await_holding_lock)]
461464
pub async fn get_evaluation_status(
462465
&self,
463466
submission_hash: &str,

crates/distributed-storage/src/challenge_store.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ fn build_merkle_proof(leaves: &[[u8; 32]], leaf_index: usize, data: &[u8]) -> Me
741741
let mut index = leaf_index;
742742

743743
while level.len() > 1 {
744-
let sibling_index = if index % 2 == 0 {
744+
let sibling_index = if index.is_multiple_of(2) {
745745
if index + 1 < level.len() {
746746
index + 1
747747
} else {

crates/distributed-storage/src/local.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! It stores data with replication metadata to track which remote nodes have copies.
55
66
use async_trait::async_trait;
7+
use bincode::Options;
78
use parking_lot::RwLock;
89
use serde::{Deserialize, Serialize};
910
use sled::{Db, Tree};
@@ -12,6 +13,10 @@ use std::path::Path;
1213
use std::sync::Arc;
1314
use tracing::{debug, trace};
1415

16+
/// Maximum size for deserializing storage entries (100MB).
17+
/// This limit prevents DoS attacks from malformed data causing excessive memory allocation.
18+
const MAX_ENTRY_SIZE: u64 = 100 * 1024 * 1024;
19+
1520
use crate::error::{StorageError, StorageResult};
1621
use crate::query::{
1722
block_index_key, block_range_end, block_range_start, parse_block_index_key, QueryBuilder,
@@ -160,7 +165,12 @@ impl LocalStorage {
160165

161166
match self.data_tree.get(&db_key)? {
162167
Some(bytes) => {
163-
let entry: LocalEntry = bincode::deserialize(&bytes)?;
168+
// Use options compatible with bincode::serialize (legacy format with fixint encoding)
169+
let entry: LocalEntry = bincode::options()
170+
.with_limit(MAX_ENTRY_SIZE)
171+
.with_fixint_encoding()
172+
.allow_trailing_bytes()
173+
.deserialize(&bytes)?;
164174
Ok(Some(entry))
165175
}
166176
None => Ok(None),
@@ -260,7 +270,12 @@ impl LocalStorage {
260270
for result in self.data_tree.iter() {
261271
let (key_bytes, value_bytes) = result?;
262272

263-
let entry: LocalEntry = bincode::deserialize(&value_bytes)?;
273+
// Use options compatible with bincode::serialize (legacy format with fixint encoding)
274+
let entry: LocalEntry = bincode::options()
275+
.with_limit(MAX_ENTRY_SIZE)
276+
.with_fixint_encoding()
277+
.allow_trailing_bytes()
278+
.deserialize(&value_bytes)?;
264279

265280
if entry.replication.needs_replication {
266281
// Parse the key back into a StorageKey
@@ -466,7 +481,12 @@ impl LocalStorage {
466481

467482
// Get the actual data
468483
if let Some(value_bytes) = self.data_tree.get(&data_key_bytes)? {
469-
let entry: LocalEntry = bincode::deserialize(&value_bytes)?;
484+
// Use options compatible with bincode::serialize (legacy format with fixint encoding)
485+
let entry: LocalEntry = bincode::options()
486+
.with_limit(MAX_ENTRY_SIZE)
487+
.with_fixint_encoding()
488+
.allow_trailing_bytes()
489+
.deserialize(&value_bytes)?;
470490

471491
// Skip expired entries
472492
if entry.value.metadata.is_expired() {
@@ -648,7 +668,12 @@ impl DistributedStore for LocalStorage {
648668

649669
// Get the actual data
650670
if let Some(value_bytes) = self.data_tree.get(&data_key)? {
651-
let entry: LocalEntry = bincode::deserialize(&value_bytes)?;
671+
// Use options compatible with bincode::serialize (legacy format with fixint encoding)
672+
let entry: LocalEntry = bincode::options()
673+
.with_limit(MAX_ENTRY_SIZE)
674+
.with_fixint_encoding()
675+
.allow_trailing_bytes()
676+
.deserialize(&value_bytes)?;
652677

653678
// Skip expired entries
654679
if entry.value.metadata.is_expired() {

crates/distributed-storage/src/query.rs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,25 @@
33
//! This module provides SQL-like query capabilities using sled's range iterators.
44
//! It supports block-based filtering, pagination, and fluent query construction.
55
6+
use bincode::Options;
67
use serde::{Deserialize, Serialize};
78

89
use crate::store::{StorageKey, StoredValue};
910

11+
/// Maximum size for deserializing query cursor data (1MB).
12+
/// This limit prevents DoS attacks from malformed data causing excessive memory allocation.
13+
/// Cursors are small structures, so 1MB is more than sufficient.
14+
const MAX_CURSOR_SIZE: u64 = 1024 * 1024;
15+
16+
/// Create bincode options with size limit for safe deserialization.
17+
/// Uses fixint encoding and allows trailing bytes for compatibility with `bincode::serialize()`.
18+
fn bincode_options() -> impl Options {
19+
bincode::options()
20+
.with_limit(MAX_CURSOR_SIZE)
21+
.with_fixint_encoding()
22+
.allow_trailing_bytes()
23+
}
24+
1025
/// Result of a query operation with pagination support
1126
#[derive(Clone, Debug)]
1227
pub struct QueryResult {
@@ -112,9 +127,10 @@ impl QueryCursor {
112127
bincode::serialize(self).unwrap_or_default()
113128
}
114129

115-
/// Decode cursor from bytes
130+
/// Decode cursor from bytes with size limit protection.
131+
/// Limits deserialization to MAX_CURSOR_SIZE bytes to prevent DoS via memory exhaustion.
116132
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
117-
bincode::deserialize(bytes).ok()
133+
bincode_options().deserialize(bytes).ok()
118134
}
119135

120136
/// Encode cursor to base64 string
@@ -156,10 +172,10 @@ impl QueryFilter {
156172
/// Check if an entry matches this filter
157173
pub fn matches(&self, block_id: Option<u64>, created_at: i64, key: &[u8]) -> bool {
158174
match self {
159-
QueryFilter::BlockBefore(max_block) => block_id.map_or(true, |b| b < *max_block),
160-
QueryFilter::BlockAfter(min_block) => block_id.map_or(false, |b| b > *min_block),
175+
QueryFilter::BlockBefore(max_block) => block_id.is_none_or(|b| b < *max_block),
176+
QueryFilter::BlockAfter(min_block) => block_id.is_some_and(|b| b > *min_block),
161177
QueryFilter::BlockRange { start, end } => {
162-
block_id.map_or(false, |b| b >= *start && b <= *end)
178+
block_id.is_some_and(|b| b >= *start && b <= *end)
163179
}
164180
QueryFilter::CreatedBefore(timestamp) => created_at < *timestamp,
165181
QueryFilter::CreatedAfter(timestamp) => created_at > *timestamp,

crates/distributed-storage/src/replication.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,22 +102,17 @@ impl ReplicationConfig {
102102
}
103103

104104
/// Strategy for resolving conflicts between divergent values
105-
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
105+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
106106
pub enum ConflictResolution {
107107
/// Last write wins (based on timestamp)
108+
#[default]
108109
LastWriteWins,
109110
/// Highest version number wins
110111
HighestVersion,
111112
/// Custom merge function (caller provides)
112113
Custom,
113114
}
114115

115-
impl Default for ConflictResolution {
116-
fn default() -> Self {
117-
Self::LastWriteWins
118-
}
119-
}
120-
121116
/// Replication policy for a namespace
122117
#[derive(Clone, Debug, Serialize, Deserialize)]
123118
pub struct ReplicationPolicy {

crates/distributed-storage/src/store.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ impl StorageKey {
6666
/// Compute SHA256 hash of the key (for DHT routing)
6767
pub fn hash(&self) -> [u8; 32] {
6868
let mut hasher = Sha256::new();
69-
hasher.update(&self.to_bytes());
69+
hasher.update(self.to_bytes());
7070
hasher.finalize().into()
7171
}
7272

crates/distributed-storage/src/submission.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ impl AggregatedEvaluations {
329329
scores.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
330330

331331
let len = scores.len();
332-
let median = if len % 2 == 0 {
332+
let median = if len.is_multiple_of(2) {
333333
(scores[len / 2 - 1] + scores[len / 2]) / 2.0
334334
} else {
335335
scores[len / 2]

0 commit comments

Comments
 (0)