From b10c4b9ad70918ffb6c20f943729908b73ab5a43 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Fri, 13 Feb 2026 02:20:50 +0200 Subject: [PATCH 1/9] feat: Add allowlist regex suppression for secret findings - Introduce global and rule-scoped allowlist patterns in SecretDetectionConfig - Suppress secret findings matching allowlist regex before reporting - Track suppression counts and breakdown in scan metadata - Extend SecretFinding and Finding with secret_metadata for detailed context - Implement SecretVerificationState for secret findings and API responses - Update tests for allowlist suppression and metadata propagation This enables flexible exclusion of known or placeholder secrets and improves triage accuracy. --- .github/copilot-instructions.md | 2 +- CHANGELOG.md | 2 + vulnera-api/src/module.rs | 1 + vulnera-core/src/config/mod.rs | 6 + vulnera-core/src/config/validation.rs | 28 +++ vulnera-core/src/domain/module/entities.rs | 33 ++++ vulnera-deps/src/module.rs | 1 + vulnera-sast/src/module.rs | 1 + vulnera-secrets/src/application/use_cases.rs | 96 +++++++++- vulnera-secrets/src/domain/entities.rs | 13 ++ vulnera-secrets/src/domain/mod.rs | 2 +- .../src/infrastructure/baseline/repository.rs | 176 ++++++++++++++++-- .../infrastructure/detectors/ast_extractor.rs | 149 +++++++++++---- .../detectors/detector_engine.rs | 91 ++++++--- .../detectors/entropy_detector.rs | 116 +++++++++++- .../src/infrastructure/git/scanner.rs | 146 ++++++++++++--- .../infrastructure/verification/verifier.rs | 92 +++++++-- vulnera-secrets/src/module.rs | 46 ++++- .../tests/test_secret_detection.rs | 68 +++++++ vulnera-secrets/tests/test_verifiers.rs | 49 ++++- 20 files changed, 993 insertions(+), 125 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8841f048..715709a3 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -46,7 +46,7 @@ vulnera-cli (standalone workspace - 
offline analysis + server API calls) - **SAST**: Supports **Python, JavaScript, TypeScript, Rust, Go, C, C++** using Tree-sitter. - **Sandboxing**: Tiered isolation. Linux (Landlock + Seccomp) -> Process -> fallback. `SandboxPolicy::for_analysis` includes system paths, `/tmp` RW, and optional HTTP/Redis ports. -- **Dependency Analysis**: Cross-ecosystem (NPM, PyPI, Cargo, Maven) with `vulnera-advisor` intelligence. +- **Dependency Analysis**: Cross-ecosystem (NPM, PyPI, Cargo, Maven) with `vulnera-advisor` intelligence. Supports CWE filtering and Git commit range matching. - **LLM**: Integrated `GeminiLlmProvider` for automated remediation and finding enrichment. ## Configuration & Secrets diff --git a/CHANGELOG.md b/CHANGELOG.md index 5415290b..719dc0fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ The format is based on Keep a Changelog and this project adheres to Semantic Ver ### Added - **Dependency Analysis Improvements:** - Precise semver interval intersection in `VersionRange::overlaps_with` for better vulnerability matching. + - Support for Git commit range matching in vulnerability checks via `matches_git_range`. + - CWE ID filtering and normalization (standardizing formats like "79" → "CWE-79"). - Robust glob pattern matching for dependency ignore rules using `globset`. - Resolution of dependency edges to actual package versions in npm, Ruby, PHP, Python (uv), and Rust lockfile parsers. - Preserved dependency edges for git/path dependencies in Cargo parser for accurate graph analysis. 
diff --git a/vulnera-api/src/module.rs b/vulnera-api/src/module.rs index e25f42f7..b6b4fa28 100644 --- a/vulnera-api/src/module.rs +++ b/vulnera-api/src/module.rs @@ -142,6 +142,7 @@ impl AnalysisModule for ApiSecurityModule { confidence: FindingConfidence::High, // API spec findings are high confidence description: f.description, recommendation: Some(f.recommendation), + secret_metadata: None, enrichment: None, }) .collect(); diff --git a/vulnera-core/src/config/mod.rs b/vulnera-core/src/config/mod.rs index 25d54d83..5fbfc8b2 100644 --- a/vulnera-core/src/config/mod.rs +++ b/vulnera-core/src/config/mod.rs @@ -790,6 +790,10 @@ pub struct SecretDetectionConfig { pub verification_concurrent_limit: usize, /// Optional path to baseline file for tracking known secrets pub baseline_file_path: Option, + /// Global allowlist regex patterns; matches are suppressed before reporting + pub global_allowlist_patterns: Vec, + /// Rule-scoped allowlist regex patterns keyed by rule id + pub rule_allowlist_patterns: std::collections::HashMap>, /// Whether to update baseline after scan pub update_baseline: bool, /// Whether to scan git history for secrets @@ -842,6 +846,8 @@ impl Default for SecretDetectionConfig { verification_timeout_seconds: 5, verification_concurrent_limit: 10, baseline_file_path: None, + global_allowlist_patterns: Vec::new(), + rule_allowlist_patterns: std::collections::HashMap::new(), update_baseline: false, scan_git_history: false, max_commits_to_scan: None, diff --git a/vulnera-core/src/config/validation.rs b/vulnera-core/src/config/validation.rs index dc12f573..35ab406f 100644 --- a/vulnera-core/src/config/validation.rs +++ b/vulnera-core/src/config/validation.rs @@ -422,6 +422,34 @@ impl Validate for SecretDetectionConfig { )); } + // Validate global allowlist regex patterns + for pattern in &self.global_allowlist_patterns { + if let Err(err) = regex::Regex::new(pattern) { + return Err(ValidationError::secret_detection(format!( + "Invalid global allowlist 
regex pattern '{}': {}", + pattern, err + ))); + } + } + + // Validate rule allowlist regex patterns + for (rule_id, patterns) in &self.rule_allowlist_patterns { + if rule_id.trim().is_empty() { + return Err(ValidationError::secret_detection( + "rule_allowlist_patterns contains empty rule id".to_string(), + )); + } + + for pattern in patterns { + if let Err(err) = regex::Regex::new(pattern) { + return Err(ValidationError::secret_detection(format!( + "Invalid allowlist regex for rule '{}': '{}': {}", + rule_id, pattern, err + ))); + } + } + } + // Validate scan_timeout_seconds > 0 if Some if let Some(scan_timeout) = self.scan_timeout_seconds { if scan_timeout == 0 { diff --git a/vulnera-core/src/domain/module/entities.rs b/vulnera-core/src/domain/module/entities.rs index e30419d4..255333f0 100644 --- a/vulnera-core/src/domain/module/entities.rs +++ b/vulnera-core/src/domain/module/entities.rs @@ -69,11 +69,44 @@ pub struct Finding { pub description: String, /// Recommended remediation (if available) pub recommendation: Option, + /// Secret-specific metadata (populated only for secret findings) + #[serde(skip_serializing_if = "Option::is_none")] + pub secret_metadata: Option, /// LLM-generated enrichment data (populated on-demand via enrichment endpoint) #[serde(skip_serializing_if = "Option::is_none")] pub enrichment: Option, } +/// Secret-specific metadata attached to secret findings +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct SecretFindingMetadata { + /// Secret detector identifier + pub detector_id: String, + /// Verification state returned by the verifier subsystem + pub verification_state: SecretVerificationState, + /// Redacted secret snippet (safe for display) + pub redacted_secret: String, + /// Optional entropy value when entropy detector contributed to the finding + pub entropy: Option, + /// Optional evidence notes used during scoring and triage + pub evidence: Vec, +} + +/// Verification state for detected secrets 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +pub enum SecretVerificationState { + /// Secret was successfully verified against provider/API + Verified, + /// Secret was checked and determined invalid + Invalid, + /// Verification attempted but result is indeterminate (timeouts/network/provider errors) + Unknown, + /// Verification available but not attempted for this finding + Unverified, + /// No verifier exists for this secret type + NotSupported, +} + /// Finding type #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub enum FindingType { diff --git a/vulnera-deps/src/module.rs b/vulnera-deps/src/module.rs index 79568972..22a35d94 100644 --- a/vulnera-deps/src/module.rs +++ b/vulnera-deps/src/module.rs @@ -255,6 +255,7 @@ impl AnalysisModule for DependencyAnalyzerModule { None } }, + secret_metadata: None, enrichment: None, }; findings.push(finding); diff --git a/vulnera-sast/src/module.rs b/vulnera-sast/src/module.rs index 62344435..7761d8e6 100644 --- a/vulnera-sast/src/module.rs +++ b/vulnera-sast/src/module.rs @@ -167,6 +167,7 @@ impl AnalysisModule for SastModule { }, description: f.description, recommendation: f.recommendation, + secret_metadata: None, enrichment: None, }) .collect(); diff --git a/vulnera-secrets/src/application/use_cases.rs b/vulnera-secrets/src/application/use_cases.rs index e2ac788f..21fb6841 100644 --- a/vulnera-secrets/src/application/use_cases.rs +++ b/vulnera-secrets/src/application/use_cases.rs @@ -1,5 +1,6 @@ //! 
Secret detection use cases +use regex::Regex; use std::path::Path; use std::sync::Arc; use tokio::sync::Semaphore; @@ -21,6 +22,9 @@ use crate::infrastructure::verification::VerificationService; pub struct ScanResult { pub findings: Vec, pub files_scanned: usize, + pub baseline_suppressed: usize, + pub allowlist_suppressed: usize, + pub suppression_breakdown: std::collections::HashMap, } /// Use case for scanning a project for secrets @@ -41,6 +45,10 @@ pub struct ScanForSecretsUseCase { scan_markdown_codeblocks: bool, /// Overall scan timeout scan_timeout: Option, + /// Compiled global allowlist regex patterns + global_allowlist_patterns: Vec, + /// Compiled rule-scoped allowlist regex patterns + rule_allowlist_patterns: std::collections::HashMap>, } impl ScanForSecretsUseCase { @@ -106,6 +114,36 @@ impl ScanForSecretsUseCase { let file_read_timeout = Duration::from_secs(config.file_read_timeout_seconds); let scan_timeout = config.scan_timeout_seconds.map(Duration::from_secs); + let global_allowlist_patterns = config + .global_allowlist_patterns + .iter() + .filter_map(|pattern| match Regex::new(pattern) { + Ok(regex) => Some(regex), + Err(err) => { + warn!(pattern = %pattern, error = %err, "Invalid global allowlist regex; skipping pattern"); + None + } + }) + .collect(); + + let mut rule_allowlist_patterns: std::collections::HashMap> = + std::collections::HashMap::new(); + for (rule_id, patterns) in &config.rule_allowlist_patterns { + let mut compiled_patterns = Vec::new(); + for pattern in patterns { + match Regex::new(pattern) { + Ok(regex) => compiled_patterns.push(regex), + Err(err) => { + warn!(rule_id = %rule_id, pattern = %pattern, error = %err, "Invalid rule allowlist regex; skipping pattern"); + } + } + } + + if !compiled_patterns.is_empty() { + rule_allowlist_patterns.insert(rule_id.clone(), compiled_patterns); + } + } + Self { scanner, detector_engine: detector_engine.clone(), @@ -118,6 +156,8 @@ impl ScanForSecretsUseCase { file_read_timeout, 
scan_markdown_codeblocks: config.scan_markdown_codeblocks, scan_timeout, + global_allowlist_patterns, + rule_allowlist_patterns, } } @@ -264,12 +304,28 @@ impl ScanForSecretsUseCase { } } + // Filter findings using allowlists first + let mut allowlist_suppressed = 0usize; + let mut suppression_breakdown = std::collections::HashMap::new(); + let mut allowlist_filtered = Vec::new(); + + for finding in all_findings { + match self.allowlist_reason(&finding) { + Some(reason) => { + allowlist_suppressed += 1; + *suppression_breakdown.entry(reason).or_insert(0) += 1; + } + None => allowlist_filtered.push(finding), + } + } + // Filter findings using baseline if available + let mut baseline_suppressed = 0usize; let filtered_findings = if let Some(ref baseline_repo) = self.baseline_repository { let mut filtered = Vec::new(); let mut new_findings_for_baseline = Vec::new(); - for finding in all_findings { + for finding in allowlist_filtered { match baseline_repo.contains(&finding) { Ok(true) => { debug!( @@ -278,6 +334,10 @@ impl ScanForSecretsUseCase { "Finding filtered by baseline" ); // Skip findings that exist in baseline (assumed false positives) + baseline_suppressed += 1; + *suppression_breakdown + .entry("baseline".to_string()) + .or_insert(0) += 1; } Ok(false) => { filtered.push(finding.clone()); @@ -303,7 +363,11 @@ impl ScanForSecretsUseCase { let entries: Vec<_> = new_findings_for_baseline .iter() .map(|finding| { - FileBaselineRepository::finding_to_entry(finding, true, false) + let is_verified = matches!( + finding.verification_state, + crate::domain::entities::SecretVerificationState::Verified + ); + FileBaselineRepository::finding_to_entry(finding, true, is_verified) }) .collect(); @@ -316,7 +380,7 @@ impl ScanForSecretsUseCase { filtered } else { - all_findings + allowlist_filtered }; info!( @@ -326,6 +390,9 @@ impl ScanForSecretsUseCase { Ok(ScanResult { findings: filtered_findings, files_scanned, + baseline_suppressed, + allowlist_suppressed, + 
suppression_breakdown, }) }; @@ -426,6 +493,29 @@ impl ScanForSecretsUseCase { } } +impl ScanForSecretsUseCase { + fn allowlist_reason(&self, finding: &SecretFinding) -> Option { + if self + .global_allowlist_patterns + .iter() + .any(|pattern| pattern.is_match(&finding.matched_secret)) + { + return Some("allowlist:global".to_string()); + } + + if let Some(patterns) = self.rule_allowlist_patterns.get(&finding.rule_id) { + if patterns + .iter() + .any(|pattern| pattern.is_match(&finding.matched_secret)) + { + return Some(format!("allowlist:rule:{}", finding.rule_id)); + } + } + + None + } +} + impl Default for ScanForSecretsUseCase { fn default() -> Self { Self::new() diff --git a/vulnera-secrets/src/domain/entities.rs b/vulnera-secrets/src/domain/entities.rs index a3e5cdbe..bec4ae81 100644 --- a/vulnera-secrets/src/domain/entities.rs +++ b/vulnera-secrets/src/domain/entities.rs @@ -9,14 +9,27 @@ use super::value_objects::Confidence; pub struct SecretFinding { pub id: String, pub rule_id: String, + pub detector_id: String, pub secret_type: SecretType, pub location: Location, pub severity: Severity, pub confidence: Confidence, + pub verification_state: SecretVerificationState, pub description: String, pub recommendation: Option, pub matched_secret: String, // Partial/redacted secret for context pub entropy: Option, + pub evidence: Vec, +} + +/// Verification state for a secret finding +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum SecretVerificationState { + Verified, + Invalid, + Unknown, + Unverified, + NotSupported, } /// Location of a finding in source code diff --git a/vulnera-secrets/src/domain/mod.rs b/vulnera-secrets/src/domain/mod.rs index 0014f224..a5a22bcb 100644 --- a/vulnera-secrets/src/domain/mod.rs +++ b/vulnera-secrets/src/domain/mod.rs @@ -3,5 +3,5 @@ pub mod entities; pub mod value_objects; -pub use entities::{Location, SecretFinding, SecretType, Severity}; +pub use entities::{Location, SecretFinding, SecretType, 
SecretVerificationState, Severity}; pub use value_objects::{Confidence, Entropy, EntropyEncoding, RulePattern, SecretRule}; diff --git a/vulnera-secrets/src/infrastructure/baseline/repository.rs b/vulnera-secrets/src/infrastructure/baseline/repository.rs index 12762965..f631b3a0 100644 --- a/vulnera-secrets/src/infrastructure/baseline/repository.rs +++ b/vulnera-secrets/src/infrastructure/baseline/repository.rs @@ -1,6 +1,7 @@ //! Baseline repository for tracking known secrets use crate::domain::entities::SecretFinding; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use std::path::Path; use std::sync::{Arc, RwLock}; @@ -9,8 +10,8 @@ use tracing::{debug, info}; /// Baseline entry for a secret finding #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BaselineEntry { - /// Hash of the secret (for identification) - pub secret_hash: String, + /// Stable finding fingerprint (v2 format) + pub fingerprint: String, /// File path where secret was found pub file_path: String, /// Line number @@ -21,6 +22,19 @@ pub struct BaselineEntry { pub is_secret: bool, /// Whether the secret was verified pub is_verified: bool, + /// Optional suppression reason + #[serde(default)] + pub suppression_reason: Option, + /// Timestamp when this entry was created + #[serde(default = "default_suppressed_at")] + pub suppressed_at: DateTime, + /// Optional detector id used for this finding + #[serde(default)] + pub detector_id: Option, +} + +fn default_suppressed_at() -> DateTime { + Utc::now() } /// Baseline structure @@ -35,7 +49,7 @@ pub struct Baseline { impl Default for Baseline { fn default() -> Self { Self { - version: "1.0".to_string(), + version: "2.0".to_string(), entries: Vec::new(), } } @@ -148,11 +162,72 @@ impl FileBaselineRepository { Ok(baseline) } - /// Hash a secret for baseline tracking - pub fn hash_secret(secret: &str) -> String { + /// Normalize path separators for stable cross-platform fingerprints + fn normalize_path(path: &str) -> String { + 
path.replace('\\', "/") + } + + fn token_shape(secret: &str) -> String { + if secret.is_empty() { + return "empty".to_string(); + } + + let mut has_lower = false; + let mut has_upper = false; + let mut has_digit = false; + let mut has_symbol = false; + + for ch in secret.chars() { + if ch.is_ascii_lowercase() { + has_lower = true; + } else if ch.is_ascii_uppercase() { + has_upper = true; + } else if ch.is_ascii_digit() { + has_digit = true; + } else { + has_symbol = true; + } + } + + format!( + "len:{}|l:{}|u:{}|d:{}|s:{}", + secret.len(), + has_lower, + has_upper, + has_digit, + has_symbol + ) + } + + /// Create a stable fingerprint for a finding (v2) + pub fn finding_fingerprint(finding: &SecretFinding) -> String { use sha2::{Digest, Sha256}; + + let normalized_path = Self::normalize_path(&finding.location.file_path); + let detector_id = if finding.detector_id.is_empty() { + finding.rule_id.as_str() + } else { + finding.detector_id.as_str() + }; + let token_shape = Self::token_shape(&finding.matched_secret); + let context = finding.evidence.join("|"); + + let canonical = format!( + "v2|detector:{}|rule:{}|path:{}|line:{}|col:{:?}|end_line:{:?}|end_col:{:?}|stype:{:?}|shape:{}|ctx:{}", + detector_id, + finding.rule_id, + normalized_path, + finding.location.line, + finding.location.column, + finding.location.end_line, + finding.location.end_column, + finding.secret_type, + token_shape, + context + ); + let mut hasher = Sha256::new(); - hasher.update(secret.as_bytes()); + hasher.update(canonical.as_bytes()); format!("{:x}", hasher.finalize()) } } @@ -199,15 +274,13 @@ impl BaselineRepository for FileBaselineRepository { fn contains(&self, finding: &SecretFinding) -> Result { // Use cached baseline to avoid file I/O let baseline = self.load_cached()?; - let secret_hash = Self::hash_secret(&finding.matched_secret); + let fingerprint = Self::finding_fingerprint(finding); // Check if finding exists in baseline - let exists = baseline.entries.iter().any(|entry| { - 
entry.secret_hash == secret_hash - && entry.file_path == finding.location.file_path - && entry.line == finding.location.line - && entry.rule_id == finding.rule_id - }); + let exists = baseline + .entries + .iter() + .any(|entry| entry.fingerprint == fingerprint); Ok(exists) } @@ -236,12 +309,15 @@ impl FileBaselineRepository { is_verified: bool, ) -> BaselineEntry { BaselineEntry { - secret_hash: Self::hash_secret(&finding.matched_secret), + fingerprint: Self::finding_fingerprint(finding), file_path: finding.location.file_path.clone(), line: finding.location.line, rule_id: finding.rule_id.clone(), is_secret, is_verified, + suppression_reason: Some("baseline_suppression".to_string()), + suppressed_at: Utc::now(), + detector_id: Some(finding.detector_id.clone()), } } } @@ -258,3 +334,75 @@ pub enum BaselineError { #[error("Serialize error: {0}")] SerializeError(String), } + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::entities::{ + Location, SecretFinding, SecretType, SecretVerificationState, Severity, + }; + use crate::domain::value_objects::Confidence; + + fn sample_finding() -> SecretFinding { + SecretFinding { + id: "f-1".to_string(), + rule_id: "github-token".to_string(), + detector_id: "github-token".to_string(), + secret_type: SecretType::GitHubToken, + location: Location { + file_path: "src/main.rs".to_string(), + line: 42, + column: Some(5), + end_line: Some(42), + end_column: Some(48), + }, + severity: Severity::High, + confidence: Confidence::High, + verification_state: SecretVerificationState::Verified, + description: "GitHub token detected".to_string(), + recommendation: Some("Rotate token".to_string()), + matched_secret: "ghp_123456789012345678901234567890123456".to_string(), + entropy: None, + evidence: vec!["detection:regex".to_string()], + } + } + + #[test] + fn baseline_contains_v2_fingerprint_entry() { + let temp = tempfile::tempdir().unwrap(); + let baseline_path = temp.path().join("baseline.json"); + let repo = 
FileBaselineRepository::new(&baseline_path); + let finding = sample_finding(); + + let entry = FileBaselineRepository::finding_to_entry(&finding, true, true); + repo.add_entries(vec![entry]).unwrap(); + + assert!(repo.contains(&finding).unwrap()); + } + + #[test] + fn baseline_does_not_match_non_fingerprint_entry() { + let temp = tempfile::tempdir().unwrap(); + let baseline_path = temp.path().join("baseline.json"); + let repo = FileBaselineRepository::new(&baseline_path); + let finding = sample_finding(); + + let baseline = Baseline { + version: "2.0".to_string(), + entries: vec![BaselineEntry { + fingerprint: String::new(), + file_path: finding.location.file_path.clone(), + line: finding.location.line, + rule_id: finding.rule_id.clone(), + is_secret: true, + is_verified: true, + suppression_reason: None, + suppressed_at: Utc::now(), + detector_id: None, + }], + }; + + repo.save(&baseline).unwrap(); + assert!(!repo.contains(&finding).unwrap()); + } +} diff --git a/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs b/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs index bf26ae3f..60ee3893 100644 --- a/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs +++ b/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs @@ -1,6 +1,7 @@ //! AST Context extraction using tree-sitter to provide semantic metadata for secret detection. 
use crate::domain::value_objects::SemanticContext; +use std::collections::HashMap; use std::path::Path; use tree_sitter::{Language, Node, Parser, Point}; @@ -21,63 +22,114 @@ impl AstContextExtractor { column: u32, file_path: &Path, ) -> SemanticContext { - let mut context = SemanticContext { - is_test_context: Self::is_test_file(file_path), - ..Default::default() - }; + let contexts = Self::extract_contexts(source, &[(line, column)], file_path); + contexts + .get(&(line, column)) + .cloned() + .unwrap_or_else(|| SemanticContext { + is_test_context: Self::is_test_file(file_path), + ..Default::default() + }) + } + + /// Extract semantic contexts for multiple locations in one parse pass. + pub fn extract_contexts( + source: &str, + positions: &[(u32, u32)], + file_path: &Path, + ) -> HashMap<(u32, u32), SemanticContext> { + let is_test_context = Self::is_test_file(file_path); + let mut contexts = HashMap::new(); + + if positions.is_empty() { + return contexts; + } // Determine language from extension let extension = file_path.extension().and_then(|s| s.to_str()).unwrap_or(""); let language = match Self::get_language(extension) { Some(l) => l, - None => return context, // Fallback if language not supported + None => { + for (line, column) in positions { + contexts.insert( + (*line, *column), + SemanticContext { + is_test_context, + ..Default::default() + }, + ); + } + return contexts; + } }; // Initialize parser + let context = SemanticContext { + is_test_context, + ..Default::default() + }; let mut parser = Parser::new(); if parser.set_language(&language).is_err() { - return context; + for (line, column) in positions { + contexts.insert((*line, *column), context.clone()); + } + return contexts; } // Parse source let tree = match parser.parse(source, None) { Some(t) => t, - None => return context, + None => { + for (line, column) in positions { + contexts.insert((*line, *column), context.clone()); + } + return contexts; + } }; - // Convert 1-based coordinates to 
0-based Point for tree-sitter - let row = line.saturating_sub(1) as usize; - let col = column.saturating_sub(1) as usize; - let start_point = Point::new(row, col); - let end_point = Point::new(row, col + 1); - - // Find the most specific node at the position (including anonymous nodes like comments) let root = tree.root_node(); - if let Some(node) = root.named_descendant_for_point_range(start_point, end_point) { - context.node_type = node.kind().to_string(); - - // Check if we're inside a comment by walking up the tree - let mut current = Some(node); - while let Some(n) = current { - let kind = n.kind(); - if kind == "comment" - || kind == "line_comment" - || kind == "block_comment" - || kind == "string_comment" - { - context.node_type = kind.to_string(); - break; + + for (line, column) in positions { + let mut current_context = SemanticContext { + is_test_context, + ..Default::default() + }; + + // Convert 1-based coordinates to 0-based Point for tree-sitter + let row = line.saturating_sub(1) as usize; + let col = column.saturating_sub(1) as usize; + let start_point = Point::new(row, col); + let end_point = Point::new(row, col + 1); + + // Find the most specific node at the position (including anonymous nodes like comments) + if let Some(node) = root.named_descendant_for_point_range(start_point, end_point) { + current_context.node_type = node.kind().to_string(); + + // Check if we're inside a comment by walking up the tree + let mut current = Some(node); + while let Some(n) = current { + let kind = n.kind(); + if kind == "comment" + || kind == "line_comment" + || kind == "block_comment" + || kind == "string_comment" + { + current_context.node_type = kind.to_string(); + break; + } + current = n.parent(); } - current = n.parent(); + + let (lhs, rhs) = Self::find_assignment_context(node, source); + current_context.lhs_variable = lhs; + current_context.rhs_value = rhs; } - let (lhs, rhs) = Self::find_assignment_context(node, source); - context.lhs_variable = lhs; - 
context.rhs_value = rhs; + contexts.insert((*line, *column), current_context); } - context + contexts } /// Maps file extensions to tree-sitter languages. @@ -211,3 +263,34 @@ impl AstContextExtractor { (lhs_variable, rhs_value) } } + +#[cfg(test)] +mod tests { + use super::AstContextExtractor; + use std::path::Path; + + #[test] + fn batch_extraction_matches_single_extraction() { + let source = "const api_key = \"ghp_123456789012345678901234567890123456\";\n// comment"; + let path = Path::new("src/main.js"); + + let single = AstContextExtractor::extract_context(source, 1, 7, path); + let batch = AstContextExtractor::extract_contexts(source, &[(1, 7)], path); + let batch_ctx = batch.get(&(1, 7)).cloned().unwrap_or_default(); + + assert_eq!(single.node_type, batch_ctx.node_type); + assert_eq!(single.lhs_variable, batch_ctx.lhs_variable); + assert_eq!(single.rhs_value, batch_ctx.rhs_value); + assert_eq!(single.is_test_context, batch_ctx.is_test_context); + } + + #[test] + fn batch_extraction_marks_test_context() { + let source = "const token = \"dummy\";"; + let path = Path::new("tests/sample.test.js"); + let contexts = AstContextExtractor::extract_contexts(source, &[(1, 7)], path); + let context = contexts.get(&(1, 7)).cloned().unwrap_or_default(); + + assert!(context.is_test_context); + } +} diff --git a/vulnera-secrets/src/infrastructure/detectors/detector_engine.rs b/vulnera-secrets/src/infrastructure/detectors/detector_engine.rs index f77b0894..464ad2f6 100644 --- a/vulnera-secrets/src/infrastructure/detectors/detector_engine.rs +++ b/vulnera-secrets/src/infrastructure/detectors/detector_engine.rs @@ -1,6 +1,8 @@ //! 
Detector engine that orchestrates multiple detectors -use crate::domain::entities::{Location, SecretFinding, SecretType, Severity}; +use crate::domain::entities::{ + Location, SecretFinding, SecretType, SecretVerificationState, Severity, +}; use crate::domain::value_objects::{Confidence, SecretRule, ValidationResult}; use crate::infrastructure::detectors::ast_extractor::AstContextExtractor; use crate::infrastructure::detectors::semantic_validator::{HeuristicValidator, SemanticValidator}; @@ -9,6 +11,7 @@ use crate::infrastructure::rules::RuleRepository; use crate::infrastructure::verification::{VerificationResult, VerificationService}; use globset::{Glob, GlobMatcher}; use std::collections::HashMap; +use std::collections::HashSet; use std::path::Path; use std::sync::Arc; use tracing::debug; @@ -132,15 +135,26 @@ impl DetectorEngine { } } + // Build semantic contexts in one parse pass for all candidate positions + let mut positions = HashSet::new(); + for (line_number, regex_match) in &all_regex_matches { + positions.insert((*line_number, regex_match.start_pos as u32 + 1)); + } + for (line_number, entropy_match) in &all_entropy_matches { + positions.insert((*line_number, entropy_match.start_pos as u32 + 1)); + } + let positions_vec: Vec<(u32, u32)> = positions.into_iter().collect(); + let semantic_contexts = + AstContextExtractor::extract_contexts(content, &positions_vec, file_path); + // Pass 2: Process regex findings with verification using collected context for (line_number, regex_match) in all_regex_matches { // Stage 2: AST Analysis - let semantic_context = AstContextExtractor::extract_context( - content, - line_number, - regex_match.start_pos as u32 + 1, - file_path, - ); + let column = regex_match.start_pos as u32 + 1; + let semantic_context = semantic_contexts + .get(&(line_number, column)) + .cloned() + .unwrap_or_default(); let mut confidence = self.calculate_confidence_for_regex_match(®ex_match); let severity = 
self.determine_severity(®ex_match.rule.secret_type); @@ -149,6 +163,7 @@ impl DetectorEngine { let mut temp_finding = SecretFinding { id: String::new(), rule_id: regex_match.rule_id.clone(), + detector_id: regex_match.rule_id.clone(), secret_type: regex_match.rule.secret_type.clone(), location: Location { file_path: file_path_str.clone(), @@ -159,10 +174,12 @@ impl DetectorEngine { }, severity: severity.clone(), confidence, + verification_state: SecretVerificationState::Unverified, description: regex_match.rule.description.clone(), recommendation: None, matched_secret: regex_match.matched_text.clone(), entropy: None, + evidence: vec![], }; let validation_result = self @@ -182,7 +199,8 @@ impl DetectorEngine { confidence = temp_finding.confidence; // Verify secret if verification is enabled - let mut is_verified = false; + let mut verification_state = SecretVerificationState::Unverified; + let mut evidence = vec![]; if let Some(ref verification_service) = self.verification_service { let verification_result = verification_service .verify_secret( @@ -194,24 +212,35 @@ impl DetectorEngine { match verification_result { VerificationResult::Verified => { - is_verified = true; + verification_state = SecretVerificationState::Verified; confidence = Confidence::High; // Verified secrets get high confidence + evidence.push("verification:provider_verified".to_string()); } VerificationResult::Invalid => { + verification_state = SecretVerificationState::Invalid; // Invalid secrets might be false positives, lower confidence if confidence == Confidence::High { confidence = Confidence::Medium; } + evidence.push("verification:provider_invalid".to_string()); } - _ => { - // Failed or not supported - keep original confidence + VerificationResult::Failed => { + verification_state = SecretVerificationState::Unknown; + evidence.push("verification:provider_indeterminate".to_string()); + } + VerificationResult::NotSupported => { + verification_state = SecretVerificationState::NotSupported; 
+ evidence.push("verification:not_supported".to_string()); } } + } else { + evidence.push("verification:disabled".to_string()); } findings.push(SecretFinding { id: format!("{}-{}-{}", regex_match.rule_id, file_path_str, line_number), rule_id: regex_match.rule_id.clone(), + detector_id: regex_match.rule_id.clone(), secret_type: regex_match.rule.secret_type.clone(), location: Location { file_path: file_path_str.clone(), @@ -222,9 +251,10 @@ impl DetectorEngine { }, severity, confidence, + verification_state, description: { let mut desc = regex_match.rule.description.clone(); - if is_verified { + if matches!(verification_state, SecretVerificationState::Verified) { desc.push_str(" (VERIFIED - Secret is active)"); } desc @@ -233,20 +263,20 @@ impl DetectorEngine { "Remove or rotate the exposed {}", regex_match.rule.name )), - matched_secret: Self::redact_secret(®ex_match.matched_text), + matched_secret: regex_match.matched_text.clone(), entropy: None, + evidence, }); } // Pass 3: Process entropy findings (check overlap with regex findings) for (line_number, entropy_match) in all_entropy_matches { // Stage 2: AST Analysis for entropy - let semantic_context = AstContextExtractor::extract_context( - content, - line_number, - entropy_match.start_pos as u32 + 1, - file_path, - ); + let column = entropy_match.start_pos as u32 + 1; + let semantic_context = semantic_contexts + .get(&(line_number, column)) + .cloned() + .unwrap_or_default(); // Check if this entropy match overlaps with any regex match let overlaps = findings.iter().any(|f| { @@ -272,6 +302,7 @@ impl DetectorEngine { file_path_str, line_number, entropy_match.start_pos ), rule_id: format!("entropy-{:?}", entropy_match.encoding), + detector_id: "entropy".to_string(), secret_type, location: Location { file_path: file_path_str.clone(), @@ -288,6 +319,7 @@ impl DetectorEngine { } else { Confidence::Low }, + verification_state: SecretVerificationState::NotSupported, description: format!( "High-entropy {:?} string 
detected (entropy: {:.2})", entropy_match.encoding, entropy_match.entropy @@ -295,8 +327,9 @@ impl DetectorEngine { recommendation: Some( "Review this high-entropy string - it may be a secret or token".to_string(), ), - matched_secret: Self::redact_secret(&entropy_match.matched_text), + matched_secret: entropy_match.matched_text.clone(), entropy: Some(entropy_match.entropy), + evidence: vec!["detection:entropy".to_string()], }; // Stage 3: Semantic Validation for entropy @@ -367,6 +400,7 @@ impl DetectorEngine { findings.push(SecretFinding { id: format!("{}-{}-{}", regex_match.rule_id, file_path_str, line_number), rule_id: regex_match.rule_id.clone(), + detector_id: regex_match.rule_id.clone(), secret_type: regex_match.rule.secret_type.clone(), location: Location { file_path: file_path_str.clone(), @@ -377,13 +411,15 @@ impl DetectorEngine { }, severity, confidence, + verification_state: SecretVerificationState::Unverified, description: regex_match.rule.description.clone(), recommendation: Some(format!( "Remove or rotate the exposed {}", regex_match.rule.name )), - matched_secret: Self::redact_secret(&regex_match.matched_text), + matched_secret: regex_match.matched_text.clone(), entropy: None, + evidence: vec!["detection:regex".to_string()], }); } @@ -413,6 +449,7 @@ impl DetectorEngine { findings.push(SecretFinding { id: format!("{}-{}-{}", file_path_str, line_number, entropy_match.start_pos ), rule_id: format!("entropy-{:?}", entropy_match.encoding), + detector_id: "entropy".to_string(), secret_type, location: Location { file_path: file_path_str.clone(), @@ -429,6 +466,7 @@ impl DetectorEngine { } else { Confidence::Low }, + verification_state: SecretVerificationState::NotSupported, description: format!( "High-entropy {:?} string detected (entropy: {:.2})", entropy_match.encoding, entropy_match.entropy @@ -436,8 +474,9 @@ impl DetectorEngine { recommendation: Some( "Review this high-entropy string - it may be a secret or token".to_string(), ), - matched_secret: Self::redact_secret(&entropy_match.matched_text), +
matched_secret: entropy_match.matched_text.clone(), entropy: Some(entropy_match.entropy), + evidence: vec!["detection:entropy".to_string()], }); } } @@ -486,14 +525,6 @@ impl DetectorEngine { } } - /// Redact secret for safe logging/display - fn redact_secret(secret: &str) -> String { - if secret.len() <= 8 { - return "***".to_string(); - } - format!("{}...{}", &secret[..4], &secret[secret.len() - 4..]) - } - /// Check whether a rule applies to the supplied file path. /// Precompiled matchers (from `path_matchers`) are used for efficient matching. /// If a rule defines `path_patterns`, the rule only applies when at least one compiled pattern matches the file path. diff --git a/vulnera-secrets/src/infrastructure/detectors/entropy_detector.rs b/vulnera-secrets/src/infrastructure/detectors/entropy_detector.rs index afed7139..9b7f1f70 100644 --- a/vulnera-secrets/src/infrastructure/detectors/entropy_detector.rs +++ b/vulnera-secrets/src/infrastructure/detectors/entropy_detector.rs @@ -3,6 +3,7 @@ use crate::domain::value_objects::{Entropy, EntropyEncoding}; use once_cell::sync::Lazy; use regex::Regex; +use std::collections::HashMap; use tracing::debug; static CANDIDATE_REGEX: Lazy = Lazy::new(|| { @@ -16,6 +17,13 @@ pub struct EntropyDetector { hex_threshold: f64, } +const GENERIC_THRESHOLD: f64 = 4.3; +const MIN_TOKEN_LENGTH: usize = 20; +const MIN_UNIQUE_BASE64: usize = 10; +const MIN_UNIQUE_HEX: usize = 8; +const MIN_UNIQUE_GENERIC: usize = 12; +const MAX_DOMINANT_CHAR_RATIO: f64 = 0.35; + impl EntropyDetector { pub fn new(base64_threshold: f64, hex_threshold: f64) -> Self { Self { @@ -30,14 +38,27 @@ impl EntropyDetector { for mat in CANDIDATE_REGEX.find_iter(content) { let word = mat.as_str(); + if !Self::is_reasonable_candidate(word) { + continue; + } + + let entropy = Entropy::shannon_entropy(word); + let unique_chars = Self::unique_char_count(word); + let dominant_ratio = Self::dominant_char_ratio(word); // Check Base64-like strings if 
Entropy::is_base64_like(word) { - let entropy = Entropy::shannon_entropy(word); - if entropy >= self.base64_threshold { + let normalized = entropy / 6.0; // max entropy for base64 alphabet ~= log2(64)=6 + if entropy >= self.base64_threshold + && normalized >= 0.70 + && unique_chars >= MIN_UNIQUE_BASE64 + && dominant_ratio <= MAX_DOMINANT_CHAR_RATIO + { debug!( line = line_number, entropy = entropy, + normalized_entropy = normalized, + unique_chars = unique_chars, "Found high-entropy Base64 string" ); matches.push(EntropyMatch { @@ -54,11 +75,17 @@ impl EntropyDetector { // Check hex-like strings (only if not already added as Base64) if Entropy::is_hex_like(word) { - let entropy = Entropy::shannon_entropy(word); - if entropy >= self.hex_threshold { + let normalized = entropy / 4.0; // max entropy for hex alphabet = log2(16)=4 + if entropy >= self.hex_threshold + && normalized >= 0.72 + && unique_chars >= MIN_UNIQUE_HEX + && dominant_ratio <= MAX_DOMINANT_CHAR_RATIO + { debug!( line = line_number, entropy = entropy, + normalized_entropy = normalized, + unique_chars = unique_chars, "Found high-entropy hex string" ); matches.push(EntropyMatch { @@ -69,6 +96,30 @@ impl EntropyDetector { end_pos: mat.end(), }); } + continue; + } + + // Generic high-entropy token detection for non-base64/non-hex strings + let normalized_generic = entropy / (word.len().min(94) as f64).log2(); + if entropy >= GENERIC_THRESHOLD + && normalized_generic >= 0.70 + && unique_chars >= MIN_UNIQUE_GENERIC + && dominant_ratio <= MAX_DOMINANT_CHAR_RATIO + { + debug!( + line = line_number, + entropy = entropy, + normalized_entropy = normalized_generic, + unique_chars = unique_chars, + "Found high-entropy generic string" + ); + matches.push(EntropyMatch { + encoding: EntropyEncoding::Generic, + entropy, + matched_text: word.to_string(), + start_pos: mat.start(), + end_pos: mat.end(), + }); } } @@ -79,6 +130,43 @@ impl EntropyDetector { pub fn detect_line(&self, line: &str, line_number: u32) -> Vec { 
self.detect(line, line_number) } + + fn is_reasonable_candidate(word: &str) -> bool { + if word.len() < MIN_TOKEN_LENGTH { + return false; + } + + // Skip obvious separators-only patterns often found in formatting or dummy values + let starts_or_ends_with_separator = word.starts_with('_') + || word.ends_with('_') + || word.starts_with('-') + || word.ends_with('-') + || word.starts_with('='); + + !starts_or_ends_with_separator + } + + fn unique_char_count(word: &str) -> usize { + let mut freq = HashMap::new(); + for ch in word.chars() { + *freq.entry(ch).or_insert(0usize) += 1; + } + freq.len() + } + + fn dominant_char_ratio(word: &str) -> f64 { + if word.is_empty() { + return 1.0; + } + + let mut freq = HashMap::new(); + for ch in word.chars() { + *freq.entry(ch).or_insert(0usize) += 1; + } + + let max_count = freq.values().copied().max().unwrap_or(0); + max_count as f64 / word.len() as f64 + } } /// Result of entropy detection @@ -90,3 +178,23 @@ pub struct EntropyMatch { pub start_pos: usize, pub end_pos: usize, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn entropy_detector_rejects_repeated_char_sequences() { + let detector = EntropyDetector::new(4.5, 3.0); + let findings = detector.detect_line("token=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 1); + assert!(findings.is_empty()); + } + + #[test] + fn entropy_detector_detects_random_base64_like_string() { + let detector = EntropyDetector::new(4.5, 3.0); + let findings = + detector.detect_line("token=Q29tcGxleFJhbmRvbVN0cmluZ1dpdGhIaWdoRW50cm9weQ==", 1); + assert!(!findings.is_empty()); + } +} diff --git a/vulnera-secrets/src/infrastructure/git/scanner.rs b/vulnera-secrets/src/infrastructure/git/scanner.rs index ff9d53a5..2f172dda 100644 --- a/vulnera-secrets/src/infrastructure/git/scanner.rs +++ b/vulnera-secrets/src/infrastructure/git/scanner.rs @@ -4,6 +4,8 @@ use crate::domain::entities::{Location, SecretFinding}; use crate::infrastructure::detectors::DetectorEngine; use chrono::{DateTime, Utc}; use 
git2::{Commit, DiffDelta, DiffHunk, DiffLine, Repository}; +use std::cell::RefCell; +use std::collections::HashMap; use std::path::{Path, PathBuf}; use tracing::{debug, error, info, instrument}; @@ -17,6 +19,21 @@ pub struct CommitMetadata { pub message: String, } +#[derive(Debug, Clone)] +struct GitHunkChunk { + file_path: PathBuf, + content: String, + added_line_map: HashMap, +} + +#[derive(Debug, Default)] +struct HunkBuilder { + file_path: PathBuf, + content: String, + snippet_line: u32, + added_line_map: HashMap, +} + /// Git history scanner pub struct GitScanner { detector_engine: DetectorEngine, @@ -131,18 +148,28 @@ impl GitScanner { ); } - // Process collected lines asynchronously - for (file_path, content) in lines_to_scan { - let line_findings = self + // Process collected hunks asynchronously + for chunk in lines_to_scan { + let hunk_findings = self .detector_engine - .detect_in_file_async(&file_path, &content) + .detect_in_file_async(&chunk.file_path, &chunk.content) .await; // Convert to git history findings with commit metadata - for finding in line_findings { + for mut finding in hunk_findings { + // Keep only findings that originate from added lines in this hunk + let snippet_line = finding.location.line; + let Some(actual_line) = chunk.added_line_map.get(&snippet_line).copied() else { + continue; + }; + + finding.location.line = actual_line; + finding.location.end_line = Some(actual_line); + all_findings.push(SecretFinding { id: format!("{}-{}", metadata.hash, finding.id), rule_id: finding.rule_id, + detector_id: finding.detector_id, secret_type: finding.secret_type, location: Location { file_path: format!("{}:{}", metadata.hash, finding.location.file_path), @@ -153,6 +180,7 @@ impl GitScanner { }, severity: finding.severity, confidence: finding.confidence, + verification_state: finding.verification_state, description: format!( "{} (Found in commit {} by {} on {})", finding.description, @@ -163,6 +191,7 @@ impl GitScanner { recommendation: 
finding.recommendation, matched_secret: finding.matched_secret, entropy: finding.entropy, + evidence: finding.evidence, }); } } @@ -182,8 +211,8 @@ impl GitScanner { &self, repo: &Repository, commit: &Commit<'_>, - ) -> Result, GitScanError> { - let mut lines_to_scan: Vec<(PathBuf, String)> = Vec::new(); + ) -> Result, GitScanError> { + let hunk_builders: RefCell> = RefCell::new(HashMap::new()); // Get parent commit for diff let parent = if commit.parent_count() > 0 { @@ -203,28 +232,97 @@ impl GitScanner { .map_err(GitScanError::GitError)?; let mut file_cb = |_delta: DiffDelta<'_>, _progress: f32| true; - let mut line_cb = - |delta: DiffDelta<'_>, _hunk: Option>, line: DiffLine<'_>| { - // Only scan added lines (new secrets) - if line.origin() == '+' { - if let Ok(content) = std::str::from_utf8(line.content()) { - let file_path = delta - .new_file() - .path() - .or_else(|| delta.old_file().path()) - .map(|p| p.to_path_buf()) - .unwrap_or_else(|| PathBuf::from("unknown")); - - lines_to_scan.push((file_path, content.to_string())); + let mut hunk_cb = |delta: DiffDelta<'_>, hunk: DiffHunk<'_>| { + let file_path = delta + .new_file() + .path() + .or_else(|| delta.old_file().path()) + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| PathBuf::from("unknown")); + + let key = format!( + "{}:{}:{}", + file_path.display(), + hunk.new_start(), + hunk.new_lines() + ); + + hunk_builders + .borrow_mut() + .entry(key) + .or_insert_with(|| HunkBuilder { + file_path, + content: String::new(), + snippet_line: 1, + added_line_map: HashMap::new(), + }); + + true + }; + + let mut line_cb = |delta: DiffDelta<'_>, hunk: Option>, line: DiffLine<'_>| { + let Some(hunk) = hunk else { + return true; + }; + + let file_path = delta + .new_file() + .path() + .or_else(|| delta.old_file().path()) + .map(|p| p.to_path_buf()) + .unwrap_or_else(|| PathBuf::from("unknown")); + + let key = format!( + "{}:{}:{}", + file_path.display(), + hunk.new_start(), + hunk.new_lines() + ); + + let mut builders = 
hunk_builders.borrow_mut(); + let Some(builder) = builders.get_mut(&key) else { + return true; + }; + + // Build a scanable hunk content stream using context and added lines. + // Removed lines are skipped because they do not exist in the new file version. + if line.origin() == '+' || line.origin() == ' ' { + if let Ok(content) = std::str::from_utf8(line.content()) { + builder.content.push_str(content); + if !content.ends_with('\n') { + builder.content.push('\n'); + } + + if line.origin() == '+' { + if let Some(actual_line) = line.new_lineno() { + builder + .added_line_map + .insert(builder.snippet_line, actual_line); + } } + + builder.snippet_line = builder.snippet_line.saturating_add(1); } - true - }; + } - diff.foreach(&mut file_cb, None, None, Some(&mut line_cb)) + true + }; + + diff.foreach(&mut file_cb, None, Some(&mut hunk_cb), Some(&mut line_cb)) .map_err(GitScanError::GitError)?; - Ok(lines_to_scan) + let chunks = hunk_builders + .into_inner() + .into_values() + .filter(|builder| !builder.added_line_map.is_empty() && !builder.content.is_empty()) + .map(|builder| GitHunkChunk { + file_path: builder.file_path, + content: builder.content, + added_line_map: builder.added_line_map, + }) + .collect(); + + Ok(chunks) } } diff --git a/vulnera-secrets/src/infrastructure/verification/verifier.rs b/vulnera-secrets/src/infrastructure/verification/verifier.rs index 703a1152..dba9b7d8 100644 --- a/vulnera-secrets/src/infrastructure/verification/verifier.rs +++ b/vulnera-secrets/src/infrastructure/verification/verifier.rs @@ -3,7 +3,9 @@ use crate::domain::entities::SecretType; use async_trait::async_trait; use std::collections::HashMap; +use std::sync::Arc; use std::time::Duration; +use tokio::sync::RwLock; // Import verifiers use super::aws_verifier::AwsVerifier; @@ -48,21 +50,71 @@ pub trait SecretVerifier: Send + Sync { ) -> VerificationResult; } +/// Registry for verifier providers +#[derive(Default)] +pub struct VerifierRegistry { + verifiers: Vec>, +} + +impl 
VerifierRegistry { + pub fn new() -> Self { + Self::default() + } + + pub fn register(&mut self, verifier: Arc) { + self.verifiers.push(verifier); + } + + pub fn resolve(&self, secret_type: &SecretType) -> Option> { + self.verifiers + .iter() + .find(|verifier| verifier.supports(secret_type)) + .cloned() + } +} + /// Verification service that routes to appropriate verifiers pub struct VerificationService { - verifiers: Vec>, + registry: VerifierRegistry, timeout: Duration, + cache: RwLock>, } impl VerificationService { + const MAX_CACHE_ENTRIES: usize = 10_000; + pub fn new(timeout: Duration) -> Self { - let verifiers: Vec> = vec![ - Box::new(AwsVerifier {}), - Box::new(GitHubVerifier::new()), - Box::new(GitLabVerifier::new()), - ]; + let mut registry = VerifierRegistry::new(); + registry.register(Arc::new(AwsVerifier {})); + registry.register(Arc::new(GitHubVerifier::new())); + registry.register(Arc::new(GitLabVerifier::new())); - Self { verifiers, timeout } + Self::with_registry(timeout, registry) + } + + pub fn with_registry(timeout: Duration, registry: VerifierRegistry) -> Self { + Self { + registry, + timeout, + cache: RwLock::new(HashMap::new()), + } + } + + pub fn with_verifiers(timeout: Duration, verifiers: Vec>) -> Self { + let mut registry = VerifierRegistry::new(); + for verifier in verifiers { + registry.register(verifier); + } + Self::with_registry(timeout, registry) + } + + fn cache_key(secret: &str, secret_type: &SecretType) -> String { + use sha2::{Digest, Sha256}; + + let mut hasher = Sha256::new(); + hasher.update(secret.as_bytes()); + let secret_hash = format!("{:x}", hasher.finalize()); + format!("{:?}:{}", secret_type, secret_hash) } /// Verify a secret using the appropriate verifier @@ -72,14 +124,28 @@ impl VerificationService { secret_type: &SecretType, context: Option<&HashMap>, ) -> VerificationResult { - for verifier in &self.verifiers { - if verifier.supports(secret_type) { - return verifier - .verify(secret, secret_type, context, 
self.timeout) - .await; + let cache_key = Self::cache_key(secret, secret_type); + + if let Some(cached) = self.cache.read().await.get(&cache_key).cloned() { + return cached; + } + + let result = if let Some(verifier) = self.registry.resolve(secret_type) { + verifier + .verify(secret, secret_type, context, self.timeout) + .await + } else { + VerificationResult::NotSupported + }; + + { + let mut cache = self.cache.write().await; + if cache.len() >= Self::MAX_CACHE_ENTRIES { + cache.clear(); } + cache.insert(cache_key, result.clone()); } - VerificationResult::NotSupported + result } } diff --git a/vulnera-secrets/src/module.rs b/vulnera-secrets/src/module.rs index 1a64aca8..4ac2d2d7 100644 --- a/vulnera-secrets/src/module.rs +++ b/vulnera-secrets/src/module.rs @@ -8,6 +8,7 @@ use vulnera_core::config::SecretDetectionConfig; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, + SecretFindingMetadata, SecretVerificationState, }; use crate::application::use_cases::ScanForSecretsUseCase; @@ -84,10 +85,46 @@ impl AnalysisModule for SecretDetectionModule { }, description: f.description, recommendation: f.recommendation, + secret_metadata: Some(SecretFindingMetadata { + detector_id: f.detector_id, + verification_state: match f.verification_state { + crate::domain::entities::SecretVerificationState::Verified => { + SecretVerificationState::Verified + } + crate::domain::entities::SecretVerificationState::Invalid => { + SecretVerificationState::Invalid + } + crate::domain::entities::SecretVerificationState::Unknown => { + SecretVerificationState::Unknown + } + crate::domain::entities::SecretVerificationState::Unverified => { + SecretVerificationState::Unverified + } + crate::domain::entities::SecretVerificationState::NotSupported => { + SecretVerificationState::NotSupported + } + }, + redacted_secret: 
redact_secret(&f.matched_secret), + entropy: f.entropy, + evidence: f.evidence, + }), enrichment: None, }) .collect(); + let mut additional_info = std::collections::HashMap::new(); + additional_info.insert( + "baseline_suppressed".to_string(), + scan_result.baseline_suppressed.to_string(), + ); + additional_info.insert( + "allowlist_suppressed".to_string(), + scan_result.allowlist_suppressed.to_string(), + ); + for (reason, count) in scan_result.suppression_breakdown { + additional_info.insert(format!("suppressed:{}", reason), count.to_string()); + } + let duration = start_time.elapsed(); Ok(ModuleResult { @@ -97,7 +134,7 @@ impl AnalysisModule for SecretDetectionModule { metadata: ModuleResultMetadata { files_scanned: scan_result.files_scanned, duration_ms: duration.as_millis() as u64, - additional_info: std::collections::HashMap::new(), + additional_info, }, error: None, }) @@ -109,3 +146,10 @@ impl Default for SecretDetectionModule { Self::new() } } + +fn redact_secret(secret: &str) -> String { + if secret.len() <= 8 { + return "***".to_string(); + } + format!("{}...{}", &secret[..4], &secret[secret.len() - 4..]) +} diff --git a/vulnera-secrets/tests/test_secret_detection.rs b/vulnera-secrets/tests/test_secret_detection.rs index 4d530d2a..f93053b4 100644 --- a/vulnera-secrets/tests/test_secret_detection.rs +++ b/vulnera-secrets/tests/test_secret_detection.rs @@ -287,3 +287,71 @@ async fn test_git_scanner() { "Should find AWS Access Key in git repo" ); } + +#[tokio::test] +async fn test_allowlist_suppression_and_metadata() { + let temp_dir = tempfile::tempdir().unwrap(); + + create_test_file( + &temp_dir, + "secrets.env", + r#"github_token=ghp_123456789012345678901234567890123456 +api_key=ALLOWED_PLACEHOLDER_TOKEN_VALUE_123456 +"#, + ) + .await; + + let mut config = SecretDetectionConfig::default(); + config.enable_entropy_detection = false; + config.global_allowlist_patterns = vec![r"ALLOWED_PLACEHOLDER_TOKEN_VALUE_[0-9]+".to_string()]; + 
config.rule_allowlist_patterns = std::collections::HashMap::from([( + "github-token".to_string(), + vec![r"ghp_123456789012345678901234567890123456".to_string()], + )]); + + let module = SecretDetectionModule::with_config(&config); + + let module_config = ModuleConfig { + job_id: Uuid::new_v4(), + project_id: "test-project".to_string(), + source_uri: temp_dir.path().to_string_lossy().to_string(), + config: std::collections::HashMap::new(), + }; + + let result = module + .execute(&module_config) + .await + .expect("Module execution failed"); + + assert!( + result.findings.is_empty(), + "All findings should be suppressed by allowlists" + ); + + assert_eq!( + result + .metadata + .additional_info + .get("allowlist_suppressed") + .cloned(), + Some("2".to_string()) + ); + + assert_eq!( + result + .metadata + .additional_info + .get("suppressed:allowlist:rule:github-token") + .cloned(), + Some("1".to_string()) + ); + + assert_eq!( + result + .metadata + .additional_info + .get("suppressed:allowlist:global") + .cloned(), + Some("1".to_string()) + ); +} diff --git a/vulnera-secrets/tests/test_verifiers.rs b/vulnera-secrets/tests/test_verifiers.rs index 96046515..9ea4a701 100644 --- a/vulnera-secrets/tests/test_verifiers.rs +++ b/vulnera-secrets/tests/test_verifiers.rs @@ -1,7 +1,10 @@ +use async_trait::async_trait; use std::time::Duration; +use std::{collections::HashMap, sync::Arc}; +use tokio::sync::Mutex; use vulnera_secrets::domain::entities::SecretType; use vulnera_secrets::infrastructure::verification::{ - GitHubVerifier, GitLabVerifier, SecretVerifier, VerificationResult, + GitHubVerifier, GitLabVerifier, SecretVerifier, VerificationResult, VerificationService, }; use wiremock::matchers::{header, method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; @@ -76,3 +79,47 @@ async fn test_gitlab_verifier_success() { assert_eq!(result, VerificationResult::Verified); } + +struct CountingVerifier { + calls: Arc>, +} + +#[async_trait] +impl SecretVerifier for 
CountingVerifier { + fn supports(&self, secret_type: &SecretType) -> bool { + matches!(secret_type, SecretType::ApiKey) + } + + async fn verify( + &self, + _secret: &str, + _secret_type: &SecretType, + _context: Option<&HashMap>, + _timeout: Duration, + ) -> VerificationResult { + let mut calls = self.calls.lock().await; + *calls += 1; + VerificationResult::Verified + } +} + +#[tokio::test] +async fn test_verification_service_caches_results() { + let calls = Arc::new(Mutex::new(0usize)); + let verifier = Arc::new(CountingVerifier { + calls: calls.clone(), + }); + + let service = VerificationService::with_verifiers(Duration::from_secs(1), vec![verifier]); + + let first = service + .verify_secret("same-secret", &SecretType::ApiKey, None) + .await; + let second = service + .verify_secret("same-secret", &SecretType::ApiKey, None) + .await; + + assert_eq!(first, VerificationResult::Verified); + assert_eq!(second, VerificationResult::Verified); + assert_eq!(*calls.lock().await, 1); +} From 7752e9d799a90656aba3e084d038af4282cb7cc9 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Fri, 13 Feb 2026 03:11:36 +0200 Subject: [PATCH 2/9] Raise MSRV to 1.91 and add SAST config gates and metadata - Update MSRV references to 1.91+ in docs, badges, clippy, Dockerfile, and Cargo.toml files - Add SAST config options: js_ts_frontend, min_finding_severity, min_finding_confidence, require_data_flow_evidence_for_dataflow, require_recommendation - Introduce VulnerabilityFindingMetadata and data flow trace types to Finding entity - Add parser frontend selection logic and OXC integration for JS/TS analysis - Register modules in composition root for explicit dependency injection - Add new SAST rules for Lodash prototype pollution (JS) and tarfile path traversal (Python) - Provide CVE test fixtures for new rules - Include SAST scale benchmark example for performance profiling These changes enable progressive rollout of OXC for JS/TS, fine-grained policy 
gating for findings, and richer vulnerability metadata for downstream consumers. --- .github/copilot-instructions.md | 2 +- CONTRIBUTING.md | 2 +- Cargo.lock | 340 +++++++++++++ Cargo.toml | 5 + Dockerfile | 22 +- README.md | 6 +- clippy.toml | 2 +- config/default.toml | 5 + docs/src/reference/architecture.md | 2 +- rustfmt.toml | 2 +- src/app.rs | 1 + vulnera-api/Cargo.toml | 1 + vulnera-api/src/module.rs | 2 + vulnera-core/Cargo.toml | 1 + vulnera-core/src/config/mod.rs | 15 + vulnera-core/src/domain/module/entities.rs | 35 ++ vulnera-deps/Cargo.toml | 1 + vulnera-deps/src/module.rs | 2 + vulnera-llm/Cargo.toml | 1 + vulnera-orchestrator/Cargo.toml | 1 + .../src/application/use_cases.rs | 29 +- vulnera-sandbox/Cargo.toml | 1 + vulnera-sast/Cargo.toml | 6 +- vulnera-sast/README.md | 10 + vulnera-sast/examples/scale_benchmark.rs | 189 +++++++ vulnera-sast/rules/javascript.toml | 30 ++ vulnera-sast/rules/python.toml | 22 + vulnera-sast/src/application/use_cases.rs | 460 ++++++++++++------ vulnera-sast/src/infrastructure/call_graph.rs | 113 +++-- vulnera-sast/src/infrastructure/mod.rs | 4 + .../src/infrastructure/oxc_frontend.rs | 57 +++ .../src/infrastructure/parser_frontend.rs | 46 ++ vulnera-sast/src/module.rs | 274 ++++++++++- ...2019-10744-lodash-prototype-pollution.yaml | 58 +++ .../cve-2007-4559-tarfile-path-traversal.yaml | 53 ++ vulnera-secrets/Cargo.toml | 1 + vulnera-secrets/src/module.rs | 3 +- 37 files changed, 1586 insertions(+), 218 deletions(-) create mode 100644 vulnera-sast/examples/scale_benchmark.rs create mode 100644 vulnera-sast/src/infrastructure/oxc_frontend.rs create mode 100644 vulnera-sast/src/infrastructure/parser_frontend.rs create mode 100644 vulnera-sast/tests/data/cve-fixtures/javascript/cve-2019-10744-lodash-prototype-pollution.yaml create mode 100644 vulnera-sast/tests/data/cve-fixtures/python/cve-2007-4559-tarfile-path-traversal.yaml diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 
715709a3..693b5b8b 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -1,6 +1,6 @@ # Vulnera Copilot Instructions -Vulnera is a multi-module async Rust vulnerability analysis platform (MSRV 1.82+). AI agents should focus on domain-driven design (DDD) patterns, the sandbox-isolated execution model, and the modular analysis pipeline orchestrated by the composition root. +Vulnera is a multi-module async Rust vulnerability analysis platform (MSRV 1.91+). AI agents should focus on domain-driven design (DDD) patterns, the sandbox-isolated execution model, and the modular analysis pipeline orchestrated by the composition root. ## Architecture: The Big Picture diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index af4794d1..d62e099e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,7 +23,7 @@ If you'd like to financially support the project, you can do so via [GitHub Spon ## Getting Started 1. Fork the repo and create a feature branch from `main`. -2. Install Rust stable (MSRV 1.82+) and PostgreSQL 12+. +2. Install Rust stable (MSRV 1.91+) and PostgreSQL 12+. 3. 
Set up the development database: ```bash diff --git a/Cargo.lock b/Cargo.lock index c5770ec3..e7a8d92c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1090,6 +1090,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.55" @@ -1269,6 +1278,20 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1409,6 +1432,12 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cow-utils" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "417bef24afe1460300965a25ff4a24b8b45ad011948302ec221e8a0a81eb2c79" + [[package]] name = "cpe" version = "0.1.5" @@ -2146,6 +2175,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "dragonbox_ecma" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd8e701084c37e7ef62d3f9e453b618130cbc0ef3573847785952a3ac3f746bf" + [[package]] name = "dunce" version = "1.0.5" @@ -3992,6 +4027,12 @@ dependencies = [ "libc", ] +[[package]] +name = "nonmax" +version = "0.5.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51" + [[package]] name = "nonzero_ext" version = "0.3.0" @@ -4281,6 +4322,212 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" +[[package]] +name = "owo-colors" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" + +[[package]] +name = "oxc-miette" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a7ba54c704edefead1f44e9ef09c43e5cfae666bdc33516b066011f0e6ebf7" +dependencies = [ + "cfg-if", + "owo-colors", + "oxc-miette-derive", + "textwrap", + "thiserror 2.0.18", + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "oxc-miette-derive" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4faecb54d0971f948fbc1918df69b26007e6f279a204793669542e1e8b75eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.115", +] + +[[package]] +name = "oxc_allocator" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2ff9e6bf079784415f6e9302504653528dfd4a5fc827fa933d18f6dff6dc472" +dependencies = [ + "allocator-api2", + "hashbrown 0.16.1", + "oxc_data_structures", + "rustc-hash", +] + +[[package]] +name = "oxc_ast" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8472428ea69d68518173d78e71cb1651a6219d586c096c828e7cae4b2aa8ab8a" +dependencies = [ + "bitflags", + "oxc_allocator", + "oxc_ast_macros", + "oxc_data_structures", + "oxc_diagnostics", + "oxc_estree", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_ast_macros" +version = "0.113.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "14bf7dd890145006a4f8e37f9ade3021914fa725141d0afbf8ce0ccc20f2d3f6" +dependencies = [ + "phf", + "proc-macro2", + "quote", + "syn 2.0.115", +] + +[[package]] +name = "oxc_data_structures" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d956c1b25ef0b8b832f1990bddb4b26b0ebc38661f880f08bf25571096b228c4" + +[[package]] +name = "oxc_diagnostics" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "470549485dce75015bb692fb58de92ce4763e206a01591f9081b97868695f52e" +dependencies = [ + "cow-utils", + "oxc-miette", + "percent-encoding", +] + +[[package]] +name = "oxc_ecmascript" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db54a556908dd5ef0d9acf67b822a32da632514eda35e9dcac776408b63d53e" +dependencies = [ + "cow-utils", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_estree" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "358fc8f625ac137612daace04daf34ba9eaf1818ccebb094253252db1507ee33" + +[[package]] +name = "oxc_index" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3e6120999627ec9703025eab7c9f410ebb7e95557632a8902ca48210416c2b" +dependencies = [ + "nonmax", + "serde", +] + +[[package]] +name = "oxc_parser" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e11640aa19dc72e52d90e85f179315c32316ffd08e4274399dc6b44d5e486967" +dependencies = [ + "bitflags", + "cow-utils", + "memchr", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast", + "oxc_data_structures", + "oxc_diagnostics", + "oxc_ecmascript", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", + "rustc-hash", + "seq-macro", +] + 
+[[package]] +name = "oxc_regular_expression" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "033b2b447361776a5542cf227bc5d0e26ac69179f864d699d9682913655f905e" +dependencies = [ + "bitflags", + "oxc_allocator", + "oxc_ast_macros", + "oxc_diagnostics", + "oxc_span", + "phf", + "rustc-hash", + "unicode-id-start", +] + +[[package]] +name = "oxc_span" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63cd06081823f05136c47f2205f4bf9fb03b0110222e0a7d5f76fedeb2edf5fc" +dependencies = [ + "compact_str", + "oxc-miette", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_str", +] + +[[package]] +name = "oxc_str" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3f9d4d363078bb88b53d78156826086daf97bfb0ffaf1e9265e3d5b8177489" +dependencies = [ + "compact_str", + "hashbrown 0.16.1", + "oxc_allocator", + "oxc_estree", +] + +[[package]] +name = "oxc_syntax" +version = "0.113.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "035992c58c272cbb0ca8a4782df013dd3a43813888b481c54d7339f95688a698" +dependencies = [ + "bitflags", + "cow-utils", + "dragonbox_ecma", + "nonmax", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_index", + "oxc_span", + "phf", + "unicode-id-start", +] + [[package]] name = "p256" version = "0.11.1" @@ -4507,6 +4754,49 @@ dependencies = [ "serde", ] +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + 
+[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 2.0.115", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.10" @@ -5813,6 +6103,12 @@ dependencies = [ "serde_core", ] +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + [[package]] name = "serde" version = "1.0.228" @@ -6100,6 +6396,12 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + [[package]] name = "sized-chunks" version = "0.6.5" @@ -6125,6 +6427,12 @@ dependencies = [ "serde", ] +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + [[package]] name = "snafu" version = "0.8.9" @@ -6414,6 +6722,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "streaming-iterator" version = "0.1.9" @@ -6605,6 +6919,17 @@ dependencies = [ "url", ] +[[package]] 
+name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -7217,12 +7542,24 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" +[[package]] +name = "unicode-id-start" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b79ad29b5e19de4260020f8919b443b2ef0277d242ce532ec7b7a2cc8b6007" + [[package]] name = "unicode-ident" version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + [[package]] name = "unicode-normalization" version = "0.1.25" @@ -7717,6 +8054,9 @@ dependencies = [ "insta", "moka", "once_cell", + "oxc_allocator", + "oxc_parser", + "oxc_span", "proc-macro2", "proptest", "quote", diff --git a/Cargo.toml b/Cargo.toml index 12735ce0..8b873fef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ resolver = "2" name = "vulnera-rust" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true @@ -101,6 +102,7 @@ tempfile.workspace = true [workspace.package] version = "0.5.1" edition = "2024" +rust-version = "1.91" authors = ["Vulnera-Team"] license = "BUSL-1.1" license-file = "LICENSE" @@ -209,6 +211,9 @@ tree-sitter-c = "0.24.1" tree-sitter-cpp = "0.23.4" tree-sitter-grep = "0.1.0" globset = "0.4" +oxc_allocator = "0.113.0" +oxc_parser = 
"0.113.0" +oxc_span = "0.113.0" # Serialization (binary) bincode = "1.3" diff --git a/Dockerfile b/Dockerfile index fb5b0825..b34256e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # Multi-stage build for Vulnera Rust FROM rust:slim as builder -# Install system dependencies including PostgreSQL +# Install system dependencies RUN apt-get update && apt-get install -y \ pkg-config \ libssl-dev \ @@ -29,7 +29,7 @@ COPY vulnera-secrets/Cargo.toml ./vulnera-secrets/ COPY vulnera-api/Cargo.toml ./vulnera-api/ COPY vulnera-llm/Cargo.toml ./vulnera-llm/ -# Create dummy source files to build dependencies only (layer caching optimization) +# Create dummy source files to build dependencies only RUN mkdir -p src vulnera-core/src vulnera-deps/src vulnera-orchestrator/src \ vulnera-sast/src vulnera-secrets/src vulnera-api/src vulnera-llm/src && \ echo "fn main() {}" > src/main.rs && \ @@ -110,8 +110,6 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \ cp /usr/local/bin/sqlx /app/bin/sqlx" # Runtime stage -# Use sid (unstable) to match GLIBC version from rust:slim builder -# This ensures sqlx-cli binary compiled in builder stage is compatible FROM debian:sid-slim # Install runtime dependencies @@ -123,18 +121,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ && rm -rf /var/lib/apt/lists/* -# Install semgrep via pipx with proper permissions for all users -ENV PIPX_HOME=/opt/pipx -ENV PIPX_BIN_DIR=/usr/local/bin -RUN pipx install semgrep \ - && chmod -R a+rX /opt/pipx \ - && semgrep --version - -# Verify semgrep is executable by non-root users -RUN which semgrep && ls -la /usr/local/bin/semgrep && ls -la /opt/pipx/venvs/semgrep/bin/semgrep - -# Create app user with home directory (needed for semgrep config) -RUN useradd -r -s /bin/false -m -d /app/home vulnera # Create app directory WORKDIR /app @@ -188,7 +174,7 @@ ENV VULNERA__LLM__ENRICHMENT__INCLUDE_CODE_CONTEXT="true" # --- Sandbox Configuration --- # Secure isolation for 
SAST/secrets modules ENV VULNERA__SANDBOX__ENABLED="true" -ENV VULNERA__SANDBOX__BACKEND="process" +ENV VULNERA__SANDBOX__BACKEND="landlock" ENV VULNERA__SANDBOX__EXECUTION_TIMEOUT_SECS="30" ENV VULNERA__SANDBOX__MEMORY_LIMIT_MB="256" @@ -205,6 +191,6 @@ EXPOSE 3000 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD curl -f http://localhost:3000/health || exit 1 -# Use entrypoint script (runs migrations by default, set RUN_MIGRATIONS=false to disable) +# Use entrypoint script ENTRYPOINT ["docker-entrypoint.sh"] CMD ["vulnera-rust"] diff --git a/README.md b/README.md index 89e40f94..6c99df74 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ **High-Performance Vulnerability Analysis Platform** [![License: BUSL 1.1](https://img.shields.io/badge/License-BUSL_1.1-blue.svg)](LICENSE) -[![MSRV](https://img.shields.io/badge/MSRV-1.82%2B-orange.svg)](https://www.rust-lang.org/tools/install) -[![Rust](https://img.shields.io/badge/Rust-1.82+-orange.svg?logo=rust)](https://www.rust-lang.org/) +[![MSRV](https://img.shields.io/badge/MSRV-1.91%2B-orange.svg)](https://www.rust-lang.org/tools/install) +[![Rust](https://img.shields.io/badge/Rust-1.91+-orange.svg?logo=rust)](https://www.rust-lang.org/) [![Docker](https://img.shields.io/badge/Docker-Ready-blue.svg?logo=docker)](https://hub.docker.com/) [![OpenAPI](https://img.shields.io/badge/OpenAPI-3.0-green.svg?logo=openapi-initiative)](https://swagger.io/) @@ -106,7 +106,7 @@ flowchart TB ### Prerequisites -- Rust 1.82+ • PostgreSQL 12+ • SQLx CLI +- Rust 1.91+ • PostgreSQL 12+ • SQLx CLI ### Install & Run diff --git a/clippy.toml b/clippy.toml index f06f8786..44815dc2 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1,5 +1,5 @@ # Clippy configuration -msrv = "1.85.0" +msrv = "1.91.0" cognitive-complexity-threshold = 30 too-many-arguments-threshold = 8 type-complexity-threshold = 250 diff --git a/config/default.toml b/config/default.toml index 0bc24a3f..9137c063 100644 --- a/config/default.toml +++ 
b/config/default.toml @@ -184,6 +184,11 @@ dynamic_depth_enabled = true # Auto-detect depth based on repo size (opt-out dynamic_depth_file_count_threshold = 500 # Downgrade depth above this file count dynamic_depth_total_bytes_threshold = 52428800 # Downgrade depth above 50 MB tree_cache_max_entries = 1024 # Max parsed trees cached per worker +js_ts_frontend = "oxc_preferred" # tree_sitter | oxc_preferred +min_finding_severity = "info" # critical | high | medium | low | info +min_finding_confidence = "low" # high | medium | low +require_data_flow_evidence_for_dataflow = false +require_recommendation = false # Cache TTL for popular package vulnerability listings (in hours) diff --git a/docs/src/reference/architecture.md b/docs/src/reference/architecture.md index 75d51acf..ae1cf007 100644 --- a/docs/src/reference/architecture.md +++ b/docs/src/reference/architecture.md @@ -292,7 +292,7 @@ All services instantiated and wired: ### Docker ```dockerfile -FROM rust:1.82 AS builder +FROM rust:1.91 AS builder # Build Vulnera binary... 
FROM debian:bookworm diff --git a/rustfmt.toml b/rustfmt.toml index 3aa60178..8066fa28 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,4 +1,4 @@ -# Rust formatting configuration (stable-only options) +# Rust formatting configuration # Use the project's Rust edition edition = "2024" diff --git a/src/app.rs b/src/app.rs index e98f04d9..cecf56d3 100644 --- a/src/app.rs +++ b/src/app.rs @@ -120,6 +120,7 @@ pub async fn create_app( let create_job_use_case = Arc::new(CreateAnalysisJobUseCase::new( project_detector, module_selector, + modules.registry.registered_modules(), )); let execute_job_use_case = Arc::new(ExecuteAnalysisJobUseCase::new( modules.registry.clone(), diff --git a/vulnera-api/Cargo.toml b/vulnera-api/Cargo.toml index 6c45e219..43e96517 100644 --- a/vulnera-api/Cargo.toml +++ b/vulnera-api/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-api" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-api/src/module.rs b/vulnera-api/src/module.rs index b6b4fa28..adb397c7 100644 --- a/vulnera-api/src/module.rs +++ b/vulnera-api/src/module.rs @@ -8,6 +8,7 @@ use vulnera_core::config::ApiSecurityConfig; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, + VulnerabilityFindingMetadata, }; use crate::application::use_cases::ScanApiSpecificationUseCase; @@ -143,6 +144,7 @@ impl AnalysisModule for ApiSecurityModule { description: f.description, recommendation: Some(f.recommendation), secret_metadata: None, + vulnerability_metadata: VulnerabilityFindingMetadata::default(), enrichment: None, }) .collect(); diff --git a/vulnera-core/Cargo.toml b/vulnera-core/Cargo.toml index b68150cc..271dfb03 100644 --- a/vulnera-core/Cargo.toml +++ b/vulnera-core/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-core" 
version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-core/src/config/mod.rs b/vulnera-core/src/config/mod.rs index 5fbfc8b2..f59902d0 100644 --- a/vulnera-core/src/config/mod.rs +++ b/vulnera-core/src/config/mod.rs @@ -705,6 +705,16 @@ pub struct SastConfig { pub max_findings_per_file: Option, /// Maximum total findings across all files (stops scan early if exceeded, None = no limit) pub max_total_findings: Option, + /// Preferred parser frontend for JavaScript/TypeScript (`tree_sitter` or `oxc_preferred`) + pub js_ts_frontend: Option, + /// Minimum severity threshold to report (critical|high|medium|low|info) + pub min_finding_severity: Option, + /// Minimum confidence threshold to report (high|medium|low) + pub min_finding_confidence: Option, + /// Require data-flow evidence for findings generated by data-flow rules + pub require_data_flow_evidence_for_dataflow: bool, + /// Require non-empty recommendations for reported findings + pub require_recommendation: bool, /// Enable incremental analysis (skip unchanged files based on content hash) pub enable_incremental: Option, /// Path to store incremental analysis state (file hashes) @@ -754,6 +764,11 @@ impl Default for SastConfig { scan_timeout_seconds: None, // No overall limit by default max_findings_per_file: Some(100), max_total_findings: None, // No limit by default + js_ts_frontend: Some("oxc_preferred".to_string()), + min_finding_severity: Some("info".to_string()), + min_finding_confidence: Some("low".to_string()), + require_data_flow_evidence_for_dataflow: false, + require_recommendation: false, enable_incremental: Some(false), // Disabled by default incremental_state_path: None, } diff --git a/vulnera-core/src/domain/module/entities.rs b/vulnera-core/src/domain/module/entities.rs index 255333f0..15a42465 100644 --- a/vulnera-core/src/domain/module/entities.rs +++ 
b/vulnera-core/src/domain/module/entities.rs @@ -72,11 +72,46 @@ pub struct Finding { /// Secret-specific metadata (populated only for secret findings) #[serde(skip_serializing_if = "Option::is_none")] pub secret_metadata: Option, + /// Vulnerability-specific metadata (populated by vulnerability analyzers such as SAST) + pub vulnerability_metadata: VulnerabilityFindingMetadata, /// LLM-generated enrichment data (populated on-demand via enrichment endpoint) #[serde(skip_serializing_if = "Option::is_none")] pub enrichment: Option, } +/// Vulnerability-specific metadata attached to vulnerability findings +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, Default)] +pub struct VulnerabilityFindingMetadata { + /// Code snippet at the finding location + pub snippet: Option, + /// Rule metavariable bindings captured during matching + pub bindings: Option>, + /// Optional data-flow trace for taint/dataflow findings + pub data_flow_path: Option, +} + +/// Data flow path showing source-to-sink propagation +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct VulnerabilityDataFlowPath { + /// Source location where taint originated + pub source: VulnerabilityDataFlowNode, + /// Intermediate propagation steps + pub steps: Vec, + /// Sink location where taint is consumed + pub sink: VulnerabilityDataFlowNode, +} + +/// Data flow node metadata for vulnerability traces +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] +pub struct VulnerabilityDataFlowNode { + /// Location in source code + pub location: Location, + /// Description of the node operation + pub description: String, + /// Expression tracked at this node + pub expression: String, +} + /// Secret-specific metadata attached to secret findings #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub struct SecretFindingMetadata { diff --git a/vulnera-deps/Cargo.toml b/vulnera-deps/Cargo.toml index ffb6271b..99e42fb1 100644 --- a/vulnera-deps/Cargo.toml +++ b/vulnera-deps/Cargo.toml @@ 
-2,6 +2,7 @@ name = "vulnera-deps" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-deps/src/module.rs b/vulnera-deps/src/module.rs index 22a35d94..30ffb5b8 100644 --- a/vulnera-deps/src/module.rs +++ b/vulnera-deps/src/module.rs @@ -12,6 +12,7 @@ use vulnera_core::infrastructure::parsers::ParserFactory; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, + VulnerabilityFindingMetadata, }; use crate::use_cases::AnalyzeDependenciesUseCase; @@ -256,6 +257,7 @@ impl AnalysisModule for DependencyAnalyzerModule { } }, secret_metadata: None, + vulnerability_metadata: VulnerabilityFindingMetadata::default(), enrichment: None, }; findings.push(finding); diff --git a/vulnera-llm/Cargo.toml b/vulnera-llm/Cargo.toml index b57c56a3..2084ea3a 100644 --- a/vulnera-llm/Cargo.toml +++ b/vulnera-llm/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-llm" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-orchestrator/Cargo.toml b/vulnera-orchestrator/Cargo.toml index dfdc1fcd..6879720d 100644 --- a/vulnera-orchestrator/Cargo.toml +++ b/vulnera-orchestrator/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-orchestrator" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-orchestrator/src/application/use_cases.rs b/vulnera-orchestrator/src/application/use_cases.rs index 850dce7c..b4f65cc9 100644 --- a/vulnera-orchestrator/src/application/use_cases.rs +++ b/vulnera-orchestrator/src/application/use_cases.rs @@ -1,5 +1,6 @@ //! 
Orchestrator use cases +use std::collections::HashSet; use std::sync::Arc; use tokio::task::JoinSet; @@ -25,16 +26,19 @@ use vulnera_sandbox::{ pub struct CreateAnalysisJobUseCase { project_detector: Arc, module_selector: Arc, + available_modules: HashSet, } impl CreateAnalysisJobUseCase { pub fn new( project_detector: Arc, module_selector: Arc, + available_modules: Vec, ) -> Self { Self { project_detector, module_selector, + available_modules: available_modules.into_iter().collect(), } } @@ -67,15 +71,38 @@ impl CreateAnalysisJobUseCase { })?; // Select modules to run - let modules_to_run = self + let selected_modules = self .module_selector .select_modules(&project, &analysis_depth); + let mut unavailable_modules = Vec::new(); + let modules_to_run: Vec = selected_modules + .into_iter() + .filter(|module_type| { + if self.available_modules.contains(module_type) { + true + } else { + unavailable_modules.push(module_type.clone()); + false + } + }) + .collect(); + + if !unavailable_modules.is_empty() { + warn!( + project_id = %project.id, + analysis_depth = ?analysis_depth, + unavailable_modules = ?unavailable_modules, + "Filtered selected modules that are not registered in the current runtime" + ); + } + info!( project_id = %project.id, source_type = ?source_type, analysis_depth = ?analysis_depth, selected_modules = modules_to_run.len(), + filtered_modules = unavailable_modules.len(), duration_ms = start_time.elapsed().as_millis(), "Created analysis job plan" ); diff --git a/vulnera-sandbox/Cargo.toml b/vulnera-sandbox/Cargo.toml index e805be68..b0e0ec89 100644 --- a/vulnera-sandbox/Cargo.toml +++ b/vulnera-sandbox/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-sandbox" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-sast/Cargo.toml b/vulnera-sast/Cargo.toml index 07a41e32..a4133f1b 100644 --- a/vulnera-sast/Cargo.toml +++ 
b/vulnera-sast/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-sast" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true @@ -40,6 +41,9 @@ tree-sitter-go = { workspace = true } tree-sitter-c = { workspace = true } tree-sitter-cpp = { workspace = true } tree-sitter-rust = { workspace = true } +oxc_allocator = { workspace = true } +oxc_parser = { workspace = true } +oxc_span = { workspace = true } syn = { workspace = true } quote = { workspace = true } proc-macro2 = { workspace = true } @@ -49,6 +53,7 @@ git2 = { workspace = true } # Streaming iterator for tree-sitter queries streaming-iterator = { workspace = true } +futures = { workspace = true } # Caching serialization @@ -72,7 +77,6 @@ proptest = { workspace = true } datatest-stable = { workspace = true } rstest = { workspace = true } tempfile = { workspace = true } -futures = { workspace = true } uuid = { workspace = true } serde_yml = { workspace = true } insta = { version = "1.46", features = ["yaml"] } diff --git a/vulnera-sast/README.md b/vulnera-sast/README.md index 4e57804a..7c500d11 100644 --- a/vulnera-sast/README.md +++ b/vulnera-sast/README.md @@ -40,6 +40,8 @@ Key settings: - Rule file path and taint config path - Analysis depth (Quick / Standard / Deep) - AST caching and incremental analysis +- JS/TS frontend rollout (`js_ts_frontend = "oxc_preferred"` by default; set to `tree_sitter` to opt out) +- Policy gates (`min_finding_severity`, `min_finding_confidence`, recommendation/evidence requirements) ## Rule system @@ -59,6 +61,14 @@ Rules are stored under `vulnera-sast/rules/` and taint patterns under `vulnera-s - Data-driven SAST rules: `cargo test -p vulnera-sast --test datatest_sast_rules` - Snapshot review: `cargo insta review` +## Benchmarking at scale + +Run baseline vs tuned parallelism comparison on a target repository: + +- `cargo run -p vulnera-sast --example scale_benchmark -- 
/path/to/repo` +- Optional args: `iterations` and `depth` (`quick|standard|deep`) +- Example: `cargo run -p vulnera-sast --example scale_benchmark -- . 5 deep` + ## Limitations - Tree‑sitter is syntax‑level; no macro expansion or full type resolution. diff --git a/vulnera-sast/examples/scale_benchmark.rs b/vulnera-sast/examples/scale_benchmark.rs new file mode 100644 index 00000000..f3e33997 --- /dev/null +++ b/vulnera-sast/examples/scale_benchmark.rs @@ -0,0 +1,189 @@ +use std::path::PathBuf; +use std::time::Instant; + +use vulnera_core::config::{AnalysisDepth, SastConfig}; +use vulnera_sast::application::use_cases::{AnalysisConfig, ScanProjectUseCase}; + +#[derive(Debug, Clone)] +struct BenchmarkMetrics { + avg_ms: f64, + p95_ms: u128, + files_per_sec: f64, + findings_per_sec: f64, + avg_files_scanned: f64, + avg_findings: f64, +} + +#[tokio::main] +async fn main() { + if let Err(err) = run().await { + eprintln!("benchmark failed: {err}"); + std::process::exit(1); + } +} + +async fn run() -> Result<(), String> { + let mut args = std::env::args().skip(1); + let target = args + .next() + .map(PathBuf::from) + .ok_or_else(|| "usage: cargo run -p vulnera-sast --example scale_benchmark -- [iterations] [depth: quick|standard|deep]".to_string())?; + + if !target.exists() { + return Err(format!("target path does not exist: {}", target.display())); + } + + let iterations = args + .next() + .and_then(|s| s.parse::().ok()) + .unwrap_or(3) + .max(1); + + let depth = args + .next() + .as_deref() + .map(parse_depth) + .transpose()? 
+ .unwrap_or(AnalysisDepth::Standard); + + let cpu_threads = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1); + + let baseline_parallelism = 1usize; + let tuned_parallelism = (cpu_threads * 2).clamp(2, 32); + + println!("== vulnera-sast scale benchmark =="); + println!("target={}", target.display()); + println!("iterations={iterations}"); + println!("depth={depth:?}"); + println!("cpu_threads={cpu_threads}"); + println!("baseline_parallelism={baseline_parallelism}"); + println!("tuned_parallelism={tuned_parallelism}"); + + let baseline = benchmark_profile( + "baseline", + &target, + iterations, + depth, + baseline_parallelism, + ) + .await?; + + let tuned = benchmark_profile("tuned", &target, iterations, depth, tuned_parallelism).await?; + + let speedup = if tuned.avg_ms > 0.0 { + baseline.avg_ms / tuned.avg_ms + } else { + 0.0 + }; + + println!("\n== summary =="); + print_metrics("baseline", &baseline); + print_metrics("tuned", &tuned); + println!("speedup={speedup:.2}x"); + + Ok(()) +} + +fn parse_depth(input: &str) -> Result { + match input.trim().to_ascii_lowercase().as_str() { + "quick" => Ok(AnalysisDepth::Quick), + "standard" => Ok(AnalysisDepth::Standard), + "deep" => Ok(AnalysisDepth::Deep), + other => Err(format!("invalid depth: {other}")), + } +} + +async fn benchmark_profile( + profile_name: &str, + target: &PathBuf, + iterations: usize, + depth: AnalysisDepth, + max_concurrent_files: usize, +) -> Result { + let mut sast_config = SastConfig { + analysis_depth: depth, + max_concurrent_files: Some(max_concurrent_files), + dynamic_depth_enabled: Some(false), + ..SastConfig::default() + }; + + if matches!(depth, AnalysisDepth::Quick) { + sast_config.enable_data_flow = false; + sast_config.enable_call_graph = false; + } + + let analysis_config = AnalysisConfig::from(&sast_config); + let use_case = ScanProjectUseCase::with_config(&sast_config, analysis_config); + + let mut durations: Vec = Vec::with_capacity(iterations); + let mut 
total_files_scanned = 0usize; + let mut total_findings = 0usize; + + println!("\n-- profile={profile_name} --"); + + for i in 0..iterations { + let start = Instant::now(); + let result = use_case + .execute(target) + .await + .map_err(|e| format!("scan failed: {e}"))?; + let elapsed = start.elapsed().as_millis(); + + durations.push(elapsed); + total_files_scanned += result.files_scanned; + total_findings += result.findings.len(); + + println!( + "run={} duration_ms={} files_scanned={} findings={} files_failed={} errors={}", + i + 1, + elapsed, + result.files_scanned, + result.findings.len(), + result.files_failed, + result.errors.len() + ); + } + + durations.sort_unstable(); + + let avg_ms = durations.iter().copied().map(|v| v as f64).sum::() / iterations as f64; + let p95_index = ((iterations as f64) * 0.95).ceil() as usize; + let p95_ms = durations[p95_index.saturating_sub(1).min(durations.len().saturating_sub(1))]; + + let avg_files_scanned = total_files_scanned as f64 / iterations as f64; + let avg_findings = total_findings as f64 / iterations as f64; + let files_per_sec = if avg_ms > 0.0 { + (avg_files_scanned * 1000.0) / avg_ms + } else { + 0.0 + }; + let findings_per_sec = if avg_ms > 0.0 { + (avg_findings * 1000.0) / avg_ms + } else { + 0.0 + }; + + Ok(BenchmarkMetrics { + avg_ms, + p95_ms, + files_per_sec, + findings_per_sec, + avg_files_scanned, + avg_findings, + }) +} + +fn print_metrics(name: &str, m: &BenchmarkMetrics) { + println!( + "profile={} avg_ms={:.2} p95_ms={} avg_files_scanned={:.2} avg_findings={:.2} files_per_sec={:.2} findings_per_sec={:.2}", + name, + m.avg_ms, + m.p95_ms, + m.avg_files_scanned, + m.avg_findings, + m.files_per_sec, + m.findings_per_sec + ); +} diff --git a/vulnera-sast/rules/javascript.toml b/vulnera-sast/rules/javascript.toml index dcfbbb17..e1b2cd59 100644 --- a/vulnera-sast/rules/javascript.toml +++ b/vulnera-sast/rules/javascript.toml @@ -278,6 +278,36 @@ value = ''' ) @call ''' +[[rules]] +id = 
"js-lodash-prototype-pollution" +name = "Lodash Merge Prototype Pollution" +description = "Unsafe object merge via lodash-style helpers can allow prototype pollution" +severity = "High" +languages = ["JavaScript"] +cwe_ids = ["CWE-1321"] +owasp_categories = ["A03:2021 - Injection"] +tags = ["prototype-pollution", "lodash", "javascript"] +message = "Avoid merging untrusted objects directly. Strip __proto__, constructor, and prototype keys before merge operations." + +[rules.pattern] +type = "TreeSitterQuery" +value = ''' +[ + (call_expression + function: (member_expression + object: (identifier) @obj + property: (property_identifier) @fn + ) + (#match? @obj "^(_|lodash)$") + (#match? @fn "^(merge|mergeWith|defaultsDeep)$") + ) @call + (call_expression + function: (identifier) @fn + (#match? @fn "^(merge|mergeWith|defaultsDeep)$") + ) @call +] +''' + [[rules]] id = "js-open-redirect" name = "Open Redirect" diff --git a/vulnera-sast/rules/python.toml b/vulnera-sast/rules/python.toml index 297171b9..814feb05 100644 --- a/vulnera-sast/rules/python.toml +++ b/vulnera-sast/rules/python.toml @@ -587,6 +587,28 @@ value = ''' ) @extract ''' +[[rules]] +id = "python-tarfile-path-traversal" +name = "Tarfile Path Traversal" +description = "tarfile.extractall on untrusted archives may write files outside the destination" +severity = "Critical" +languages = ["Python"] +cwe_ids = ["CWE-22", "CWE-73"] +owasp_categories = ["A01:2021 - Broken Access Control"] +tags = ["tarfile", "path-traversal", "zip-slip", "python"] +message = "Validate tar member paths before extraction. Reject entries resolving outside the target directory." + +[rules.pattern] +type = "TreeSitterQuery" +value = ''' +(call + function: (attribute + attribute: (identifier) @fn + ) + (#eq? 
@fn "extractall") +) @extract +''' + [[rules]] id = "python-jinja-autoescape-off" name = "Jinja2 Autoescape Disabled" diff --git a/vulnera-sast/src/application/use_cases.rs b/vulnera-sast/src/application/use_cases.rs index a59e4bd9..c1d4e280 100644 --- a/vulnera-sast/src/application/use_cases.rs +++ b/vulnera-sast/src/application/use_cases.rs @@ -6,6 +6,7 @@ //! - Call graph analysis for cross-function vulnerability detection //! - SARIF v2.1.0 export +use futures::stream::{self, StreamExt}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::path::{Path, PathBuf}; @@ -28,6 +29,10 @@ use crate::infrastructure::ast_cache::AstCacheService; use crate::infrastructure::call_graph::CallGraphBuilder; use crate::infrastructure::data_flow::{DataFlowAnalyzer, InterProceduralContext, TaintMatch}; use crate::infrastructure::incremental::IncrementalTracker; +use crate::infrastructure::oxc_frontend::OxcFrontend; +use crate::infrastructure::parser_frontend::{ + JavaScriptFrontend, ParserFrontend, ParserFrontendSelector, +}; use crate::infrastructure::parsers::convert_tree_sitter_node; use crate::infrastructure::regex_cache; use crate::infrastructure::rules::{ @@ -35,7 +40,7 @@ use crate::infrastructure::rules::{ }; use crate::infrastructure::sarif::{SarifExporter, SarifExporterConfig}; use crate::infrastructure::sast_engine::{SastEngine, SastEngineHandle}; -use crate::infrastructure::scanner::DirectoryScanner; +use crate::infrastructure::scanner::{DirectoryScanner, ScanFile}; use crate::infrastructure::semantic::SemanticContext; use crate::infrastructure::taint_queries::{ TaintConfig, get_propagation_queries, get_sanitizer_queries, @@ -111,6 +116,21 @@ const fn default_per_file_timeout() -> u64 { const fn default_max_findings_per_file() -> usize { 100 } +fn default_js_ts_frontend() -> JavaScriptFrontend { + JavaScriptFrontend::OxcPreferred +} + +fn parse_js_ts_frontend(value: Option<&str>) -> JavaScriptFrontend { + match value + .map(str::trim) + 
.map(str::to_ascii_lowercase) + .as_deref() + { + Some("tree_sitter") => JavaScriptFrontend::TreeSitter, + Some("oxc_preferred") => JavaScriptFrontend::OxcPreferred, + _ => default_js_ts_frontend(), + } +} fn default_depth_file_threshold() -> Option { Some(500) } @@ -169,6 +189,9 @@ pub struct AnalysisConfig { pub max_total_findings: Option, /// Path to incremental state file (None = full scan every time) pub incremental_state_path: Option, + /// Preferred parser frontend strategy for JavaScript/TypeScript. + #[serde(default = "default_js_ts_frontend")] + pub js_ts_frontend: JavaScriptFrontend, } impl From<&SastConfig> for AnalysisConfig { @@ -206,6 +229,7 @@ impl From<&SastConfig> for AnalysisConfig { .unwrap_or(default_max_findings_per_file()), max_total_findings: config.max_total_findings, incremental_state_path: config.incremental_state_path.clone(), + js_ts_frontend: parse_js_ts_frontend(config.js_ts_frontend.as_deref()), } } } @@ -242,6 +266,24 @@ struct CallAssignment { column: usize, } +#[derive(Debug)] +struct ScanPlan { + files: Vec, + rules: Vec, + effective_depth: AnalysisDepth, + effective_parallelism: usize, + ast_cache_ttl: Duration, +} + +#[derive(Debug, Default)] +struct AnalysisStageResult { + findings: Vec, + files_scanned: usize, + files_skipped: usize, + files_failed: usize, + errors: Vec, +} + /// Production-ready use case for scanning a project pub struct ScanProjectUseCase { scanner: DirectoryScanner, @@ -257,6 +299,10 @@ pub struct ScanProjectUseCase { call_graph_builder: Arc>, /// Content-hash tracker for incremental analysis (skip unchanged files) incremental_tracker: Mutex>, + /// OXC parser frontend for JS/TS primary parsing lane. + oxc_frontend: OxcFrontend, + /// Parser frontend selector for language-specific routing. 
+ parser_frontend_selector: ParserFrontendSelector, /// Analysis configuration config: AnalysisConfig, } @@ -308,6 +354,8 @@ impl ScanProjectUseCase { data_flow_context: Arc::new(RwLock::new(InterProceduralContext::new())), call_graph_builder: Arc::new(RwLock::new(CallGraphBuilder::new())), incremental_tracker: Mutex::new(incremental_tracker), + oxc_frontend: OxcFrontend, + parser_frontend_selector: ParserFrontendSelector::new(analysis_config.js_ts_frontend), config: analysis_config, } } @@ -378,11 +426,114 @@ impl ScanProjectUseCase { } } + fn resolve_effective_parallelism(&self, file_count: usize, total_bytes: u64) -> usize { + let configured = self.config.max_concurrent_files.max(1); + let cpu_cap = std::thread::available_parallelism() + .map(|n| n.get().saturating_mul(2)) + .unwrap_or(2) + .max(1); + let mut effective = configured.min(cpu_cap).max(1); + + if file_count < 16 { + effective = 1; + } else if file_count < 64 { + effective = effective.min(4); + } + + if total_bytes > 268_435_456 { + effective = (effective / 2).max(1); + } + + effective + } + #[instrument(skip(self), fields(root = %root.display()))] pub async fn execute(&self, root: &Path) -> Result { let start_time = std::time::Instant::now(); info!("Starting native SAST scan"); + let plan = self.build_scan_plan(root).await?; + + let (parsed_files, mut stage_index_errors) = self + .build_parse_index_stage( + &plan.files, + plan.effective_depth, + plan.effective_parallelism, + plan.ast_cache_ttl, + ) + .await; + + let mut ast_cache_stats = AstCacheStats::default(); + let mut stage_analysis = self + .run_analysis_stage( + &plan.files, + &plan.rules, + plan.effective_depth, + plan.ast_cache_ttl, + &parsed_files, + &mut ast_cache_stats, + ) + .await; + + stage_analysis.errors.append(&mut stage_index_errors); + + if self.config.enable_data_flow && plan.effective_depth != AnalysisDepth::Quick { + Self::adjust_severity_for_data_flow(&mut stage_analysis.findings); + } + + stage_analysis.findings = 
Self::deduplicate_findings(stage_analysis.findings); + + { + let mut tracker = self.incremental_tracker.lock().unwrap(); + if let Some(ref mut t) = *tracker { + t.finalize( + stage_analysis.files_scanned + stage_analysis.files_skipped, + stage_analysis.files_skipped, + ); + if let Some(ref state_path) = self.config.incremental_state_path { + if let Err(e) = t.save_to_file(state_path) { + warn!(error = %e, "Failed to save incremental state"); + } + } + let stats = t.stats(); + info!( + previous = stats.previous_files, + analyzed = stats.files_analyzed, + skipped = stats.files_skipped, + "Incremental analysis stats" + ); + } + } + + let duration_ms = start_time.elapsed().as_millis() as u64; + info!( + l1_hits = ast_cache_stats.l1_hits, + l1_misses = ast_cache_stats.l1_misses, + l2_hits = ast_cache_stats.l2_hits, + l2_misses = ast_cache_stats.l2_misses, + "SAST AST cache stats" + ); + info!( + finding_count = stage_analysis.findings.len(), + files_scanned = stage_analysis.files_scanned, + files_skipped = stage_analysis.files_skipped, + files_failed = stage_analysis.files_failed, + effective_parallelism = plan.effective_parallelism, + duration_ms, + "SAST scan completed" + ); + + Ok(ScanResult { + findings: stage_analysis.findings, + files_scanned: stage_analysis.files_scanned, + files_skipped: stage_analysis.files_skipped, + files_failed: stage_analysis.files_failed, + errors: stage_analysis.errors, + duration_ms, + }) + } + + async fn build_scan_plan(&self, root: &Path) -> Result { let files = self.scanner.scan(root).map_err(|e| { error!(error = %e, "Failed to scan directory"); ScanError::scan_failed(root, e) @@ -393,55 +544,101 @@ impl ScanProjectUseCase { .iter() .filter_map(|file| std::fs::metadata(&file.path).ok().map(|meta| meta.len())) .sum(); + let effective_depth = self.resolve_analysis_depth(file_count, total_bytes); + let effective_parallelism = self.resolve_effective_parallelism(file_count, total_bytes); info!( file_count, total_bytes, configured_depth = 
?self.config.analysis_depth, effective_depth = ?effective_depth, - "Found files to scan" + configured_parallelism = self.config.max_concurrent_files, + effective_parallelism, + "Scan planning stage completed" ); - let mut all_findings = Vec::new(); - let mut files_scanned = 0; - let mut files_skipped = 0; - let mut files_failed = 0; - let mut errors: Vec = Vec::new(); - let mut ast_cache_stats = AstCacheStats::default(); - let ast_cache_ttl = - Duration::from_secs(self.config.ast_cache_ttl_hours.saturating_mul(3600)); - - let rules = self.rule_repository.read().await; - let all_rules = rules.get_all_rules(); + let rules = { + let rules_guard = self.rule_repository.read().await; + rules_guard.get_all_rules().to_vec() + }; - // ========================================================================= - // Phase 1: Build Call Graph & Parse All Files - // ========================================================================= - // we first build the complete call graph by parsing - // all files, then resolve cross-file references before analysis. + Ok(ScanPlan { + files, + rules, + effective_depth, + effective_parallelism, + ast_cache_ttl: Duration::from_secs(self.config.ast_cache_ttl_hours.saturating_mul(3600)), + }) + } + async fn build_parse_index_stage( + &self, + files: &[ScanFile], + effective_depth: AnalysisDepth, + effective_parallelism: usize, + ast_cache_ttl: Duration, + ) -> (HashMap, Vec) { let mut parsed_files: HashMap = HashMap::new(); + let mut errors = Vec::new(); - if self.config.enable_call_graph && effective_depth != AnalysisDepth::Quick { - debug!("Phase 1: Building call graph with cross-file resolution"); - let mut call_graph = self.call_graph_builder.write().await; + if !(self.config.enable_call_graph && effective_depth != AnalysisDepth::Quick) { + return (parsed_files, errors); + } - // 1a. 
Parse all files and build initial graph - for file in &files { - if let Ok(content) = std::fs::read_to_string(&file.path) { - let file_path_str = file.path.display().to_string(); + debug!("Stage: parse/index with cross-file call graph"); + let mut call_graph = self.call_graph_builder.write().await; - let tree = match self.sast_engine.parse(&content, file.language).await { - Ok(tree) => tree, - Err(_) => continue, - }; + let results = stream::iter(files.iter().cloned()) + .map(|file| async move { + let file_path_str = file.path.display().to_string(); + let content = std::fs::read_to_string(&file.path) + .map_err(|e| format!("Failed to read {} in parse/index: {}", file.path.display(), e))?; + + let selected_frontend = self.parser_frontend_selector.select(file.language); + let mut warnings = Vec::new(); + + if selected_frontend == ParserFrontend::Oxc && OxcFrontend::supports(file.language) { + if let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) { + warnings.push(format!( + "OXC parse warning for {}: {}", + file.path.display(), + err + )); + } + } + + let tree = self + .sast_engine + .parse(&content, file.language) + .await + .map_err(|e| { + format!( + "Parse/index stage failed for {}: {}", + file.path.display(), + e + ) + })?; + + Ok::<_, String>((file_path_str, file.language, content, tree, warnings)) + }) + .buffer_unordered(effective_parallelism.max(1)) + .collect::>() + .await; + + for result in results { + match result { + Ok((file_path_str, language, content, tree, warnings)) => { + for warning in warnings { + warn!(file = %file_path_str, "{}", warning); + errors.push(warning); + } if self.config.enable_ast_cache { if let Some(cache) = self.ast_cache.as_ref() { let content_hash = Self::compute_content_hash(&content); let ast = convert_tree_sitter_node(tree.root_node(), &content, None); if let Err(e) = cache - .set(&content_hash, &file.language, &ast, Some(ast_cache_ttl)) + .set(&content_hash, &language, &ast, Some(ast_cache_ttl)) .await { 
warn!(error = %e, "Failed to write L2 AST cache"); @@ -449,59 +646,73 @@ impl ScanProjectUseCase { } } - // Build call graph nodes and edges - call_graph.analyze_ast(&file_path_str, &tree, &file.language, &content); + call_graph.analyze_ast(&file_path_str, &tree, &language, &content); - // Cache the parsed tree for reuse in analysis phase if parsed_files.len() < self.config.tree_cache_max_entries { parsed_files.insert(file_path_str, (tree, content)); } } + Err(error) => errors.push(error), } + } - // 1b. Resolve cross-file references - let resolved_count = call_graph.graph_mut().resolve_all_calls(); - let stats = call_graph.graph().stats(); - - info!( - functions = stats.total_functions, - calls = stats.total_calls, - resolved = resolved_count, - entry_points = stats.entry_points, - "Call graph built with cross-file resolution" - ); + let resolved_count = call_graph.graph_mut().resolve_all_calls(); + let stats = call_graph.graph().stats(); + info!( + functions = stats.total_functions, + calls = stats.total_calls, + resolved = resolved_count, + entry_points = stats.entry_points, + "Parse/index stage call graph built" + ); - // Seed inter-procedural context from call graph for cross-function taint propagation + { let mut df_ctx = self.data_flow_context.write().await; df_ctx.seed_from_call_graph(call_graph.graph()); - drop(df_ctx); + } - // Extract file-level dependencies for incremental tracking - { - let file_deps = call_graph.graph().file_dependencies(); - if !file_deps.is_empty() { - let mut tracker = self.incremental_tracker.lock().unwrap(); - if let Some(ref mut t) = *tracker { - debug!( - cross_file_edges = file_deps.len(), - "Setting file dependencies from call graph" - ); - t.set_file_dependencies(file_deps); - } + { + let file_deps = call_graph.graph().file_dependencies(); + if !file_deps.is_empty() { + let mut tracker = self.incremental_tracker.lock().unwrap(); + if let Some(ref mut t) = *tracker { + debug!( + cross_file_edges = file_deps.len(), + "Setting 
file dependencies from call graph" + ); + t.set_file_dependencies(file_deps); } } } - // ========================================================================= - // Phase 2: File Analysis (Pattern Matching & Data Flow) - // ========================================================================= + (parsed_files, errors) + } + + async fn run_analysis_stage( + &self, + files: &[ScanFile], + all_rules: &[PatternRule], + effective_depth: AnalysisDepth, + ast_cache_ttl: Duration, + parsed_files: &HashMap, + ast_cache_stats: &mut AstCacheStats, + ) -> AnalysisStageResult { + let mut stage = AnalysisStageResult::default(); + for file in files { - // Check file size limit + let selected_frontend = self.parser_frontend_selector.select(file.language); + debug!( + file = %file.path.display(), + language = %file.language, + frontend = ?selected_frontend, + "Stage: file analysis routing" + ); + let file_size = match std::fs::metadata(&file.path) { Ok(meta) => meta.len(), Err(e) => { debug!(file = %file.path.display(), error = %e, "Failed to get file metadata"); - files_skipped += 1; + stage.files_skipped += 1; continue; } }; @@ -513,34 +724,48 @@ impl ScanProjectUseCase { max_size = self.config.max_file_size_bytes, "Skipping file: exceeds size limit" ); - files_skipped += 1; + stage.files_skipped += 1; continue; } - debug!(file = %file.path.display(), language = ?file.language, "Scanning file"); - let content = match std::fs::read_to_string(&file.path) { Ok(content) => content, Err(e) => { warn!(file = %file.path.display(), error = %e, "Failed to read file"); - files_failed += 1; - errors.push(format!("Failed to read {}: {}", file.path.display(), e)); + stage.files_failed += 1; + stage + .errors + .push(format!("Failed to read {}: {}", file.path.display(), e)); continue; } }; + if selected_frontend == ParserFrontend::Oxc && OxcFrontend::supports(file.language) { + if let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) { + warn!( + file = %file.path.display(), 
+ language = %file.language, + error = %err, + "OXC parse failed, continuing with Tree-sitter compatibility lane" + ); + stage.errors.push(format!( + "OXC parse warning for {}: {}", + file.path.display(), + err + )); + } + } + let file_path_str = file.path.display().to_string(); let content_hash = Self::compute_content_hash(&content); - // Incremental check: skip files whose content hasn't changed { let tracker = self.incremental_tracker.lock().unwrap(); if let Some(ref t) = *tracker { let (needs, _) = t.needs_analysis(&file_path_str, &content); if !needs { debug!(file = %file_path_str, "Skipping unchanged file (incremental)"); - files_skipped += 1; - // Still record previous findings in current state + stage.files_skipped += 1; drop(tracker); let mut tracker = self.incremental_tracker.lock().unwrap(); if let Some(ref mut t) = *tracker { @@ -557,9 +782,7 @@ impl ScanProjectUseCase { } } - let mut cached_tree = parsed_files - .get(&file_path_str) - .map(|(tree, _)| tree.clone()); + let mut cached_tree = parsed_files.get(&file_path_str).map(|(tree, _)| tree.clone()); let mut l2_hit = false; if cached_tree.is_some() { @@ -573,17 +796,16 @@ impl ScanProjectUseCase { match cache.get(&content_hash, &file.language).await { Ok(Some(_)) => { l2_hit = true; - Self::update_l2_cache_stats(&mut ast_cache_stats, true) + Self::update_l2_cache_stats(ast_cache_stats, true) } - Ok(None) => Self::update_l2_cache_stats(&mut ast_cache_stats, false), + Ok(None) => Self::update_l2_cache_stats(ast_cache_stats, false), Err(e) => { warn!(error = %e, "Failed to read L2 AST cache"); } } } - let query_engine = &self.sast_engine; - if let Ok(tree) = query_engine.parse(&content, file.language).await { + if let Ok(tree) = self.sast_engine.parse(&content, file.language).await { if self.config.enable_ast_cache && !l2_hit { if let Some(cache) = self.ast_cache.as_ref() { let ast = convert_tree_sitter_node(tree.root_node(), &content, None); @@ -601,10 +823,8 @@ impl ScanProjectUseCase { let 
suppressions = FileSuppressions::parse(&content); let is_test_context = Self::is_test_file(&file.path, &content); + stage.files_scanned += 1; - files_scanned += 1; - - // Get rules applicable to this language let applicable_rules: Vec<&PatternRule> = all_rules .iter() .filter(|r| r.languages.contains(&file.language)) @@ -614,7 +834,6 @@ impl ScanProjectUseCase { continue; } - // Execute tree-sitter pattern analysis if let Err(e) = self .execute_tree_sitter_analysis( &file.path, @@ -624,19 +843,16 @@ impl ScanProjectUseCase { &suppressions, is_test_context, cached_tree.as_ref(), - &mut all_findings, + &mut stage.findings, ) .await { warn!(file = %file.path.display(), error = %e, "Tree-sitter analysis failed"); - errors.push(format!( - "Analysis failed for {}: {}", - file.path.display(), - e - )); + stage + .errors + .push(format!("Analysis failed for {}: {}", file.path.display(), e)); } - // Phase 3: Data flow analysis if self.config.enable_data_flow && effective_depth != AnalysisDepth::Quick { self.execute_data_flow_analysis( &file.path, @@ -644,16 +860,17 @@ impl ScanProjectUseCase { &content, cached_tree.as_ref(), effective_depth, - &mut all_findings, + &mut stage.findings, ) .await; } - // Check max findings per file limit - let file_finding_count = all_findings + let file_finding_count = stage + .findings .iter() .filter(|f| f.location.file_path == file.path.display().to_string()) .count(); + if file_finding_count >= self.config.max_findings_per_file { debug!( file = %file.path.display(), @@ -662,14 +879,13 @@ impl ScanProjectUseCase { ); } - // Check max total findings limit if let Some(max_total) = self.config.max_total_findings { - if all_findings.len() >= max_total { + if stage.findings.len() >= max_total { info!( - total_findings = all_findings.len(), - max_total, "Max total findings limit reached, stopping scan early" + total_findings = stage.findings.len(), + max_total, + "Max total findings limit reached, stopping scan early" ); - // Record this file 
before breaking let mut tracker = self.incremental_tracker.lock().unwrap(); if let Some(ref mut t) = *tracker { t.record_file( @@ -683,7 +899,6 @@ impl ScanProjectUseCase { } } - // Record file in incremental tracker { let mut tracker = self.incremental_tracker.lock().unwrap(); if let Some(ref mut t) = *tracker { @@ -697,54 +912,7 @@ impl ScanProjectUseCase { } } - // Phase 4: Adjust severity for data-flow confirmed findings - if self.config.enable_data_flow && effective_depth != AnalysisDepth::Quick { - Self::adjust_severity_for_data_flow(&mut all_findings); - } - - all_findings = Self::deduplicate_findings(all_findings); - - // Finalize incremental tracker and persist state - { - let mut tracker = self.incremental_tracker.lock().unwrap(); - if let Some(ref mut t) = *tracker { - t.finalize(files_scanned + files_skipped, files_skipped); - if let Some(ref state_path) = self.config.incremental_state_path { - if let Err(e) = t.save_to_file(state_path) { - warn!(error = %e, "Failed to save incremental state"); - } - } - let stats = t.stats(); - info!( - previous = stats.previous_files, - analyzed = stats.files_analyzed, - skipped = stats.files_skipped, - "Incremental analysis stats" - ); - } - } - - let duration_ms = start_time.elapsed().as_millis() as u64; - info!( - l1_hits = ast_cache_stats.l1_hits, - l1_misses = ast_cache_stats.l1_misses, - l2_hits = ast_cache_stats.l2_hits, - l2_misses = ast_cache_stats.l2_misses, - "SAST AST cache stats" - ); - info!( - finding_count = all_findings.len(), - files_scanned, files_skipped, files_failed, duration_ms, "SAST scan completed" - ); - - Ok(ScanResult { - findings: all_findings, - files_scanned, - files_skipped, - files_failed, - errors, - duration_ms, - }) + stage } #[allow(clippy::too_many_arguments)] diff --git a/vulnera-sast/src/infrastructure/call_graph.rs b/vulnera-sast/src/infrastructure/call_graph.rs index bd90fdd8..f415cedd 100644 --- a/vulnera-sast/src/infrastructure/call_graph.rs +++ 
b/vulnera-sast/src/infrastructure/call_graph.rs @@ -39,6 +39,10 @@ pub struct UnresolvedCall { pub callee_name: String, /// Optional module hint (e.g., from import statement) pub module_hint: Option, + /// Caller file path, used for locality-aware disambiguation. + pub caller_file_path: String, + /// Optional caller scope/class name (for method disambiguation). + pub caller_scope: Option, /// Call location pub line: u32, pub column: u32, @@ -297,47 +301,78 @@ impl CallGraph { /// Try to resolve a single call to its target function ID fn resolve_call(&self, call: &UnresolvedCall) -> Option { - // Strategy 1: Check if there's a function with exact name match if let Some(candidates) = self.name_index.get(&call.callee_name) { - // If only one candidate, use it if candidates.len() == 1 { return Some(candidates[0].clone()); } - // Strategy 2: Use module hint to disambiguate - if let Some(ref hint) = call.module_hint { - for candidate in candidates { - // Check if the candidate's file path contains the module hint - if let Some(node) = self.nodes.get(candidate) { - if node.file_path.contains(hint) { - return Some(candidate.clone()); - } - } - } + let mut ranked: Vec<(&String, i32)> = candidates + .iter() + .filter_map(|candidate| { + let node = self.nodes.get(candidate)?; + Some((candidate, Self::score_candidate(call, node))) + }) + .collect(); + + ranked.sort_by(|(lhs_id, lhs_score), (rhs_id, rhs_score)| { + rhs_score + .cmp(lhs_score) + .then_with(|| lhs_id.cmp(rhs_id)) + }); + + if let Some((best_id, _)) = ranked.first() { + return Some((*best_id).clone()); } + } - // Strategy 3: Prefer functions in the same directory as caller - if let Some(caller_node) = self.nodes.get(&call.caller_id) { - let caller_dir = std::path::Path::new(&caller_node.file_path) - .parent() - .map(|p| p.to_string_lossy().to_string()); - - if let Some(dir) = caller_dir { - for candidate in candidates { - if let Some(node) = self.nodes.get(candidate) { - if node.file_path.starts_with(&dir) { - 
return Some(candidate.clone()); - } - } - } - } + None + } + + fn score_candidate(call: &UnresolvedCall, node: &CallGraphNode) -> i32 { + let mut score = 0; + + if node.file_path == call.caller_file_path { + score += 120; + } + + if let Some(caller_dir) = std::path::Path::new(&call.caller_file_path).parent() { + if node.file_path.starts_with(caller_dir.to_string_lossy().as_ref()) { + score += 70; } + } - // Fallback: use first candidate (ambiguous but better than nothing) - return Some(candidates[0].clone()); + if let Some(ref hint) = call.module_hint { + if !hint.is_empty() + && (node.file_path.contains(hint) + || std::path::Path::new(&node.file_path) + .file_stem() + .and_then(|s| s.to_str()) + .map(|stem| stem == hint) + .unwrap_or(false)) + { + score += 90; + } } - None + if let Some(ref scope) = call.caller_scope { + if node.id.contains(&format!("::{}::", scope)) { + score += 100; + } + } + + score + Self::path_similarity_bonus(&call.caller_file_path, &node.file_path) + } + + fn path_similarity_bonus(lhs: &str, rhs: &str) -> i32 { + let lhs_components = lhs.split('/').collect::>(); + let rhs_components = rhs.split('/').collect::>(); + let common_prefix = lhs_components + .iter() + .zip(rhs_components.iter()) + .take_while(|(l, r)| l == r) + .count() as i32; + + common_prefix * 3 } // ========================================================================= @@ -699,10 +734,22 @@ impl CallGraphBuilder { .map(|f| f.id.clone()); if let Some(caller) = caller_id { + let caller_scope = caller + .rsplit("::") + .nth(1) + .filter(|segment| *segment != file_path) + .map(|segment| segment.to_string()); + // Check if this is a local function call if local_function_names.contains(callee_name) { - // Local call - create direct edge - let target_id = format!("{}::{}", file_path, callee_name); + // Local call - create direct edge; prefer class-scoped method if present. 
+ let class_scoped_target = caller_scope + .as_ref() + .map(|scope| format!("{}::{}::{}", file_path, scope, callee_name)); + let target_id = class_scoped_target + .filter(|candidate| self.graph.get_function(candidate).is_some()) + .unwrap_or_else(|| format!("{}::{}", file_path, callee_name)); + let call_site = CallSite { target_id, target_name: callee_name.to_string(), @@ -722,6 +769,8 @@ impl CallGraphBuilder { caller_id: caller.clone(), callee_name: callee_name.to_string(), module_hint, + caller_file_path: file_path.to_string(), + caller_scope, line: m.start_position.0 as u32 + 1, column: m.start_position.1 as u32, }; diff --git a/vulnera-sast/src/infrastructure/mod.rs b/vulnera-sast/src/infrastructure/mod.rs index 799b8eb6..673e1101 100644 --- a/vulnera-sast/src/infrastructure/mod.rs +++ b/vulnera-sast/src/infrastructure/mod.rs @@ -14,6 +14,8 @@ pub mod call_graph_queries; pub mod data_flow; pub mod incremental; pub mod metavar_patterns; +pub mod oxc_frontend; +pub mod parser_frontend; pub mod parsers; pub mod query_engine; pub mod regex_cache; @@ -29,6 +31,8 @@ pub use ast_cache::*; pub use call_graph::*; pub use data_flow::*; pub use incremental::*; +pub use oxc_frontend::*; +pub use parser_frontend::*; pub use query_engine::*; pub use rules::RuleRepository; pub use sast_engine::*; diff --git a/vulnera-sast/src/infrastructure/oxc_frontend.rs b/vulnera-sast/src/infrastructure/oxc_frontend.rs new file mode 100644 index 00000000..5bdc6526 --- /dev/null +++ b/vulnera-sast/src/infrastructure/oxc_frontend.rs @@ -0,0 +1,57 @@ +use std::path::Path; + +use oxc_allocator::Allocator; +use oxc_parser::Parser; +use oxc_span::SourceType; + +use crate::domain::value_objects::Language; + +/// OXC-powered parser frontend for JavaScript/TypeScript family files. +#[derive(Debug, Default, Clone, Copy)] +pub struct OxcFrontend; + +impl OxcFrontend { + /// Returns whether this frontend supports the given language. 
+ pub fn supports(language: Language) -> bool { + matches!(language, Language::JavaScript | Language::TypeScript) + } + + /// Performs syntax parsing with OXC and returns detailed parse errors. + pub fn parse_file(&self, file_path: &Path, source: &str) -> Result<(), String> { + let source_type = SourceType::from_path(file_path).map_err(|err| { + format!( + "Unsupported JS/TS source type for '{}': {}", + file_path.display(), + err + ) + })?; + + let allocator = Allocator::default(); + let parser_return = Parser::new(&allocator, source, source_type).parse(); + + if parser_return.panicked { + return Err(format!( + "OXC parser panicked for '{}'", + file_path.display() + )); + } + + if parser_return.errors.is_empty() { + return Ok(()); + } + + let error_summary = parser_return + .errors + .iter() + .take(5) + .map(ToString::to_string) + .collect::>() + .join(" | "); + + Err(format!( + "OXC parse errors in '{}': {}", + file_path.display(), + error_summary + )) + } +} diff --git a/vulnera-sast/src/infrastructure/parser_frontend.rs b/vulnera-sast/src/infrastructure/parser_frontend.rs new file mode 100644 index 00000000..2eb27060 --- /dev/null +++ b/vulnera-sast/src/infrastructure/parser_frontend.rs @@ -0,0 +1,46 @@ +use serde::{Deserialize, Serialize}; + +use crate::domain::value_objects::Language; + +/// Preferred JS/TS parser frontend strategy. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum JavaScriptFrontend { + /// Use tree-sitter for JavaScript/TypeScript parsing and analysis. + #[default] + TreeSitter, + /// Prefer OXC for JavaScript/TypeScript when available. + /// + /// Current runtime keeps tree-sitter as execution backend and uses this + /// as routing intent for progressive rollout. + OxcPreferred, +} + +/// Effective parser frontend selected for a file. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ParserFrontend { + TreeSitter, + Oxc, +} + +/// Routing selector from language + config to parser frontend. +#[derive(Debug, Clone, Copy)] +pub struct ParserFrontendSelector { + js_ts_frontend: JavaScriptFrontend, +} + +impl ParserFrontendSelector { + pub fn new(js_ts_frontend: JavaScriptFrontend) -> Self { + Self { js_ts_frontend } + } + + pub fn select(&self, language: Language) -> ParserFrontend { + match language { + Language::JavaScript | Language::TypeScript => match self.js_ts_frontend { + JavaScriptFrontend::TreeSitter => ParserFrontend::TreeSitter, + JavaScriptFrontend::OxcPreferred => ParserFrontend::Oxc, + }, + _ => ParserFrontend::TreeSitter, + } + } +} diff --git a/vulnera-sast/src/module.rs b/vulnera-sast/src/module.rs index 7761d8e6..3316072b 100644 --- a/vulnera-sast/src/module.rs +++ b/vulnera-sast/src/module.rs @@ -8,6 +8,7 @@ use vulnera_core::config::SastConfig; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, + VulnerabilityDataFlowNode, VulnerabilityDataFlowPath, VulnerabilityFindingMetadata, }; use crate::application::use_cases::{AnalysisConfig, ScanProjectUseCase}; @@ -23,6 +24,7 @@ use crate::infrastructure::ast_cache::AstCacheService; /// ``` pub struct SastModule { use_case: Arc, + sast_config: SastConfig, } /// Builder for [`SastModule`]. @@ -68,16 +70,22 @@ impl SastModuleBuilder { /// Build the `SastModule`. 
pub fn build(self) -> SastModule { - let use_case = if let Some(uc) = self.use_case_override { + let SastModuleBuilder { + sast_config, + analysis_config, + ast_cache, + use_case_override, + } = self; + + let sast_config = sast_config.unwrap_or_default(); + + let use_case = if let Some(uc) = use_case_override { uc } else { - let sast_cfg = self.sast_config.unwrap_or_default(); - let analysis_cfg = self - .analysis_config - .unwrap_or_else(|| AnalysisConfig::from(&sast_cfg)); + let analysis_cfg = analysis_config.unwrap_or_else(|| AnalysisConfig::from(&sast_config)); - let uc = ScanProjectUseCase::with_config(&sast_cfg, analysis_cfg); - let uc = if let Some(cache) = self.ast_cache { + let uc = ScanProjectUseCase::with_config(&sast_config, analysis_cfg); + let uc = if let Some(cache) = ast_cache { uc.with_ast_cache(cache) } else { uc @@ -85,7 +93,10 @@ impl SastModuleBuilder { Arc::new(uc) }; - SastModule { use_case } + SastModule { + use_case, + sast_config, + } } } @@ -138,10 +149,51 @@ impl AnalysisModule for SastModule { .await .map_err(|e| ModuleExecutionError::ExecutionFailed(e.to_string()))?; + let policy = SastFindingPolicy::from_config(&self.sast_config, config)?; + let mut filtered_by_severity = 0usize; + let mut filtered_by_confidence = 0usize; + let mut filtered_by_dataflow = 0usize; + let mut filtered_by_quality = 0usize; + // Convert SAST findings to orchestrator findings let findings: Vec = scan_result .findings .into_iter() + .filter(|f| { + if f.description.trim().is_empty() { + filtered_by_quality += 1; + return false; + } + + if policy.require_recommendation + && f.recommendation + .as_ref() + .is_none_or(|r| r.trim().is_empty()) + { + filtered_by_quality += 1; + return false; + } + + if severity_rank(&f.severity) < severity_rank(&policy.min_severity) { + filtered_by_severity += 1; + return false; + } + + if confidence_rank(&f.confidence) < confidence_rank(&policy.min_confidence) { + filtered_by_confidence += 1; + return false; + } + + if 
policy.require_data_flow_evidence_for_dataflow + && is_data_flow_rule(&f.rule_id) + && f.data_flow_path.is_none() + { + filtered_by_dataflow += 1; + return false; + } + + true + }) .map(|f| Finding { id: f.id, r#type: FindingType::Vulnerability, @@ -165,15 +217,95 @@ impl AnalysisModule for SastModule { crate::domain::value_objects::Confidence::Medium => FindingConfidence::Medium, crate::domain::value_objects::Confidence::Low => FindingConfidence::Low, }, - description: f.description, - recommendation: f.recommendation, + description: f.description.trim().to_string(), + recommendation: f + .recommendation + .map(|r| r.trim().to_string()) + .filter(|r| !r.is_empty()), secret_metadata: None, + vulnerability_metadata: VulnerabilityFindingMetadata { + snippet: f.snippet, + bindings: f.bindings, + data_flow_path: f.data_flow_path.map(|path| VulnerabilityDataFlowPath { + source: VulnerabilityDataFlowNode { + location: Location { + path: path.source.location.file_path, + line: Some(path.source.location.line), + column: path.source.location.column, + end_line: path.source.location.end_line, + end_column: path.source.location.end_column, + }, + description: path.source.description, + expression: path.source.expression, + }, + steps: path + .steps + .into_iter() + .map(|step| VulnerabilityDataFlowNode { + location: Location { + path: step.location.file_path, + line: Some(step.location.line), + column: step.location.column, + end_line: step.location.end_line, + end_column: step.location.end_column, + }, + description: step.description, + expression: step.expression, + }) + .collect(), + sink: VulnerabilityDataFlowNode { + location: Location { + path: path.sink.location.file_path, + line: Some(path.sink.location.line), + column: path.sink.location.column, + end_line: path.sink.location.end_line, + end_column: path.sink.location.end_column, + }, + description: path.sink.description, + expression: path.sink.expression, + }, + }), + }, enrichment: None, }) .collect(); let 
duration = start_time.elapsed(); + let mut additional_info = std::collections::HashMap::new(); + additional_info.insert( + "policy_min_severity".to_string(), + format!("{}", policy.min_severity), + ); + additional_info.insert( + "policy_min_confidence".to_string(), + confidence_name(&policy.min_confidence).to_string(), + ); + additional_info.insert( + "policy_require_data_flow_evidence_for_dataflow".to_string(), + policy.require_data_flow_evidence_for_dataflow.to_string(), + ); + additional_info.insert( + "policy_require_recommendation".to_string(), + policy.require_recommendation.to_string(), + ); + additional_info.insert( + "filtered_by_severity".to_string(), + filtered_by_severity.to_string(), + ); + additional_info.insert( + "filtered_by_confidence".to_string(), + filtered_by_confidence.to_string(), + ); + additional_info.insert( + "filtered_by_data_flow_evidence".to_string(), + filtered_by_dataflow.to_string(), + ); + additional_info.insert( + "filtered_by_quality".to_string(), + filtered_by_quality.to_string(), + ); + Ok(ModuleResult { job_id: config.job_id, module_type: ModuleType::SAST, @@ -181,13 +313,133 @@ impl AnalysisModule for SastModule { metadata: ModuleResultMetadata { files_scanned: scan_result.files_scanned, duration_ms: duration.as_millis() as u64, - additional_info: std::collections::HashMap::new(), + additional_info, }, error: None, }) } } +#[derive(Debug, Clone)] +struct SastFindingPolicy { + min_severity: SastSeverity, + min_confidence: crate::domain::value_objects::Confidence, + require_data_flow_evidence_for_dataflow: bool, + require_recommendation: bool, +} + +impl SastFindingPolicy { + fn from_config( + default_config: &SastConfig, + module_config: &ModuleConfig, + ) -> Result { + let default_min_severity = default_config + .min_finding_severity + .as_deref() + .map(parse_severity) + .transpose()?; + + let default_min_confidence = default_config + .min_finding_confidence + .as_deref() + .map(parse_confidence) + .transpose()?; + + let 
min_severity = module_config + .config + .get("sast.min_severity") + .and_then(serde_json::Value::as_str) + .map(parse_severity) + .transpose()? + .or(default_min_severity) + .unwrap_or(SastSeverity::Info); + + let min_confidence = module_config + .config + .get("sast.min_confidence") + .and_then(serde_json::Value::as_str) + .map(parse_confidence) + .transpose()? + .or(default_min_confidence) + .unwrap_or(crate::domain::value_objects::Confidence::Low); + + let require_data_flow_evidence_for_dataflow = module_config + .config + .get("sast.require_data_flow_evidence_for_dataflow") + .and_then(serde_json::Value::as_bool) + .unwrap_or(default_config.require_data_flow_evidence_for_dataflow); + + let require_recommendation = module_config + .config + .get("sast.require_recommendation") + .and_then(serde_json::Value::as_bool) + .unwrap_or(default_config.require_recommendation); + + Ok(Self { + min_severity, + min_confidence, + require_data_flow_evidence_for_dataflow, + require_recommendation, + }) + } +} + +fn parse_severity(input: &str) -> Result { + match input.trim().to_ascii_lowercase().as_str() { + "critical" => Ok(SastSeverity::Critical), + "high" => Ok(SastSeverity::High), + "medium" => Ok(SastSeverity::Medium), + "low" => Ok(SastSeverity::Low), + "info" => Ok(SastSeverity::Info), + other => Err(ModuleExecutionError::InvalidConfig(format!( + "Invalid SAST severity threshold: {other}" + ))), + } +} + +fn parse_confidence( + input: &str, +) -> Result { + match input.trim().to_ascii_lowercase().as_str() { + "high" => Ok(crate::domain::value_objects::Confidence::High), + "medium" => Ok(crate::domain::value_objects::Confidence::Medium), + "low" => Ok(crate::domain::value_objects::Confidence::Low), + other => Err(ModuleExecutionError::InvalidConfig(format!( + "Invalid SAST confidence threshold: {other}" + ))), + } +} + +fn severity_rank(severity: &SastSeverity) -> u8 { + match severity { + SastSeverity::Info => 0, + SastSeverity::Low => 1, + SastSeverity::Medium => 2, + 
SastSeverity::High => 3, + SastSeverity::Critical => 4, + } +} + +fn confidence_rank(confidence: &crate::domain::value_objects::Confidence) -> u8 { + match confidence { + crate::domain::value_objects::Confidence::Low => 0, + crate::domain::value_objects::Confidence::Medium => 1, + crate::domain::value_objects::Confidence::High => 2, + } +} + +fn confidence_name(confidence: &crate::domain::value_objects::Confidence) -> &'static str { + match confidence { + crate::domain::value_objects::Confidence::Low => "low", + crate::domain::value_objects::Confidence::Medium => "medium", + crate::domain::value_objects::Confidence::High => "high", + } +} + +fn is_data_flow_rule(rule_id: &str) -> bool { + rule_id.starts_with("data-flow-") || rule_id.contains("dataflow") || rule_id.contains("taint") +} + impl Default for SastModule { fn default() -> Self { Self::new() diff --git a/vulnera-sast/tests/data/cve-fixtures/javascript/cve-2019-10744-lodash-prototype-pollution.yaml b/vulnera-sast/tests/data/cve-fixtures/javascript/cve-2019-10744-lodash-prototype-pollution.yaml new file mode 100644 index 00000000..d540cc2f --- /dev/null +++ b/vulnera-sast/tests/data/cve-fixtures/javascript/cve-2019-10744-lodash-prototype-pollution.yaml @@ -0,0 +1,58 @@ +# CVE-2019-10744: Prototype Pollution in lodash merge-style APIs +# Impact: Prototype manipulation leading to auth bypass / logic tampering + +id: "CVE-2019-10744" +name: "Lodash Merge Prototype Pollution" +language: "javascript" +vulnerability_type: "prototype_pollution" +severity: "high" +cwe: + - "CWE-1321" +impact: "Prototype chain poisoning can alter security-sensitive object behavior across the process" +description: | + Prototype pollution happens when untrusted object keys like __proto__, constructor, + or prototype are merged into application objects. This can corrupt global object + behavior and bypass authorization or validation logic. 
+ +test_cases: + - name: "lodash merge with untrusted JSON payload" + vulnerable: true + code: | + const _ = require('lodash'); + + function mergeUserConfig(input) { + const defaults = { role: 'user' }; + const payload = JSON.parse(input); + return _.merge({}, defaults, payload); + } + expected_findings: + - rule_id: "prototype_pollution" + line: 6 + severity: "high" + message_contains: "merge" + + - name: "defaultsDeep with request body" + vulnerable: true + code: | + const _ = require('lodash'); + + function buildConfig(req) { + return _.defaultsDeep({}, req.body, { secure: true }); + } + expected_findings: + - rule_id: "lodash-prototype-pollution" + line: 4 + severity: "high" + + - name: "safe explicit allowlist assignment" + vulnerable: false + code: | + function safeMerge(input) { + const payload = JSON.parse(input); + const safe = { + theme: payload.theme, + language: payload.language, + }; + return { role: 'user', ...safe }; + } + expected_findings: [] diff --git a/vulnera-sast/tests/data/cve-fixtures/python/cve-2007-4559-tarfile-path-traversal.yaml b/vulnera-sast/tests/data/cve-fixtures/python/cve-2007-4559-tarfile-path-traversal.yaml new file mode 100644 index 00000000..bceea7b3 --- /dev/null +++ b/vulnera-sast/tests/data/cve-fixtures/python/cve-2007-4559-tarfile-path-traversal.yaml @@ -0,0 +1,53 @@ +# CVE-2007-4559: Python tarfile path traversal during extraction +# Impact: Arbitrary file overwrite via crafted archive member names + +id: "CVE-2007-4559" +name: "Python Tarfile Extraction Path Traversal" +language: "python" +vulnerability_type: "path_traversal" +severity: "critical" +cwe: + - "CWE-22" + - "CWE-73" +impact: "Attackers can overwrite files outside extraction directory by using ../ paths in tar headers" +description: | + Extracting attacker-controlled tar archives with extractall() can write files + outside the target directory unless member paths are validated. 
+ +test_cases: + - name: "unsafe tar.extractall on untrusted upload" + vulnerable: true + code: | + import tarfile + + def restore_backup(archive_path, output_dir): + with tarfile.open(archive_path, 'r:gz') as tar: + tar.extractall(output_dir) + expected_findings: + - rule_id: "tarfile-path-traversal" + line: 5 + severity: "critical" + message_contains: "extract" + + - name: "unsafe tar.extractall with user-controlled destination" + vulnerable: true + code: | + import tarfile + + def unpack(archive, req): + with tarfile.open(archive, 'r') as tf: + tf.extractall(req.args.get('dest', '/tmp/out')) + expected_findings: + - rule_id: "path_traversal" + line: 5 + severity: "high" + + - name: "safe inspection only without extraction" + vulnerable: false + code: | + import tarfile + + def list_members(archive_path): + with tarfile.open(archive_path, 'r:*') as tf: + return [m.name for m in tf.getmembers()] + expected_findings: [] diff --git a/vulnera-secrets/Cargo.toml b/vulnera-secrets/Cargo.toml index c3f15d49..530e33e9 100644 --- a/vulnera-secrets/Cargo.toml +++ b/vulnera-secrets/Cargo.toml @@ -2,6 +2,7 @@ name = "vulnera-secrets" version.workspace = true edition.workspace = true +rust-version.workspace = true authors.workspace = true license.workspace = true repository.workspace = true diff --git a/vulnera-secrets/src/module.rs b/vulnera-secrets/src/module.rs index 4ac2d2d7..9acd5a9b 100644 --- a/vulnera-secrets/src/module.rs +++ b/vulnera-secrets/src/module.rs @@ -8,7 +8,7 @@ use vulnera_core::config::SecretDetectionConfig; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, - SecretFindingMetadata, SecretVerificationState, + SecretFindingMetadata, SecretVerificationState, VulnerabilityFindingMetadata, }; use crate::application::use_cases::ScanForSecretsUseCase; @@ -108,6 +108,7 @@ impl AnalysisModule for 
SecretDetectionModule { entropy: f.entropy, evidence: f.evidence, }), + vulnerability_metadata: VulnerabilityFindingMetadata::default(), enrichment: None, }) .collect(); From 9d76c8c165d68c190093eb819c00384dee9ebaf9 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Fri, 13 Feb 2026 19:07:31 +0200 Subject: [PATCH 3/9] chore: add workspace git hooks and docs index - Add .githooks/pre-commit for formatting, lint, and test enforcement - Add .pre-commit-config.yaml for pre-commit integration - Introduce docs/README.md as workspace onboarding and crate index - Update .gitignore to allow vulnera-cli workspace tracking - Remove tree-sitter-grep from Cargo.toml dependencies - Clean up README.md: add workspace docs link, remove team/community section - Update Dockerfile comment for NVD data directory - Expand CHANGELOG.md for CLI architecture, SAST remediation, and test coverage --- .githooks/pre-commit | 7 +++++ .gitignore | 1 - .pre-commit-config.yaml | 23 ++++++++++++++++ CHANGELOG.md | 21 +++++++++++++++ Cargo.toml | 1 - Dockerfile | 2 +- README.md | 39 +-------------------------- docs/README.md | 60 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 113 insertions(+), 41 deletions(-) create mode 100644 .githooks/pre-commit create mode 100644 .pre-commit-config.yaml create mode 100644 docs/README.md diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100644 index 00000000..c9f4c8a0 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail + +cargo fmt --all --check +cargo check +cargo clippy --all-targets --all-features -- -D warnings +cargo test --lib --tests diff --git a/.gitignore b/.gitignore index d4e2a2a7..9b2b4f6f 100644 --- a/.gitignore +++ b/.gitignore @@ -157,5 +157,4 @@ vulnera-sast/tests/snapshots/* /.vulnera_data curls.txt # Decoupled components -/vulnera-cli/ docs/modules.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 
index 00000000..bf6ec6d4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: + - repo: local + hooks: + - id: cargo-fmt + name: cargo fmt + entry: cargo fmt --all + language: system + pass_filenames: false + - id: cargo-check + name: cargo check + entry: cargo check + language: system + pass_filenames: false + - id: cargo-clippy + name: cargo clippy + entry: cargo clippy --all-targets --all-features -- -D warnings + language: system + pass_filenames: false + - id: cargo-test + name: cargo test + entry: cargo test --lib --tests + language: system + pass_filenames: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 719dc0fe..0b40d27e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ The format is based on Keep a Changelog and this project adheres to Semantic Ver ## [0.5.1] - 2026-02-13 ### Added +- **CLI Architecture & UX (vulnera-cli):** + - Introduced application use-case orchestration for CLI command flows (`analyze`, `sast`, `secrets`, `deps`, `auth`, `quota`, `api`, `generate-fix`) to reduce command-layer coupling. + - Added shared CLI services for watch execution and scan-target resolution (changed files, explicit file list, excludes, language filters). + - Added user-facing project hook management via `vulnera config hooks` with `install`, `status`, and `remove` subcommands. + - Added backend selection for hook install (`git` or `pre-commit`) with idempotent managed blocks. +- **SAST Remediation Pipeline (vulnera-cli):** + - Implemented true bulk LLM-backed fix generation for `vulnera sast --fix`. + - Added remediation aggregation in SAST output including generated LLM fixes, SAST-native suggestions, and dependency upgrade suggestions. + - Added SARIF emission with fix payloads when bulk fixes are generated. + - **Dependency Analysis Improvements:** - Precise semver interval intersection in `VersionRange::overlaps_with` for better vulnerability matching. 
- Support for Git commit range matching in vulnerability checks via `matches_git_range`. @@ -50,6 +60,13 @@ The format is based on Keep a Changelog and this project adheres to Semantic Ver - Replaced `.map/.any` patterns with `.contains/.iter` where clearer. ### Fixed +- **CLI Quality & Reliability:** + - Unified API client construction in `CliContext` to eliminate per-command drift. + - Improved watch-mode safety by centralizing scan execution behavior. + - Replaced brittle CLI version assertion with package-version-based assertion in tests. +- **CLI Lint/Tooling:** + - Addressed strict clippy findings in CLI paths touched during refactor (including pointer-arg and conditional simplifications). + - **Robustness:** Added resilient regex compilation in API analyzers with graceful error handling. - **Error Handling:** Replaced unchecked `unwrap()` calls with explicit error propagation in `app.rs`. - **Startup:** Added validation for loaded configuration at startup with clear diagnostics. @@ -59,6 +76,10 @@ The format is based on Keep a Changelog and this project adheres to Semantic Ver ### Dependencies Added - `globset` for robust glob pattern matching. +### Tests +- Extended `vulnera-cli` command smoke tests to cover `config hooks` command surface and subcommand help. +- Added focused unit tests for SAST bulk-fix suggestion builders and dependency-suggestion deduplication logic. 
+ ## [0.5.0] - 2026-02-11 ### Added diff --git a/Cargo.toml b/Cargo.toml index 8b873fef..3be78ece 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -209,7 +209,6 @@ tree-sitter-json = "0.24.8" tree-sitter-go = "0.25.0" tree-sitter-c = "0.24.1" tree-sitter-cpp = "0.23.4" -tree-sitter-grep = "0.1.0" globset = "0.4" oxc_allocator = "0.113.0" oxc_parser = "0.113.0" diff --git a/Dockerfile b/Dockerfile index b34256e2..6fbe1bbe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -139,7 +139,7 @@ COPY --from=builder /app/migrations ./migrations COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh RUN chmod +x /usr/local/bin/docker-entrypoint.sh -# Create NVD data directory (for SQLite database) +# Create NVD data directory RUN mkdir -p .vulnera_data && chown vulnera:vulnera .vulnera_data # Ensure PATH includes /usr/local/bin for any other subprocess calls diff --git a/README.md b/README.md index 6c99df74..9035ffd2 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ _Multi-ecosystem vulnerability analysis with dependency scanning, SAST, secrets detection, and API security auditing_ -[Quick Start](#-quick-start) • [Web Dashboard](https://vulnera.studio) • [Documentation](https://k5602.github.io/Vulnera/) +[Quick Start](#-quick-start) • [Web Dashboard](https://vulnera.studio) • [Documentation](https://k5602.github.io/Vulnera/) • [Workspace Index](docs/README.md) @@ -119,19 +119,6 @@ cargo run **Verify:** `curl http://localhost:3000/health` • **API Docs:** ---- - -## 📖 Documentation - -| Guide | Description | -| ------------------------------------------------------------------------------- | --------------------------------------- | -| [Quick Start](https://k5602.github.io/Vulnera/getting-started/quick-start.html) | Installation and first scan | -| [CLI Reference](https://k5602.github.io/Vulnera/guide/cli-reference.html) | Command-line usage | -| [Configuration](https://k5602.github.io/Vulnera/guide/configuration.html) | Environment variables and TOML config | -| 
[Authentication](https://k5602.github.io/Vulnera/guide/authentication.html) | JWT and API key setup | -| [Analysis Modules](https://k5602.github.io/Vulnera/modules/overview.html) | Module-specific documentation | -| [Architecture](https://k5602.github.io/Vulnera/architecture/overview.html) | System design and DDD patterns | -| [CI/CD Integration](https://k5602.github.io/Vulnera/integration/cicd.html) | GitHub Actions, GitLab CI, Azure DevOps | --- @@ -271,24 +258,6 @@ Azure Front Door → API Management → App Service/Container Apps --- -## 👥 Team & Community - -| Name | Role | -| ------------------------ | ------------------------------- | -| **Khaled Mahmoud** | Project Lead, Backend Developer | -| **Abd El-Rahman Mossad** | Frontend, LSP Server Maintainer | -| **Amr Medhat** | Cloud Engineer | -| **Gasser Mohammed** | Frontend Engineer | - -**Resources:** - -- **Web Dashboard:** [vulnera.studio](https://vulnera.studio) — Full-featured team collaboration platform -- **Documentation:** [k5602.github.io/Vulnera](https://k5602.github.io/Vulnera/) — Complete guides and tutorials -- **GitHub:** [k5602/Vulnera](https://github.com/k5602/Vulnera) — Source code and issues -- **Community:** [GitHub Discussions](https://github.com/k5602/Vulnera/discussions) — Q&A and feedback - ---- - ## 📜 License Core platform (server, analysis modules, orchestration): **Business Source License 1.1** with a @@ -299,9 +268,3 @@ CLI , Advisors and LSP (adapter): **AGPL-3.0-or-later**. See [LICENSE](./LICENSE) for details and commercial licensing. --- - -
- -**[Web Dashboard](https://vulnera.studio)** • **[Documentation](https://k5602.github.io/Vulnera/)** • **[API Reference](http://localhost:3000/docs)** • **[Contributing](CONTRIBUTING.md)** • **[Changelog](CHANGELOG.md)** - -
diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..49d86783 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,60 @@ +# Vulnera Workspace Docs Index + +This page is the onboarding index for contributors working in the monorepo. + +## Workspace Hook (Single Source) + +- Pre-commit hook config: `/.pre-commit-config.yaml` +- Native git hook script: `/.githooks/pre-commit` + +Use this once at the workspace root: + +```bash +chmod +x .githooks/pre-commit +git config core.hooksPath .githooks +``` + +## Crate Overview + +- Root server workspace: `/` (this repo) +- Adapter (LSP): `/adapter` +- Advisors: `/advisors` +- CLI: `/vulnera-cli` +- Core: `/vulnera-core` +- Orchestrator: `/vulnera-orchestrator` +- Dependencies module: `/vulnera-deps` +- SAST module: `/vulnera-sast` +- Secrets module: `/vulnera-secrets` +- API module: `/vulnera-api` +- LLM module: `/vulnera-llm` +- Sandbox module: `/vulnera-sandbox` + +## Crate Docs / Changelog Links + +### Root Workspace +- README: `/README.md` +- Changelog: `/CHANGELOG.md` +- Book/docs source: `/docs/src` + +### Adapter +- README: `/adapter/README.md` + +### Advisors +- README: `/advisors/README.md` +- Docs: `/advisors/docs/README.md` +- Changelog: `/advisors/CHANGELOG.md` + +### CLI +- README: `/vulnera-cli/README.md` +- Docs index: `/vulnera-cli/docs/README.md` +- Changelog: `/vulnera-cli/CHANGELOG.md` + +### SAST +- README: `/vulnera-sast/README.md` + +## Contributor Quick Path + +1. Start at `/README.md` and this file (`/docs/README.md`). +2. Set root git hooks once. +3. Use crate-local README/docs for module-specific workflows. +4. Track changes in `/CHANGELOG.md` (root) and crate-specific changelogs where applicable. 
From 10419a22bae6c1c1a4a40acc60471806989b6668 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Fri, 13 Feb 2026 21:07:09 +0200 Subject: [PATCH 4/9] refactor: use let-chains to simplify conditionals - Replace nested if/let patterns with Rust let-chains to flatten conditionals across many modules (vulnera-api, vulnera-core, vulnera-deps, vulnera-orchestrator, vulnera-sast, vulnera-secrets, vulnera-llm, vulnera-deps, vulnera-adapter/cli tests, etc.) - Add unit tests for batch dependency analysis to validate request/response file_id contract - Update docs: add vulnera-advisor and vulnera-adapter to workspace overview - Import chrono::Utc where required and apply minor formatting/whitespace cleanups Preserve existing behavior; this is a readability/maintainability refactor with no breaking changes. --- .github/copilot-instructions.md | 24 +- vulnera-api/src/application/use_cases.rs | 8 +- .../analyzers/authorization_analyzer.rs | 32 +- .../resource_restriction_analyzer.rs | 8 +- .../infrastructure/parser/openapi_parser.rs | 189 +++++----- .../analytics/analytics_service.rs | 44 ++- vulnera-core/src/config/mod.rs | 2 +- vulnera-core/src/config/validation.rs | 24 +- .../auth/organization_member_repository.rs | 14 +- .../auth/organization_repository.rs | 17 +- .../infrastructure/auth/user_repository.rs | 24 +- vulnera-core/src/infrastructure/parsers/go.rs | 8 +- .../src/infrastructure/parsers/gradle_pest.rs | 126 ++++--- .../src/infrastructure/parsers/java.rs | 34 +- .../src/infrastructure/parsers/npm.rs | 109 +++--- .../src/infrastructure/parsers/php.rs | 9 +- .../src/infrastructure/parsers/python.rs | 70 ++-- .../src/infrastructure/parsers/ruby.rs | 37 +- .../src/infrastructure/parsers/rust.rs | 60 ++-- .../infrastructure/parsers/tree_sitter/go.rs | 8 +- .../parsers/tree_sitter/json.rs | 27 +- .../src/infrastructure/parsers/yarn_pest.rs | 63 ++-- .../infrastructure/rate_limiter/storage.rs | 24 +- 
.../registries/vulnera_registry_adapter.rs | 8 +- .../repository_source/github_client.rs | 26 +- .../repository_source/url_parser.rs | 8 +- .../vulnerability_advisor/mod.rs | 8 +- .../src/application/analysis_context.rs | 10 +- vulnera-deps/src/domain/dependency_graph.rs | 16 +- vulnera-deps/src/domain/source_location.rs | 8 +- .../src/services/repository_analysis.rs | 16 +- vulnera-deps/src/use_cases.rs | 42 +-- .../application/use_cases/enrich_findings.rs | 22 +- .../src/infrastructure/providers/google_ai.rs | 93 +++-- .../src/infrastructure/providers/openai.rs | 44 +-- .../src/infrastructure/providers/resilient.rs | 8 +- .../src/application/use_cases.rs | 8 +- .../src/infrastructure/job_queue.rs | 18 +- .../src/presentation/auth/controller.rs | 45 ++- .../src/presentation/controllers.rs | 330 +++++++++++++----- .../src/presentation/controllers/jobs.rs | 26 +- .../src/presentation/controllers/llm.rs | 8 +- .../src/presentation/middleware/mod.rs | 21 +- .../src/presentation/models.rs | 13 + .../tests/test_api_endpoints.rs | 97 ++++- vulnera-sast/examples/scale_benchmark.rs | 14 +- vulnera-sast/src/application/use_cases.rs | 177 +++++----- vulnera-sast/src/infrastructure/call_graph.rs | 164 +++++---- vulnera-sast/src/infrastructure/data_flow.rs | 41 ++- .../src/infrastructure/metavar_patterns.rs | 55 +-- .../src/infrastructure/oxc_frontend.rs | 5 +- vulnera-sast/src/infrastructure/sarif.rs | 24 +- vulnera-sast/src/infrastructure/scanner.rs | 31 +- .../src/infrastructure/symbol_table.rs | 104 +++--- vulnera-sast/src/module.rs | 3 +- vulnera-secrets/src/application/use_cases.rs | 21 +- vulnera-secrets/src/domain/value_objects.rs | 2 +- .../src/infrastructure/baseline/repository.rs | 23 +- .../infrastructure/detectors/ast_extractor.rs | 14 +- .../src/infrastructure/git/scanner.rs | 40 +-- vulnera-secrets/src/infrastructure/scanner.rs | 17 +- .../verification/aws_verifier.rs | 16 +- 62 files changed, 1396 insertions(+), 1191 deletions(-) diff --git 
a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 693b5b8b..d68b7e1f 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -17,7 +17,9 @@ vulnera-rust (binary - HTTP API server) │ └─ vulnera-llm [Gemini-powered explanations & auto-fixes] └─ vulnera-core [domain models, shared traits, infra, config] -vulnera-cli (standalone workspace - offline analysis + server API calls) +vulnera-cli (standalone workspace and repository - offline analysis cli client + server API calls) +vulnera-advisor (standalone workspace and repository - advisors crate + server API calls) +vulnera-adapter (standalone workspace and repository - lsp crate + server API calls) ``` **Composition Root**: `src/app.rs` is the **single composition root**. It delegates module setup to `src/modules/mod.rs` and wires all use cases, repositories, caches, and HTTP state. Never instantiate services (PgPool, Cache, etc.) inside crate internals—wire everything at the top level and inject via `Arc`. 
@@ -31,16 +33,16 @@ vulnera-cli (standalone workspace - offline analysis + server API calls) ## Critical Files & Patterns -| Task | Key Files | Pattern | -| :----------------- | :------------------------------------------------------------------------ | :---------------------------------------------------------------------------------------- | -| **New Module** | `vulnera-core/.../traits.rs`, `src/modules/mod.rs` | Implement `AnalysisModule`; register in `ModuleRegistry` | -| **Sandbox Policy** | `vulnera-sandbox/src/domain/policy.rs`, `.../application/use_cases.rs` | Build `SandboxPolicy::for_profile(SandboxPolicyProfile::...)`; execution via `SandboxExecutor` | -| **SAST Rules** | `vulnera-sast/src/infrastructure/rules/` | Tree-sitter queries + visitor pattern for taint/data-flow | -| **Job Lifecycle** | `vulnera-orchestrator/src/infrastructure/job_queue.rs` | Dragonfly-backed queue -> worker pool -> `ExecuteAnalysisJobUseCase` -> Sandbox | -| **Job Storage** | `vulnera-orchestrator/src/infrastructure/job_store/` | Persist snapshots (`FindingsSummary`, metadata) with optional webhook delivery | -| **Module Selection** | `vulnera-orchestrator/src/infrastructure/module_selector.rs` | `RuleBasedModuleSelector` decides modules by `AnalysisDepth` + project metadata | -| **Auth/API Keys** | `vulnera-core/src/infrastructure/auth/`, `.../presentation/auth/` | JWT + Argon2; cookie auth with CSRF; API key endpoints under `/api/v1/auth/api-keys` | -| **Database** | `migrations/`, `vulnera-core/.../infrastructure/` | SQLx `query!` macros (compile-time checked); `IEntityRepository` traits | +| Task | Key Files | Pattern | +| :------------------- | :--------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------- | +| **New Module** | `vulnera-core/.../traits.rs`, `src/modules/mod.rs` | Implement `AnalysisModule`; register in `ModuleRegistry` | +| **Sandbox Policy** | 
`vulnera-sandbox/src/domain/policy.rs`, `.../application/use_cases.rs` | Build `SandboxPolicy::for_profile(SandboxPolicyProfile::...)`; execution via `SandboxExecutor` | +| **SAST Rules** | `vulnera-sast/src/infrastructure/rules/` | Tree-sitter queries + visitor pattern for taint/data-flow | +| **Job Lifecycle** | `vulnera-orchestrator/src/infrastructure/job_queue.rs` | Dragonfly-backed queue -> worker pool -> `ExecuteAnalysisJobUseCase` -> Sandbox | +| **Job Storage** | `vulnera-orchestrator/src/infrastructure/job_store/` | Persist snapshots (`FindingsSummary`, metadata) with optional webhook delivery | +| **Module Selection** | `vulnera-orchestrator/src/infrastructure/module_selector.rs` | `RuleBasedModuleSelector` decides modules by `AnalysisDepth` + project metadata | +| **Auth/API Keys** | `vulnera-core/src/infrastructure/auth/`, `.../presentation/auth/` | JWT + Argon2; cookie auth with CSRF; API key endpoints under `/api/v1/auth/api-keys` | +| **Database** | `migrations/`, `vulnera-core/.../infrastructure/` | SQLx `query!` macros (compile-time checked); `IEntityRepository` traits | ## Analysis Capabilities diff --git a/vulnera-api/src/application/use_cases.rs b/vulnera-api/src/application/use_cases.rs index 12756bd2..abb66fba 100644 --- a/vulnera-api/src/application/use_cases.rs +++ b/vulnera-api/src/application/use_cases.rs @@ -128,10 +128,10 @@ impl ScanApiSpecificationUseCase { if !self.config.severity_overrides.is_empty() { for finding in &mut all_findings { let vuln_type_str = format!("{:?}", finding.vulnerability_type); - if let Some(severity_str) = self.config.severity_overrides.get(&vuln_type_str) { - if let Ok(new_severity) = Self::parse_severity(severity_str) { - finding.severity = new_severity; - } + if let Some(severity_str) = self.config.severity_overrides.get(&vuln_type_str) + && let Ok(new_severity) = Self::parse_severity(severity_str) + { + finding.severity = new_severity; } } } diff --git 
a/vulnera-api/src/infrastructure/analyzers/authorization_analyzer.rs b/vulnera-api/src/infrastructure/analyzers/authorization_analyzer.rs index a1e0e833..e142e45e 100644 --- a/vulnera-api/src/infrastructure/analyzers/authorization_analyzer.rs +++ b/vulnera-api/src/infrastructure/analyzers/authorization_analyzer.rs @@ -55,22 +55,23 @@ impl AuthorizationAnalyzer { } // BOLA Detection: ID in path but generic or missing scopes - if let Some(id_pattern) = &id_pattern { - if id_pattern.is_match(&path.path) { - let is_bola_prone = if !has_scopes { - true - } else { - // Check if scopes are generic - let generic_scopes = ["read", "write", "user", "access"]; - operation.security.iter().any(|req| { - req.scopes - .iter() - .any(|s| generic_scopes.contains(&s.as_str())) - }) - }; + if let Some(id_pattern) = &id_pattern + && id_pattern.is_match(&path.path) + { + let is_bola_prone = if !has_scopes { + true + } else { + // Check if scopes are generic + let generic_scopes = ["read", "write", "user", "access"]; + operation.security.iter().any(|req| { + req.scopes + .iter() + .any(|s| generic_scopes.contains(&s.as_str())) + }) + }; - if is_bola_prone { - findings.push(ApiFinding { + if is_bola_prone { + findings.push(ApiFinding { id: format!("bola-risk-{}-{}", path.path, operation.method), vulnerability_type: ApiVulnerabilityType::BolaRisk, location: ApiLocation { @@ -88,7 +89,6 @@ impl AuthorizationAnalyzer { path: Some(path.path.clone()), method: Some(operation.method.clone()), }); - } } } } diff --git a/vulnera-api/src/infrastructure/analyzers/resource_restriction_analyzer.rs b/vulnera-api/src/infrastructure/analyzers/resource_restriction_analyzer.rs index 29b6372b..86f122ef 100644 --- a/vulnera-api/src/infrastructure/analyzers/resource_restriction_analyzer.rs +++ b/vulnera-api/src/infrastructure/analyzers/resource_restriction_analyzer.rs @@ -51,10 +51,10 @@ impl ResourceRestrictionAnalyzer { for response in &operation.responses { if response.status_code.starts_with('2') { for 
content in &response.content { - if let Some(schema) = &content.schema { - if Self::schema_contains_array(schema) { - returns_array = true; - } + if let Some(schema) = &content.schema + && Self::schema_contains_array(schema) + { + returns_array = true; } } // Check for rate limit headers in 2xx or 429 response diff --git a/vulnera-api/src/infrastructure/parser/openapi_parser.rs b/vulnera-api/src/infrastructure/parser/openapi_parser.rs index f8bf390f..8b21e6dd 100644 --- a/vulnera-api/src/infrastructure/parser/openapi_parser.rs +++ b/vulnera-api/src/infrastructure/parser/openapi_parser.rs @@ -881,25 +881,25 @@ impl OpenApiParser { Some(spec) }; - if let Some(obj) = target { - if let Some(security_array) = obj.get("security").and_then(|s| s.as_array()) { - for security_item in security_array { - if let Some(security_obj) = security_item.as_object() { - for (scheme_name, scopes_value) in security_obj { - let scopes = if let Some(scopes_array) = scopes_value.as_array() { - scopes_array - .iter() - .filter_map(|s| s.as_str().map(|s| s.to_string())) - .collect() - } else { - Vec::new() - }; + if let Some(obj) = target + && let Some(security_array) = obj.get("security").and_then(|s| s.as_array()) + { + for security_item in security_array { + if let Some(security_obj) = security_item.as_object() { + for (scheme_name, scopes_value) in security_obj { + let scopes = if let Some(scopes_array) = scopes_value.as_array() { + scopes_array + .iter() + .filter_map(|s| s.as_str().map(|s| s.to_string())) + .collect() + } else { + Vec::new() + }; - requirements.push(SecurityRequirement { - scheme_name: scheme_name.clone(), - scopes, - }); - } + requirements.push(SecurityRequirement { + scheme_name: scheme_name.clone(), + scopes, + }); } } } @@ -933,40 +933,36 @@ impl OpenApiParser { for op_method in &[ "get", "post", "put", "delete", "patch", "head", "options", "trace", ] { - if let Some(op_value) = path_obj.get(*op_method) { - if let Some(op_obj) = op_value.as_object() { - if let 
Some(security_array) = - op_obj.get("security").and_then(|s| s.as_array()) - { - let mut op_requirements = Vec::new(); - for security_item in security_array { - if let Some(security_obj) = security_item.as_object() { - for (scheme_name, scopes_value) in security_obj { - let scopes = if let Some(scopes_array) = - scopes_value.as_array() - { - scopes_array - .iter() - .filter_map(|s| { - s.as_str().map(|s| s.to_string()) - }) - .collect() - } else { - Vec::new() - }; - - op_requirements.push(SecurityRequirement { - scheme_name: scheme_name.clone(), - scopes, - }); - } - } - } - if !op_requirements.is_empty() { - path_ops.insert(op_method.to_string(), op_requirements); + if let Some(op_value) = path_obj.get(*op_method) + && let Some(op_obj) = op_value.as_object() + && let Some(security_array) = + op_obj.get("security").and_then(|s| s.as_array()) + { + let mut op_requirements = Vec::new(); + for security_item in security_array { + if let Some(security_obj) = security_item.as_object() { + for (scheme_name, scopes_value) in security_obj { + let scopes = if let Some(scopes_array) = + scopes_value.as_array() + { + scopes_array + .iter() + .filter_map(|s| s.as_str().map(|s| s.to_string())) + .collect() + } else { + Vec::new() + }; + + op_requirements.push(SecurityRequirement { + scheme_name: scheme_name.clone(), + scopes, + }); } } } + if !op_requirements.is_empty() { + path_ops.insert(op_method.to_string(), op_requirements); + } } } } @@ -987,59 +983,38 @@ impl OpenApiParser { ) -> std::collections::HashMap> { let mut result = std::collections::HashMap::new(); - if let Some(components) = spec.get("components") { - if let Some(security_schemes) = components.get("securitySchemes") { - if let Some(schemes_obj) = security_schemes.as_object() { - for (scheme_name, scheme_value) in schemes_obj { - if let Some(scheme_obj) = scheme_value.as_object() { - if let Some(type_str) = scheme_obj.get("type").and_then(|t| t.as_str()) - { - if type_str == "oauth2" { - if let Some(flows_obj) 
= - scheme_obj.get("flows").and_then(|f| f.as_object()) - { - let mut flow_urls = std::collections::HashMap::new(); - - // Extract clientCredentials token URL - if let Some(client_creds) = - flows_obj.get("clientCredentials") - { - if let Some(client_creds_obj) = client_creds.as_object() - { - if let Some(token_url) = client_creds_obj - .get("tokenUrl") - .and_then(|u| u.as_str()) - { - flow_urls.insert( - "clientCredentials".to_string(), - token_url.to_string(), - ); - } - } - } - - // Extract password token URL - if let Some(password) = flows_obj.get("password") { - if let Some(password_obj) = password.as_object() { - if let Some(token_url) = password_obj - .get("tokenUrl") - .and_then(|u| u.as_str()) - { - flow_urls.insert( - "password".to_string(), - token_url.to_string(), - ); - } - } - } - - if !flow_urls.is_empty() { - result.insert(scheme_name.clone(), flow_urls); - } - } - } - } - } + if let Some(components) = spec.get("components") + && let Some(security_schemes) = components.get("securitySchemes") + && let Some(schemes_obj) = security_schemes.as_object() + { + for (scheme_name, scheme_value) in schemes_obj { + if let Some(scheme_obj) = scheme_value.as_object() + && let Some(type_str) = scheme_obj.get("type").and_then(|t| t.as_str()) + && type_str == "oauth2" + && let Some(flows_obj) = scheme_obj.get("flows").and_then(|f| f.as_object()) + { + let mut flow_urls = std::collections::HashMap::new(); + + // Extract clientCredentials token URL + if let Some(client_creds) = flows_obj.get("clientCredentials") + && let Some(client_creds_obj) = client_creds.as_object() + && let Some(token_url) = + client_creds_obj.get("tokenUrl").and_then(|u| u.as_str()) + { + flow_urls.insert("clientCredentials".to_string(), token_url.to_string()); + } + + // Extract password token URL + if let Some(password) = flows_obj.get("password") + && let Some(password_obj) = password.as_object() + && let Some(token_url) = + password_obj.get("tokenUrl").and_then(|u| u.as_str()) + { + 
flow_urls.insert("password".to_string(), token_url.to_string()); + } + + if !flow_urls.is_empty() { + result.insert(scheme_name.clone(), flow_urls); } } } @@ -1052,11 +1027,11 @@ impl OpenApiParser { fn extract_schemas_from_json(spec: &JsonValue) -> SchemaMap { let mut schemas = SchemaMap::new(); - if let Some(components) = spec.get("components") { - if let Some(schemas_obj) = components.get("schemas").and_then(|s| s.as_object()) { - for (schema_name, schema_def) in schemas_obj { - schemas.insert(schema_name.clone(), schema_def.clone()); - } + if let Some(components) = spec.get("components") + && let Some(schemas_obj) = components.get("schemas").and_then(|s| s.as_object()) + { + for (schema_name, schema_def) in schemas_obj { + schemas.insert(schema_name.clone(), schema_def.clone()); } } diff --git a/vulnera-core/src/application/analytics/analytics_service.rs b/vulnera-core/src/application/analytics/analytics_service.rs index d6887213..141e47fa 100644 --- a/vulnera-core/src/application/analytics/analytics_service.rs +++ b/vulnera-core/src/application/analytics/analytics_service.rs @@ -256,29 +256,27 @@ impl AnalyticsAggregationService { .await?; // Also track personal stats when subject is organization and user_id is available - if let (StatsSubject::Organization(_), Some(user)) = (&subject, &user_id) { - if self.enable_user_level_tracking { - if let Err(e) = self - .add_findings_for_subject( - &StatsSubject::User(*user), - &year_month, - findings.critical, - findings.high, - findings.medium, - findings.low, - findings.info, - ) - .await - { - // Log but don't fail - personal stats are secondary - tracing::warn!( - job_id = %job_id, - user_id = %user, - error = %e, - "Failed to record personal stats (non-fatal)" - ); - } - } + if let (StatsSubject::Organization(_), Some(user)) = (&subject, &user_id) + && self.enable_user_level_tracking + && let Err(e) = self + .add_findings_for_subject( + &StatsSubject::User(*user), + &year_month, + findings.critical, + 
findings.high, + findings.medium, + findings.low, + findings.info, + ) + .await + { + // Log but don't fail - personal stats are secondary + tracing::warn!( + job_id = %job_id, + user_id = %user, + error = %e, + "Failed to record personal stats (non-fatal)" + ); } Ok(()) diff --git a/vulnera-core/src/config/mod.rs b/vulnera-core/src/config/mod.rs index f59902d0..4859c7c4 100644 --- a/vulnera-core/src/config/mod.rs +++ b/vulnera-core/src/config/mod.rs @@ -763,7 +763,7 @@ impl Default for SastConfig { per_file_timeout_seconds: Some(30), scan_timeout_seconds: None, // No overall limit by default max_findings_per_file: Some(100), - max_total_findings: None, // No limit by default + max_total_findings: None, // No limit by default js_ts_frontend: Some("oxc_preferred".to_string()), min_finding_severity: Some("info".to_string()), min_finding_confidence: Some("low".to_string()), diff --git a/vulnera-core/src/config/validation.rs b/vulnera-core/src/config/validation.rs index 35ab406f..47612ae9 100644 --- a/vulnera-core/src/config/validation.rs +++ b/vulnera-core/src/config/validation.rs @@ -407,12 +407,12 @@ impl Validate for SecretDetectionConfig { } // Validate max_commits_to_scan > 0 if Some - if let Some(max_commits) = self.max_commits_to_scan { - if max_commits == 0 { - return Err(ValidationError::secret_detection( - "max_commits_to_scan must be greater than 0 if specified".to_string(), - )); - } + if let Some(max_commits) = self.max_commits_to_scan + && max_commits == 0 + { + return Err(ValidationError::secret_detection( + "max_commits_to_scan must be greater than 0 if specified".to_string(), + )); } // Validate file_read_timeout_seconds > 0 @@ -451,12 +451,12 @@ impl Validate for SecretDetectionConfig { } // Validate scan_timeout_seconds > 0 if Some - if let Some(scan_timeout) = self.scan_timeout_seconds { - if scan_timeout == 0 { - return Err(ValidationError::secret_detection( - "scan_timeout_seconds must be greater than 0 if specified".to_string(), - )); - } + if 
let Some(scan_timeout) = self.scan_timeout_seconds + && scan_timeout == 0 + { + return Err(ValidationError::secret_detection( + "scan_timeout_seconds must be greater than 0 if specified".to_string(), + )); } Ok(()) diff --git a/vulnera-core/src/infrastructure/auth/organization_member_repository.rs b/vulnera-core/src/infrastructure/auth/organization_member_repository.rs index 38c161d9..8b76d245 100644 --- a/vulnera-core/src/infrastructure/auth/organization_member_repository.rs +++ b/vulnera-core/src/infrastructure/auth/organization_member_repository.rs @@ -45,13 +45,13 @@ impl IOrganizationMemberRepository for SqlxOrganizationMemberRepository { .await .map_err(|e| { tracing::error!("Database error adding member: {}", e); - if let Some(db_err) = e.as_database_error() { - if db_err.constraint() == Some("idx_organization_members_org_user") { - return OrganizationError::AlreadyMember { - user_id: user_id.to_string(), - org_id: org_id.to_string(), - }; - } + if let Some(db_err) = e.as_database_error() + && db_err.constraint() == Some("idx_organization_members_org_user") + { + return OrganizationError::AlreadyMember { + user_id: user_id.to_string(), + org_id: org_id.to_string(), + }; } OrganizationError::DatabaseError { message: e.to_string(), diff --git a/vulnera-core/src/infrastructure/auth/organization_repository.rs b/vulnera-core/src/infrastructure/auth/organization_repository.rs index 4365e092..e9ae82e3 100644 --- a/vulnera-core/src/infrastructure/auth/organization_repository.rs +++ b/vulnera-core/src/infrastructure/auth/organization_repository.rs @@ -165,13 +165,12 @@ impl IOrganizationRepository for SqlxOrganizationRepository { .await .map_err(|e| { tracing::error!("Database error creating organization: {}", e); - if let Some(db_err) = e.as_database_error() { - if db_err.constraint() == Some("idx_organizations_owner_name") { + if let Some(db_err) = e.as_database_error() + && db_err.constraint() == Some("idx_organizations_owner_name") { return 
OrganizationError::NameAlreadyExists { name: org.name.clone(), }; } - } OrganizationError::DatabaseError { message: e.to_string(), } @@ -203,12 +202,12 @@ impl IOrganizationRepository for SqlxOrganizationRepository { .await .map_err(|e| { tracing::error!("Database error updating organization: {}", e); - if let Some(db_err) = e.as_database_error() { - if db_err.constraint() == Some("idx_organizations_owner_name") { - return OrganizationError::NameAlreadyExists { - name: org.name.clone(), - }; - } + if let Some(db_err) = e.as_database_error() + && db_err.constraint() == Some("idx_organizations_owner_name") + { + return OrganizationError::NameAlreadyExists { + name: org.name.clone(), + }; } OrganizationError::DatabaseError { message: e.to_string(), diff --git a/vulnera-core/src/infrastructure/auth/user_repository.rs b/vulnera-core/src/infrastructure/auth/user_repository.rs index 8a1ad16d..bd2e3b8e 100644 --- a/vulnera-core/src/infrastructure/auth/user_repository.rs +++ b/vulnera-core/src/infrastructure/auth/user_repository.rs @@ -195,12 +195,12 @@ impl IUserRepository for SqlxUserRepository { .await .map_err(|e| { tracing::error!("Database error creating user: {}", e); - if let Some(db_err) = e.as_database_error() { - if db_err.constraint() == Some("users_email_key") { - return AuthError::EmailAlreadyExists { - email: email_str.to_string(), - }; - } + if let Some(db_err) = e.as_database_error() + && db_err.constraint() == Some("users_email_key") + { + return AuthError::EmailAlreadyExists { + email: email_str.to_string(), + }; } AuthError::InvalidEmail { email: email_str.to_string(), @@ -249,12 +249,12 @@ impl IUserRepository for SqlxUserRepository { .await .map_err(|e| { tracing::error!("Database error updating user: {}", e); - if let Some(db_err) = e.as_database_error() { - if db_err.constraint() == Some("users_email_key") { - return AuthError::EmailAlreadyExists { - email: email_str.to_string(), - }; - } + if let Some(db_err) = e.as_database_error() + && 
db_err.constraint() == Some("users_email_key") + { + return AuthError::EmailAlreadyExists { + email: email_str.to_string(), + }; } AuthError::UserIdNotFound { user_id: user.user_id.as_str(), diff --git a/vulnera-core/src/infrastructure/parsers/go.rs b/vulnera-core/src/infrastructure/parsers/go.rs index a068399e..6505e792 100644 --- a/vulnera-core/src/infrastructure/parsers/go.rs +++ b/vulnera-core/src/infrastructure/parsers/go.rs @@ -45,10 +45,10 @@ impl GoModParser { } // Parse require statements - if line.starts_with("require ") || in_require_block { - if let Some(package) = self.parse_require_line(line)? { - packages.push(package); - } + if (line.starts_with("require ") || in_require_block) + && let Some(package) = self.parse_require_line(line)? + { + packages.push(package); } } diff --git a/vulnera-core/src/infrastructure/parsers/gradle_pest.rs b/vulnera-core/src/infrastructure/parsers/gradle_pest.rs index 75fff032..23d02ae5 100644 --- a/vulnera-core/src/infrastructure/parsers/gradle_pest.rs +++ b/vulnera-core/src/infrastructure/parsers/gradle_pest.rs @@ -154,11 +154,11 @@ impl GradlePestParser { pest_impl::Rule::enclosed_coord => { // enclosed_coord contains a quoted_string for inner in p.into_inner() { - if let pest_impl::Rule::quoted_string = inner.as_rule() { - if let Some(t) = Self::parse_coord_string(inner.as_str()) { - result = Some(t); - break; - } + if let pest_impl::Rule::quoted_string = inner.as_rule() + && let Some(t) = Self::parse_coord_string(inner.as_str()) + { + result = Some(t); + break; } } if result.is_some() { @@ -219,10 +219,10 @@ impl GradlePestParser { let name = format!("{}:{}", g, a); let cleaned = Self::clean_version(v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) { - out.push(pkg); - } + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = 
Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + out.push(pkg); } } } @@ -243,10 +243,10 @@ impl GradlePestParser { let name = format!("{}:{}", g, a); let cleaned = Self::clean_version(v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) { - out.push(pkg); - } + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + out.push(pkg); } } } @@ -267,10 +267,10 @@ impl GradlePestParser { let name = format!("{}:{}", g, a); let cleaned = Self::clean_version(v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) { - out.push(pkg); - } + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + out.push(pkg); } } } @@ -291,10 +291,10 @@ impl GradlePestParser { let name = format!("{}:{}", g, a); let cleaned = Self::clean_version(v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) { - out.push(pkg); - } + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + out.push(pkg); } } } @@ -315,10 +315,10 @@ impl GradlePestParser { let name = format!("{}:{}", g, a); let cleaned = Self::clean_version(v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) { - 
out.push(pkg); - } + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + out.push(pkg); } } } @@ -345,21 +345,21 @@ impl PackageFileParser for GradlePestParser { match inner.as_rule() { pest_impl::Rule::dependencies_block => { for stmt in inner.into_inner() { - if stmt.as_rule() == pest_impl::Rule::dep_stmt { - if let Some((g, a, v)) = Self::process_dep_stmt(stmt) { - let name = format!("{}:{}", g, a); - let cleaned = Self::clean_version(&v); - let version = Version::parse(&cleaned) - .unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new( - name.clone(), - version.clone(), - Ecosystem::Maven, - ) { - packages.push(pkg); - } - } + if stmt.as_rule() == pest_impl::Rule::dep_stmt + && let Some((g, a, v)) = Self::process_dep_stmt(stmt) + { + let name = format!("{}:{}", g, a); + let cleaned = Self::clean_version(&v); + let version = Version::parse(&cleaned) + .unwrap_or_else(|_| Version::new(0, 0, 0)); + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new( + name.clone(), + version.clone(), + Ecosystem::Maven, + ) + { + packages.push(pkg); } } } @@ -370,14 +370,14 @@ impl PackageFileParser for GradlePestParser { let cleaned = Self::clean_version(&v); let version = Version::parse(&cleaned) .unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new( + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new( name.clone(), version.clone(), Ecosystem::Maven, - ) { - packages.push(pkg); - } + ) + { + packages.push(pkg); } } } @@ -387,21 +387,18 @@ impl PackageFileParser for GradlePestParser { } pest_impl::Rule::dependencies_block => { for stmt in top.into_inner() { - if stmt.as_rule() == pest_impl::Rule::dep_stmt { - if let Some((g, a, v)) = Self::process_dep_stmt(stmt) { - let name = 
format!("{}:{}", g, a); - let cleaned = Self::clean_version(&v); - let version = Version::parse(&cleaned) - .unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new( - name.clone(), - version.clone(), - Ecosystem::Maven, - ) { - packages.push(pkg); - } - } + if stmt.as_rule() == pest_impl::Rule::dep_stmt + && let Some((g, a, v)) = Self::process_dep_stmt(stmt) + { + let name = format!("{}:{}", g, a); + let cleaned = Self::clean_version(&v); + let version = + Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = + Package::new(name.clone(), version.clone(), Ecosystem::Maven) + { + packages.push(pkg); } } } @@ -412,12 +409,11 @@ impl PackageFileParser for GradlePestParser { let cleaned = Self::clean_version(&v); let version = Version::parse(&cleaned).unwrap_or_else(|_| Version::new(0, 0, 0)); - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Maven) - { - packages.push(pkg); - } + { + packages.push(pkg); } } } diff --git a/vulnera-core/src/infrastructure/parsers/java.rs b/vulnera-core/src/infrastructure/parsers/java.rs index 97a3ff1c..e7dcc14e 100644 --- a/vulnera-core/src/infrastructure/parsers/java.rs +++ b/vulnera-core/src/infrastructure/parsers/java.rs @@ -108,20 +108,18 @@ impl MavenParser { } } Ok(Event::Text(t)) => { - if in_dependency { - if let Some(tag) = current_tag.as_deref() { - let txt = reader - .decoder() - .decode(t.as_ref()) - .unwrap_or_default() - .trim() - .to_string(); - match tag { - "groupId" => group_id = Some(txt.trim().to_string()), - "artifactId" => artifact_id = Some(txt.trim().to_string()), - "version" => version_str = Some(txt.trim().to_string()), - _ => {} - } + if in_dependency && let Some(tag) = current_tag.as_deref() { + let 
txt = reader + .decoder() + .decode(t.as_ref()) + .unwrap_or_default() + .trim() + .to_string(); + match tag { + "groupId" => group_id = Some(txt.trim().to_string()), + "artifactId" => artifact_id = Some(txt.trim().to_string()), + "version" => version_str = Some(txt.trim().to_string()), + _ => {} } } } @@ -254,10 +252,10 @@ impl MavenParser { .trim() .to_string(); - if let Some(property) = current_property.as_ref() { - if !value.is_empty() { - properties.insert(property.clone(), value.clone()); - } + if let Some(property) = current_property.as_ref() + && !value.is_empty() + { + properties.insert(property.clone(), value.clone()); } if stack.len() == 2 diff --git a/vulnera-core/src/infrastructure/parsers/npm.rs b/vulnera-core/src/infrastructure/parsers/npm.rs index 2b5599f6..4014d556 100644 --- a/vulnera-core/src/infrastructure/parsers/npm.rs +++ b/vulnera-core/src/infrastructure/parsers/npm.rs @@ -315,17 +315,14 @@ impl PackageLockParser { // Recursively process nested dependencies (physical tree) // Skip for lockfileVersion 2/3 as they use a flat packages structure instead of nested dependencies - if !is_packages_section { - if let Some(nested_deps) = dep_info.get("dependencies") { - // Recurse when v1 dependencies map contains nested package objects. - if let Some(deps_obj) = nested_deps.as_object() { - if deps_obj.values().any(Value::is_object) { - let nested_result = - Self::extract_lockfile_data(nested_deps, false)?; - packages.extend(nested_result.packages); - dependencies.extend(nested_result.dependencies); - } - } + if !is_packages_section && let Some(nested_deps) = dep_info.get("dependencies") { + // Recurse when v1 dependencies map contains nested package objects. 
+ if let Some(deps_obj) = nested_deps.as_object() + && deps_obj.values().any(Value::is_object) + { + let nested_result = Self::extract_lockfile_data(nested_deps, false)?; + packages.extend(nested_result.packages); + dependencies.extend(nested_result.dependencies); } } } @@ -464,31 +461,31 @@ impl YarnLockParser { if indent == 0 { // New entry // Save previous - if let Some(version) = ¤t_version { - if let Ok(parsed_version) = Version::parse(version) { - for name in ¤t_package_names { - // Name might be "pkg@range", extract just name - let pkg_name = if let Some(at_pos) = name.rfind('@') { - if at_pos > 0 { &name[..at_pos] } else { name } - } else { - name - }; - - if let Ok(package) = Package::new( - pkg_name.to_string(), - parsed_version.clone(), - Ecosystem::Npm, - ) { - packages.push(package.clone()); - - // Add dependencies - for (dep_name, dep_req) in ¤t_dependencies { - pending_dependencies.push(( - package.clone(), - dep_name.clone(), - dep_req.clone(), - )); - } + if let Some(version) = ¤t_version + && let Ok(parsed_version) = Version::parse(version) + { + for name in ¤t_package_names { + // Name might be "pkg@range", extract just name + let pkg_name = if let Some(at_pos) = name.rfind('@') { + if at_pos > 0 { &name[..at_pos] } else { name } + } else { + name + }; + + if let Ok(package) = Package::new( + pkg_name.to_string(), + parsed_version.clone(), + Ecosystem::Npm, + ) { + packages.push(package.clone()); + + // Add dependencies + for (dep_name, dep_req) in ¤t_dependencies { + pending_dependencies.push(( + package.clone(), + dep_name.clone(), + dep_req.clone(), + )); } } } @@ -532,26 +529,26 @@ impl YarnLockParser { } // Save last - if let Some(version) = ¤t_version { - if let Ok(parsed_version) = Version::parse(version) { - for name in ¤t_package_names { - let pkg_name = if let Some(at_pos) = name.rfind('@') { - if at_pos > 0 { &name[..at_pos] } else { name } - } else { - name - }; - - if let Ok(package) = - Package::new(pkg_name.to_string(), 
parsed_version.clone(), Ecosystem::Npm) - { - packages.push(package.clone()); - for (dep_name, dep_req) in ¤t_dependencies { - pending_dependencies.push(( - package.clone(), - dep_name.clone(), - dep_req.clone(), - )); - } + if let Some(version) = ¤t_version + && let Ok(parsed_version) = Version::parse(version) + { + for name in ¤t_package_names { + let pkg_name = if let Some(at_pos) = name.rfind('@') { + if at_pos > 0 { &name[..at_pos] } else { name } + } else { + name + }; + + if let Ok(package) = + Package::new(pkg_name.to_string(), parsed_version.clone(), Ecosystem::Npm) + { + packages.push(package.clone()); + for (dep_name, dep_req) in ¤t_dependencies { + pending_dependencies.push(( + package.clone(), + dep_name.clone(), + dep_req.clone(), + )); } } } diff --git a/vulnera-core/src/infrastructure/parsers/php.rs b/vulnera-core/src/infrastructure/parsers/php.rs index de6e9fce..8290e5a7 100644 --- a/vulnera-core/src/infrastructure/parsers/php.rs +++ b/vulnera-core/src/infrastructure/parsers/php.rs @@ -292,12 +292,11 @@ impl ComposerLockParser { continue; } - if let Some(inferred_version) = Self::infer_dependency_version(&dep_req) { - if let Ok(inferred_target) = + if let Some(inferred_version) = Self::infer_dependency_version(&dep_req) + && let Ok(inferred_target) = Package::new(dep_name, inferred_version, Ecosystem::Packagist) - { - dependencies.push(Dependency::new(from, inferred_target, dep_req, false)); - } + { + dependencies.push(Dependency::new(from, inferred_target, dep_req, false)); } } diff --git a/vulnera-core/src/infrastructure/parsers/python.rs b/vulnera-core/src/infrastructure/parsers/python.rs index 4bc1ae50..2a9392f7 100644 --- a/vulnera-core/src/infrastructure/parsers/python.rs +++ b/vulnera-core/src/infrastructure/parsers/python.rs @@ -394,10 +394,10 @@ impl PyProjectTomlParser { if let Some(project) = toml_value.get("project") { if let Some(deps) = project.get("dependencies").and_then(|d| d.as_array()) { for dep in deps { - if let Some(dep_str) = 
dep.as_str() { - if let Some(package) = self.parse_dependency_string(dep_str)? { - packages.push(package); - } + if let Some(dep_str) = dep.as_str() + && let Some(package) = self.parse_dependency_string(dep_str)? + { + packages.push(package); } } } @@ -410,10 +410,10 @@ impl PyProjectTomlParser { for (_, deps_array) in optional_deps { if let Some(deps) = deps_array.as_array() { for dep in deps { - if let Some(dep_str) = dep.as_str() { - if let Some(package) = self.parse_dependency_string(dep_str)? { - packages.push(package); - } + if let Some(dep_str) = dep.as_str() + && let Some(package) = self.parse_dependency_string(dep_str)? + { + packages.push(package); } } } @@ -422,39 +422,37 @@ impl PyProjectTomlParser { } // Extract from tool.poetry.dependencies (Poetry format) - if let Some(tool) = toml_value.get("tool") { - if let Some(poetry) = tool.get("poetry") { - if let Some(deps) = poetry.get("dependencies").and_then(|d| d.as_table()) { - for (name, version_info) in deps { - if name == "python" { - continue; // Skip Python version requirement - } + if let Some(tool) = toml_value.get("tool") + && let Some(poetry) = tool.get("poetry") + && let Some(deps) = poetry.get("dependencies").and_then(|d| d.as_table()) + { + for (name, version_info) in deps { + if name == "python" { + continue; // Skip Python version requirement + } - let version_str = match version_info { - toml::Value::String(v) => v.clone(), - toml::Value::Table(t) => { - if let Some(version) = t.get("version").and_then(|v| v.as_str()) { - version.to_string() - } else { - "0.0.0".to_string() - } - } - _ => "0.0.0".to_string(), - }; + let version_str = match version_info { + toml::Value::String(v) => v.clone(), + toml::Value::Table(t) => { + if let Some(version) = t.get("version").and_then(|v| v.as_str()) { + version.to_string() + } else { + "0.0.0".to_string() + } + } + _ => "0.0.0".to_string(), + }; - let clean_version = self.clean_poetry_version(&version_str)?; + let clean_version = 
self.clean_poetry_version(&version_str)?; - let version = - Version::parse(&clean_version).map_err(|_| ParseError::Version { - version: version_str.clone(), - })?; + let version = Version::parse(&clean_version).map_err(|_| ParseError::Version { + version: version_str.clone(), + })?; - let package = Package::new(name.clone(), version, Ecosystem::PyPI) - .map_err(|e| ParseError::MissingField { field: e })?; + let package = Package::new(name.clone(), version, Ecosystem::PyPI) + .map_err(|e| ParseError::MissingField { field: e })?; - packages.push(package); - } - } + packages.push(package); } } diff --git a/vulnera-core/src/infrastructure/parsers/ruby.rs b/vulnera-core/src/infrastructure/parsers/ruby.rs index c035bc6a..b3a0cdd6 100644 --- a/vulnera-core/src/infrastructure/parsers/ruby.rs +++ b/vulnera-core/src/infrastructure/parsers/ruby.rs @@ -271,18 +271,18 @@ impl GemfileLockParser { } // Check for dependency definition (indentation 6 spaces) - if let Some(pkg) = ¤t_package { - if let Some(caps) = RE_DEP_LINE.captures(line) { - let dep_name = caps.get(1).map(|m| m.as_str()).unwrap_or("").trim(); - let dep_req = caps.get(2).map(|m| m.as_str()).unwrap_or("*").trim(); - - if !dep_name.is_empty() { - pending_dependencies.push(( - pkg.clone(), - dep_name.to_string(), - dep_req.to_string(), - )); - } + if let Some(pkg) = ¤t_package + && let Some(caps) = RE_DEP_LINE.captures(line) + { + let dep_name = caps.get(1).map(|m| m.as_str()).unwrap_or("").trim(); + let dep_req = caps.get(2).map(|m| m.as_str()).unwrap_or("*").trim(); + + if !dep_name.is_empty() { + pending_dependencies.push(( + pkg.clone(), + dep_name.to_string(), + dep_req.to_string(), + )); } } } @@ -305,14 +305,11 @@ impl GemfileLockParser { continue; } - if let Some(base_version) = extract_base_version(&dep_req) { - if let Ok(version) = parse_version_lenient(&base_version) { - if let Ok(inferred_target) = - Package::new(dep_name, version, Ecosystem::RubyGems) - { - dependencies.push(Dependency::new(from, 
inferred_target, dep_req, false)); - } - } + if let Some(base_version) = extract_base_version(&dep_req) + && let Ok(version) = parse_version_lenient(&base_version) + && let Ok(inferred_target) = Package::new(dep_name, version, Ecosystem::RubyGems) + { + dependencies.push(Dependency::new(from, inferred_target, dep_req, false)); } } diff --git a/vulnera-core/src/infrastructure/parsers/rust.rs b/vulnera-core/src/infrastructure/parsers/rust.rs index 104d0a30..79264a33 100644 --- a/vulnera-core/src/infrastructure/parsers/rust.rs +++ b/vulnera-core/src/infrastructure/parsers/rust.rs @@ -240,38 +240,36 @@ impl CargoLockParser { if let Some(source_pkg) = package_map.get(&(name.to_string(), version_str.to_string())) - { - if let Some(deps) = + && let Some(deps) = package_table.get("dependencies").and_then(|d| d.as_array()) - { - for dep_val in deps { - if let Some(dep_str) = dep_val.as_str() { - // Format: "name version" or just "name" - let parts: Vec<&str> = dep_str.split_whitespace().collect(); - let dep_name = parts[0]; - - // If version is specified, use it. If not, we have to guess or find the only one. - // Cargo.lock usually specifies version if ambiguous. - let target_pkg: Option = if parts.len() >= 2 { - let dep_version = parts[1]; - package_map - .get(&(dep_name.to_string(), dep_version.to_string())) - .cloned() - } else { - package_map - .iter() - .find(|((n, _), _)| n == dep_name) - .map(|(_, p)| p.clone()) - }; - - if let Some(target) = target_pkg { - dependencies.push(Dependency::new( - source_pkg.clone(), - target.clone(), - target.version.to_string(), // Requirement is effectively the locked version - false, // We don't know if it's transitive from here easily, but in a lockfile everything is explicit - )); - } + { + for dep_val in deps { + if let Some(dep_str) = dep_val.as_str() { + // Format: "name version" or just "name" + let parts: Vec<&str> = dep_str.split_whitespace().collect(); + let dep_name = parts[0]; + + // If version is specified, use it. 
If not, we have to guess or find the only one. + // Cargo.lock usually specifies version if ambiguous. + let target_pkg: Option = if parts.len() >= 2 { + let dep_version = parts[1]; + package_map + .get(&(dep_name.to_string(), dep_version.to_string())) + .cloned() + } else { + package_map + .iter() + .find(|((n, _), _)| n == dep_name) + .map(|(_, p)| p.clone()) + }; + + if let Some(target) = target_pkg { + dependencies.push(Dependency::new( + source_pkg.clone(), + target.clone(), + target.version.to_string(), // Requirement is effectively the locked version + false, // We don't know if it's transitive from here easily, but in a lockfile everything is explicit + )); } } } diff --git a/vulnera-core/src/infrastructure/parsers/tree_sitter/go.rs b/vulnera-core/src/infrastructure/parsers/tree_sitter/go.rs index ef8edd50..f0489a7f 100644 --- a/vulnera-core/src/infrastructure/parsers/tree_sitter/go.rs +++ b/vulnera-core/src/infrastructure/parsers/tree_sitter/go.rs @@ -43,10 +43,10 @@ impl TreeSitterGoParser { } // Parse require statements - if line.starts_with("require ") || in_require_block { - if let Some(package) = self.parse_require_line(line)? { - packages.push(package); - } + if (line.starts_with("require ") || in_require_block) + && let Some(package) = self.parse_require_line(line)? 
+ { + packages.push(package); } } diff --git a/vulnera-core/src/infrastructure/parsers/tree_sitter/json.rs b/vulnera-core/src/infrastructure/parsers/tree_sitter/json.rs index a8bc11a2..1b4a155e 100644 --- a/vulnera-core/src/infrastructure/parsers/tree_sitter/json.rs +++ b/vulnera-core/src/infrastructure/parsers/tree_sitter/json.rs @@ -50,16 +50,15 @@ impl TreeSitterJsonParser { while let Some(node) = stack.pop() { // Check if this is a pair node with the target key - if node.kind() == "pair" { - if let Some(key_node) = node.child_by_field_name("key") { - let key_text = &content[key_node.byte_range()]; - if key_text.trim_matches('"') == dep_type { - if let Some(value_node) = node.child_by_field_name("value") { - if value_node.kind() == "object" { - return Some(value_node); - } - } - } + if node.kind() == "pair" + && let Some(key_node) = node.child_by_field_name("key") + { + let key_text = &content[key_node.byte_range()]; + if key_text.trim_matches('"') == dep_type + && let Some(value_node) = node.child_by_field_name("value") + && value_node.kind() == "object" + { + return Some(value_node); } } @@ -85,10 +84,10 @@ impl TreeSitterJsonParser { let mut cursor = object_node.walk(); for child in object_node.children(&mut cursor) { - if child.kind() == "pair" { - if let Some(package) = self.parse_dependency_pair(&child, content)? { - packages.push(package); - } + if child.kind() == "pair" + && let Some(package) = self.parse_dependency_pair(&child, content)? 
+ { + packages.push(package); } } diff --git a/vulnera-core/src/infrastructure/parsers/yarn_pest.rs b/vulnera-core/src/infrastructure/parsers/yarn_pest.rs index e66b5d77..ca90878b 100644 --- a/vulnera-core/src/infrastructure/parsers/yarn_pest.rs +++ b/vulnera-core/src/infrastructure/parsers/yarn_pest.rs @@ -212,11 +212,11 @@ impl YarnPestParser { if found.is_none() { // Fallback: scan raw text let raw = p.as_str(); - if let Some(start) = raw.find('"') { - if let Some(end_off) = raw[start + 1..].find('"') { - let end = start + 1 + end_off; - found = Some(raw[start + 1..end].to_string()); - } + if let Some(start) = raw.find('"') + && let Some(end_off) = raw[start + 1..].find('"') + { + let end = start + 1 + end_off; + found = Some(raw[start + 1..end].to_string()); } } version = found; @@ -388,10 +388,10 @@ impl YarnPestParser { let s = spec.trim(); // Drop surrounding quotes if present let s = s.trim_matches('"'); - if let Some(name) = Self::extract_name_from_key_spec(s) { - if !out.contains(&name) { - out.push(name); - } + if let Some(name) = Self::extract_name_from_key_spec(s) + && !out.contains(&name) + { + out.push(name); } } out @@ -401,13 +401,12 @@ impl YarnPestParser { fn fallback_extract_version_from_entry(entry_text: &str) -> Option { for line in entry_text.lines() { let t = line.trim_start(); - if t.starts_with("version ") { - if let Some(start) = t.find('"') { - if let Some(end_off) = t[start + 1..].find('"') { - let end = start + 1 + end_off; - return Some(t[start + 1..end].to_string()); - } - } + if t.starts_with("version ") + && let Some(start) = t.find('"') + && let Some(end_off) = t[start + 1..].find('"') + { + let end = start + 1 + end_off; + return Some(t[start + 1..end].to_string()); } } None @@ -448,13 +447,12 @@ impl YarnPestParser { // Version line (indented) let t = line.trim_start(); - if t.starts_with("version ") { - if let Some(start) = t.find('"') { - if let Some(end_off) = t[start + 1..].find('"') { - let end = start + 1 + end_off; - 
current_version = Some(t[start + 1..end].to_string()); - } - } + if t.starts_with("version ") + && let Some(start) = t.find('"') + && let Some(end_off) = t[start + 1..].find('"') + { + let end = start + 1 + end_off; + current_version = Some(t[start + 1..end].to_string()); } } @@ -501,14 +499,14 @@ impl PackageFileParser for YarnPestParser { }); for name in &parsed.names { - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = Package::new( + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new( name.clone(), version.clone(), Ecosystem::Npm, - ) { - packages.push(pkg); - } + ) + { + packages.push(pkg); } } } @@ -525,12 +523,11 @@ impl PackageFileParser for YarnPestParser { }); for name in &parsed.names { - if seen.insert((name.clone(), version.to_string())) { - if let Ok(pkg) = + if seen.insert((name.clone(), version.to_string())) + && let Ok(pkg) = Package::new(name.clone(), version.clone(), Ecosystem::Npm) - { - packages.push(pkg); - } + { + packages.push(pkg); } } } diff --git a/vulnera-core/src/infrastructure/rate_limiter/storage.rs b/vulnera-core/src/infrastructure/rate_limiter/storage.rs index 454cdc0f..d22572a8 100644 --- a/vulnera-core/src/infrastructure/rate_limiter/storage.rs +++ b/vulnera-core/src/infrastructure/rate_limiter/storage.rs @@ -261,10 +261,10 @@ impl Default for InMemoryRateLimitStorage { impl RateLimitStorage for InMemoryRateLimitStorage { async fn get_token_bucket(&self, key: &str) -> Result, String> { let buckets = self.token_buckets.read().await; - if let Some(entry) = buckets.get(key) { - if Self::current_time() < entry.expires_at { - return Ok(Some(entry.value.clone())); - } + if let Some(entry) = buckets.get(key) + && Self::current_time() < entry.expires_at + { + return Ok(Some(entry.value.clone())); } Ok(None) } @@ -288,10 +288,10 @@ impl RateLimitStorage for InMemoryRateLimitStorage { async fn get_sliding_window(&self, key: &str) -> Result, String> { let windows = 
self.sliding_windows.read().await; - if let Some(entry) = windows.get(key) { - if Self::current_time() < entry.expires_at { - return Ok(Some(entry.value.clone())); - } + if let Some(entry) = windows.get(key) + && Self::current_time() < entry.expires_at + { + return Ok(Some(entry.value.clone())); } Ok(None) } @@ -316,10 +316,10 @@ impl RateLimitStorage for InMemoryRateLimitStorage { async fn get_lockout(&self, key: &str) -> Result, String> { let lockout_key = format!("{}:lockout", key); let lockouts = self.lockouts.read().await; - if let Some(entry) = lockouts.get(&lockout_key) { - if Self::current_time() < entry.expires_at { - return Ok(Some(entry.value)); - } + if let Some(entry) = lockouts.get(&lockout_key) + && Self::current_time() < entry.expires_at + { + return Ok(Some(entry.value)); } Ok(None) } diff --git a/vulnera-core/src/infrastructure/registries/vulnera_registry_adapter.rs b/vulnera-core/src/infrastructure/registries/vulnera_registry_adapter.rs index fb6465b8..27e6ff95 100644 --- a/vulnera-core/src/infrastructure/registries/vulnera_registry_adapter.rs +++ b/vulnera-core/src/infrastructure/registries/vulnera_registry_adapter.rs @@ -273,10 +273,10 @@ fn parse_version_lenient(s: &str) -> Option { let nums: Vec<&str> = core.split('.').collect(); if nums.len() > 3 { let mut base = format!("{}.{}.{}", nums[0], nums[1], nums[2]); - if let Some(preid) = pre { - if !preid.is_empty() { - base = format!("{}-{}", base, preid); - } + if let Some(preid) = pre + && !preid.is_empty() + { + base = format!("{}-{}", base, preid); } Version::parse(&base).ok() } else { diff --git a/vulnera-core/src/infrastructure/repository_source/github_client.rs b/vulnera-core/src/infrastructure/repository_source/github_client.rs index bd3d9994..174efaa5 100644 --- a/vulnera-core/src/infrastructure/repository_source/github_client.rs +++ b/vulnera-core/src/infrastructure/repository_source/github_client.rs @@ -58,10 +58,10 @@ impl GitHubRepositoryClient { .base_uri(&normalized) .map_err(|e| 
RepositorySourceError::Configuration(e.to_string()))?; } - if let Some(t) = token { - if !t.trim().is_empty() { - builder = builder.personal_token(t); - } + if let Some(t) = token + && !t.trim().is_empty() + { + builder = builder.personal_token(t); } let octo = match builder.build() { Ok(o) => o, @@ -131,17 +131,17 @@ impl RepositorySourceClient for GitHubRepositoryClient { if files.len() as u32 >= max_files { break; } - if entry.get("type").and_then(|v| v.as_str()) == Some("blob") { - if let (Some(path), Some(size)) = ( + if entry.get("type").and_then(|v| v.as_str()) == Some("blob") + && let (Some(path), Some(size)) = ( entry.get("path").and_then(|v| v.as_str()), entry.get("size").and_then(|v| v.as_u64()), - ) { - files.push(RepositoryFile { - path: path.to_string(), - size, - is_text: true, - }); - } + ) + { + files.push(RepositoryFile { + path: path.to_string(), + size, + is_text: true, + }); } } } diff --git a/vulnera-core/src/infrastructure/repository_source/url_parser.rs b/vulnera-core/src/infrastructure/repository_source/url_parser.rs index c4b2c89b..3742c988 100644 --- a/vulnera-core/src/infrastructure/repository_source/url_parser.rs +++ b/vulnera-core/src/infrastructure/repository_source/url_parser.rs @@ -41,10 +41,10 @@ pub fn parse_github_repo_url(input: &str) -> Option { return None; } - if let Some(pos) = parts.last().and_then(|s| s.find(['?', '#'])) { - if let Some(last) = parts.last_mut() { - *last = &last[..pos]; - } + if let Some(pos) = parts.last().and_then(|s| s.find(['?', '#'])) + && let Some(last) = parts.last_mut() + { + *last = &last[..pos]; } let owner = parts[0]; diff --git a/vulnera-core/src/infrastructure/vulnerability_advisor/mod.rs b/vulnera-core/src/infrastructure/vulnerability_advisor/mod.rs index c1c25a2e..21240872 100644 --- a/vulnera-core/src/infrastructure/vulnerability_advisor/mod.rs +++ b/vulnera-core/src/infrastructure/vulnerability_advisor/mod.rs @@ -95,10 +95,10 @@ impl VulneraAdvisorRepository { builder = 
builder.with_nvd(config.nvd_api_key); // Add GHSA source if token is provided - if let Some(token) = config.ghsa_token { - if !token.trim().is_empty() { - builder = builder.with_ghsa(token); - } + if let Some(token) = config.ghsa_token + && !token.trim().is_empty() + { + builder = builder.with_ghsa(token); } // Add OSS Index if configured diff --git a/vulnera-deps/src/application/analysis_context.rs b/vulnera-deps/src/application/analysis_context.rs index bb1a43a6..85db3d28 100644 --- a/vulnera-deps/src/application/analysis_context.rs +++ b/vulnera-deps/src/application/analysis_context.rs @@ -138,11 +138,11 @@ impl AnalysisContext { // Check if file is in cache if let Some(cached_time) = self.cache.get(file_path) { // Check if file has been modified since last analysis - if let Ok(metadata) = std::fs::metadata(file_path) { - if let Ok(modified) = metadata.modified() { - // File needs re-analysis if it was modified after cache time - return modified > *cached_time; - } + if let Ok(metadata) = std::fs::metadata(file_path) + && let Ok(modified) = metadata.modified() + { + // File needs re-analysis if it was modified after cache time + return modified > *cached_time; } // If we can't get modification time, assume it needs analysis return true; diff --git a/vulnera-deps/src/domain/dependency_graph.rs b/vulnera-deps/src/domain/dependency_graph.rs index f81355f2..64bd2861 100644 --- a/vulnera-deps/src/domain/dependency_graph.rs +++ b/vulnera-deps/src/domain/dependency_graph.rs @@ -174,17 +174,17 @@ impl DependencyGraph { /// Add a dependency edge to the graph pub fn add_edge(&mut self, edge: DependencyEdge) { // Update the from node's direct dependencies - if let Some(from_node) = self.nodes.get_mut(&edge.from) { - if !from_node.direct_dependencies.contains(&edge.to) { - from_node.direct_dependencies.push(edge.to.clone()); - } + if let Some(from_node) = self.nodes.get_mut(&edge.from) + && !from_node.direct_dependencies.contains(&edge.to) + { + 
from_node.direct_dependencies.push(edge.to.clone()); } // Update the to node's dependents - if let Some(to_node) = self.nodes.get_mut(&edge.to) { - if !to_node.dependents.contains(&edge.from) { - to_node.dependents.push(edge.from.clone()); - } + if let Some(to_node) = self.nodes.get_mut(&edge.to) + && !to_node.dependents.contains(&edge.from) + { + to_node.dependents.push(edge.from.clone()); } self.edges.push(edge); diff --git a/vulnera-deps/src/domain/source_location.rs b/vulnera-deps/src/domain/source_location.rs index 8c603c84..eed96e43 100644 --- a/vulnera-deps/src/domain/source_location.rs +++ b/vulnera-deps/src/domain/source_location.rs @@ -72,10 +72,10 @@ impl SourceLocation { if line == self.line && column < self.column { return false; } - if line == end_line { - if let Some(end_column) = self.end_column { - return column <= end_column; - } + if line == end_line + && let Some(end_column) = self.end_column + { + return column <= end_column; } } else { // Single line location - must be on the same line diff --git a/vulnera-deps/src/services/repository_analysis.rs b/vulnera-deps/src/services/repository_analysis.rs index 47b44d1f..7142fcc9 100644 --- a/vulnera-deps/src/services/repository_analysis.rs +++ b/vulnera-deps/src/services/repository_analysis.rs @@ -146,15 +146,15 @@ where let filtered: Vec<_> = files .into_iter() .filter(|f| { - if let Some(ref includes) = input.include_paths { - if !includes.iter().any(|p| f.path.starts_with(p)) { - return false; - } + if let Some(ref includes) = input.include_paths + && !includes.iter().any(|p| f.path.starts_with(p)) + { + return false; } - if let Some(ref excludes) = input.exclude_paths { - if excludes.iter().any(|p| f.path.starts_with(p)) { - return false; - } + if let Some(ref excludes) = input.exclude_paths + && excludes.iter().any(|p| f.path.starts_with(p)) + { + return false; } true }) diff --git a/vulnera-deps/src/use_cases.rs b/vulnera-deps/src/use_cases.rs index 13305d03..b4487ac6 100644 --- 
a/vulnera-deps/src/use_cases.rs +++ b/vulnera-deps/src/use_cases.rs @@ -188,11 +188,11 @@ impl AnalyzeDependenciesUseCase { } // Detect workspace if not already detected - if ctx.workspace.is_none() { - if let Some(workspace) = detect_workspace(&ctx.project_root) { - // Note: We can't mutate ctx here, but we could update it in a future version - debug!("Detected workspace: {:?}", workspace); - } + if ctx.workspace.is_none() + && let Some(workspace) = detect_workspace(&ctx.project_root) + { + // Note: We can't mutate ctx here, but we could update it in a future version + debug!("Detected workspace: {:?}", workspace); } } @@ -405,14 +405,14 @@ impl AnalyzeDependenciesUseCase { while let Some(result) = join_set.join_next().await { match result { Ok(Ok((idx, Some(resolved_version)))) => { - if let Some(pkg) = packages.get_mut(idx) { - if resolved_version > pkg.version { - debug!( - "Resolved Cargo.toml spec for {}: {} -> {}", - pkg.name, pkg.version, resolved_version - ); - pkg.version = resolved_version; - } + if let Some(pkg) = packages.get_mut(idx) + && resolved_version > pkg.version + { + debug!( + "Resolved Cargo.toml spec for {}: {} -> {}", + pkg.name, pkg.version, resolved_version + ); + pkg.version = resolved_version; } } Ok(Ok((_idx, None))) => { @@ -556,14 +556,14 @@ impl AnalyzeDependenciesUseCase { filename: Option<&str>, ) -> Result { // Try to find a parser based on filename first - if let Some(filename) = filename { - if let Some(parser) = self.parser_factory.create_parser(filename) { - debug!("Using parser for filename: {}", filename); - return parser - .parse_file(file_content) - .await - .map_err(ApplicationError::Parse); - } + if let Some(filename) = filename + && let Some(parser) = self.parser_factory.create_parser(filename) + { + debug!("Using parser for filename: {}", filename); + return parser + .parse_file(file_content) + .await + .map_err(ApplicationError::Parse); } // Fall back to ecosystem-based parsing by trying common filenames for the 
ecosystem diff --git a/vulnera-llm/src/application/use_cases/enrich_findings.rs b/vulnera-llm/src/application/use_cases/enrich_findings.rs index a8311e12..c3cd5314 100644 --- a/vulnera-llm/src/application/use_cases/enrich_findings.rs +++ b/vulnera-llm/src/application/use_cases/enrich_findings.rs @@ -186,17 +186,17 @@ impl EnrichFindingsUseCase { } // Try to extract JSON from markdown code block - if let Some(json_content) = Self::extract_json_from_markdown(content) { - if let Ok(parsed) = serde_json::from_str::(&json_content) { - return Ok(FindingEnrichment { - explanation: Some(parsed.explanation), - remediation_suggestion: Some(parsed.remediation), - risk_summary: Some(parsed.risk_summary), - enrichment_successful: true, - error: None, - enriched_at: Some(chrono::Utc::now()), - }); - } + if let Some(json_content) = Self::extract_json_from_markdown(content) + && let Ok(parsed) = serde_json::from_str::(&json_content) + { + return Ok(FindingEnrichment { + explanation: Some(parsed.explanation), + remediation_suggestion: Some(parsed.remediation), + risk_summary: Some(parsed.risk_summary), + enrichment_successful: true, + error: None, + enriched_at: Some(chrono::Utc::now()), + }); } // Fallback: treat entire response as explanation diff --git a/vulnera-llm/src/infrastructure/providers/google_ai.rs b/vulnera-llm/src/infrastructure/providers/google_ai.rs index 88a6af6e..8ecaa2dc 100644 --- a/vulnera-llm/src/infrastructure/providers/google_ai.rs +++ b/vulnera-llm/src/infrastructure/providers/google_ai.rs @@ -276,51 +276,50 @@ impl LlmProvider for GoogleAIProvider { } // Parse the JSON chunk - if let Ok(chunk) = serde_json::from_str::(&line) { - if let Some(candidate) = chunk.candidates.into_iter().next() { - let text = candidate - .content - .parts - .into_iter() - .filter_map(|p| match p { - GeminiPart::Text { text } => Some(text), - _ => None, - }) - .collect::(); - - let is_final = candidate.finish_reason.is_some(); - let stop_reason = - 
candidate.finish_reason.as_deref().map(|r| match r { - "STOP" => StopReason::EndTurn, - "MAX_TOKENS" => StopReason::MaxTokens, - _ => StopReason::Other, - }); - - let chunk_result = StreamChunk { - index: idx, - delta: if text.is_empty() { - None - } else { - Some(ContentBlock::Text { text }) - }, - is_final, - stop_reason, - usage: chunk.usage_metadata.map(|u| Usage { - prompt_tokens: u.prompt_token_count, - completion_tokens: u.candidates_token_count, - total_tokens: u.total_token_count, - cached_tokens: u.cached_content_token_count, - }), - }; - - idx += 1; - - if is_final { - return Some((Ok(chunk_result), (byte_stream, buffer, idx))); - } - + if let Ok(chunk) = serde_json::from_str::(&line) + && let Some(candidate) = chunk.candidates.into_iter().next() + { + let text = candidate + .content + .parts + .into_iter() + .filter_map(|p| match p { + GeminiPart::Text { text } => Some(text), + _ => None, + }) + .collect::(); + + let is_final = candidate.finish_reason.is_some(); + let stop_reason = candidate.finish_reason.as_deref().map(|r| match r { + "STOP" => StopReason::EndTurn, + "MAX_TOKENS" => StopReason::MaxTokens, + _ => StopReason::Other, + }); + + let chunk_result = StreamChunk { + index: idx, + delta: if text.is_empty() { + None + } else { + Some(ContentBlock::Text { text }) + }, + is_final, + stop_reason, + usage: chunk.usage_metadata.map(|u| Usage { + prompt_tokens: u.prompt_token_count, + completion_tokens: u.candidates_token_count, + total_tokens: u.total_token_count, + cached_tokens: u.cached_content_token_count, + }), + }; + + idx += 1; + + if is_final { return Some((Ok(chunk_result), (byte_stream, buffer, idx))); } + + return Some((Ok(chunk_result), (byte_stream, buffer, idx))); } } @@ -427,10 +426,10 @@ struct GeminiStreamChunk { /// or data URI prefix. Falls back to `image/jpeg` for unknown formats. fn detect_image_mime_type(url: &str) -> String { // Handle data URIs: data:image/png;base64,... 
- if let Some(rest) = url.strip_prefix("data:") { - if let Some(mime_end) = rest.find(';').or_else(|| rest.find(',')) { - return rest[..mime_end].to_string(); - } + if let Some(rest) = url.strip_prefix("data:") + && let Some(mime_end) = rest.find(';').or_else(|| rest.find(',')) + { + return rest[..mime_end].to_string(); } // Extract extension from URL path (strip query string / fragment first) diff --git a/vulnera-llm/src/infrastructure/providers/openai.rs b/vulnera-llm/src/infrastructure/providers/openai.rs index 3b42ea91..41519f7a 100644 --- a/vulnera-llm/src/infrastructure/providers/openai.rs +++ b/vulnera-llm/src/infrastructure/providers/openai.rs @@ -336,28 +336,28 @@ impl LlmProvider for OpenAIProvider { )); } - if let Ok(chunk) = serde_json::from_str::(data) { - if let Some(choice) = chunk.choices.into_iter().next() { - let text = choice.delta.content.clone(); - let is_final = choice.finish_reason.is_some(); - let stop_reason = - choice.finish_reason.as_deref().map(|r| match r { - "stop" => StopReason::EndTurn, - "length" => StopReason::MaxTokens, - _ => StopReason::Other, - }); - - let chunk_result = StreamChunk { - index: idx, - delta: text.map(|t| ContentBlock::Text { text: t }), - is_final, - stop_reason, - usage: None, - }; - - idx += 1; - return Some((Ok(chunk_result), (byte_stream, buffer, idx))); - } + if let Ok(chunk) = serde_json::from_str::(data) + && let Some(choice) = chunk.choices.into_iter().next() + { + let text = choice.delta.content.clone(); + let is_final = choice.finish_reason.is_some(); + let stop_reason = + choice.finish_reason.as_deref().map(|r| match r { + "stop" => StopReason::EndTurn, + "length" => StopReason::MaxTokens, + _ => StopReason::Other, + }); + + let chunk_result = StreamChunk { + index: idx, + delta: text.map(|t| ContentBlock::Text { text: t }), + is_final, + stop_reason, + usage: None, + }; + + idx += 1; + return Some((Ok(chunk_result), (byte_stream, buffer, idx))); } } } diff --git 
a/vulnera-llm/src/infrastructure/providers/resilient.rs b/vulnera-llm/src/infrastructure/providers/resilient.rs index dafda2fa..6014410b 100644 --- a/vulnera-llm/src/infrastructure/providers/resilient.rs +++ b/vulnera-llm/src/infrastructure/providers/resilient.rs @@ -223,10 +223,10 @@ impl ResilientProvider

{ } // Honor retry-after if provided - if let Some(retry_after) = e.retry_after() { - if retry_after.as_millis() < self.config.max_backoff_ms as u128 { - backoff = retry_after.as_millis() as u64; - } + if let Some(retry_after) = e.retry_after() + && retry_after.as_millis() < self.config.max_backoff_ms as u128 + { + backoff = retry_after.as_millis() as u64; } last_error = Some(e); diff --git a/vulnera-orchestrator/src/application/use_cases.rs b/vulnera-orchestrator/src/application/use_cases.rs index b4f65cc9..427c84b4 100644 --- a/vulnera-orchestrator/src/application/use_cases.rs +++ b/vulnera-orchestrator/src/application/use_cases.rs @@ -493,10 +493,10 @@ fn estimate_source_size_bytes(source_uri: &str) -> Option { }) .filter_map(Result::ok) { - if entry.file_type().is_file() { - if let Ok(metadata) = entry.metadata() { - total = total.saturating_add(metadata.len()); - } + if entry.file_type().is_file() + && let Ok(metadata) = entry.metadata() + { + total = total.saturating_add(metadata.len()); } } diff --git a/vulnera-orchestrator/src/infrastructure/job_queue.rs b/vulnera-orchestrator/src/infrastructure/job_queue.rs index 9398550c..e711ad24 100644 --- a/vulnera-orchestrator/src/infrastructure/job_queue.rs +++ b/vulnera-orchestrator/src/infrastructure/job_queue.rs @@ -192,14 +192,13 @@ async fn process_job( .and_then(|inv| inv.user_id); // Record scan started event (if we have analytics context) - if let Some(ref subject) = analytics_subject { - if let Err(e) = ctx + if let Some(ref subject) = analytics_subject + && let Err(e) = ctx .analytics_recorder .on_scan_started(subject.clone(), user_id, job_id) .await - { - warn!(job_id = %job_id, error = %e, "Failed to record scan started analytics"); - } + { + warn!(job_id = %job_id, error = %e, "Failed to record scan started analytics"); } // ── Workflow: Pending → Running ────────────────────────────────── @@ -306,14 +305,13 @@ async fn process_job( } // Record scan failed (with zero findings) - if let Some(ref subject) 
= analytics_subject { - if let Err(e) = ctx + if let Some(ref subject) = analytics_subject + && let Err(e) = ctx .analytics_recorder .on_scan_completed(subject.clone(), user_id, job_id, FindingsSummary::default()) .await - { - warn!(job_id = %job_id, error = %e, "Failed to record scan failed analytics"); - } + { + warn!(job_id = %job_id, error = %e, "Failed to record scan failed analytics"); } // Deliver webhook for failed jobs too diff --git a/vulnera-orchestrator/src/presentation/auth/controller.rs b/vulnera-orchestrator/src/presentation/auth/controller.rs index b16ba9ae..81a74bec 100644 --- a/vulnera-orchestrator/src/presentation/auth/controller.rs +++ b/vulnera-orchestrator/src/presentation/auth/controller.rs @@ -473,29 +473,28 @@ pub async fn logout( headers: HeaderMap, ) -> Result)> { // If blacklist is enabled, revoke all user tokens - if state.blacklist_tokens_on_logout { - if let Some(ref blacklist) = state.token_blacklist { - // Try to extract user from the access token cookie - if let Some(access_token) = extract_cookie(&headers, "access_token") { - // Get the user ID from the token (even if expired, we can still extract claims) - if let Ok(claims) = state.validate_token_use_case.get_claims(&access_token) { - if let Ok(user_id) = claims.user_id() { - // Revoke all tokens for this user - // TTL should match the refresh token TTL to ensure all tokens are invalidated - let ttl = - std::time::Duration::from_secs(state.refresh_token_ttl_hours * 3600); - - if let Err(e) = blacklist.revoke_all_user_tokens(&user_id, ttl).await { - tracing::warn!( - user_id = %user_id, - error = %e, - "Failed to revoke user tokens on logout" - ); - // Don't fail the logout - cookie clearing will still work - } else { - tracing::info!(user_id = %user_id, "User tokens revoked on logout"); - } - } + if state.blacklist_tokens_on_logout + && let Some(ref blacklist) = state.token_blacklist + { + // Try to extract user from the access token cookie + if let Some(access_token) = 
extract_cookie(&headers, "access_token") { + // Get the user ID from the token (even if expired, we can still extract claims) + if let Ok(claims) = state.validate_token_use_case.get_claims(&access_token) + && let Ok(user_id) = claims.user_id() + { + // Revoke all tokens for this user + // TTL should match the refresh token TTL to ensure all tokens are invalidated + let ttl = std::time::Duration::from_secs(state.refresh_token_ttl_hours * 3600); + + if let Err(e) = blacklist.revoke_all_user_tokens(&user_id, ttl).await { + tracing::warn!( + user_id = %user_id, + error = %e, + "Failed to revoke user tokens on logout" + ); + // Don't fail the logout - cookie clearing will still work + } else { + tracing::info!(user_id = %user_id, "User tokens revoked on logout"); } } } diff --git a/vulnera-orchestrator/src/presentation/controllers.rs b/vulnera-orchestrator/src/presentation/controllers.rs index 41e6aa36..d4c7898f 100644 --- a/vulnera-orchestrator/src/presentation/controllers.rs +++ b/vulnera-orchestrator/src/presentation/controllers.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use std::time::Instant; use axum::{extract::State, response::Json}; +use chrono::Utc; use vulnera_core::application::analytics::use_cases::{ CheckQuotaUseCase, GetDashboardOverviewUseCase, GetMonthlyAnalyticsUseCase, }; @@ -47,14 +48,16 @@ use crate::presentation::auth::extractors::{ use crate::presentation::models::{ AffectedPackageDto, AnalysisMetadataDto, AnalysisRequest, BatchAnalysisMetadata, BatchDependencyAnalysisRequest, BatchDependencyAnalysisResponse, DependencyGraphDto, - DependencyGraphEdgeDto, DependencyGraphNodeDto, FileAnalysisResult, JobAcceptedResponse, - PackageDto, SeverityBreakdownDto, VersionRecommendationDto, VulnerabilityDto, + DependencyGraphEdgeDto, DependencyGraphNodeDto, ErrorResponse, FileAnalysisResult, + JobAcceptedResponse, PackageDto, SeverityBreakdownDto, VersionRecommendationDto, + VulnerabilityDto, }; use axum::extract::Query; use axum::http::StatusCode; use 
serde::Deserialize; use tokio::task::JoinSet; use tracing::{error, info}; +use uuid::Uuid; use vulnera_core::domain::vulnerability::{ entities::{AnalysisReport, Package, Vulnerability}, value_objects::Ecosystem, @@ -560,6 +563,7 @@ async fn convert_analysis_report_to_response( }; FileAnalysisResult { + file_id: None, filename, ecosystem: ecosystem.canonical_name().to_string(), vulnerabilities, @@ -575,7 +579,7 @@ async fn convert_analysis_report_to_response( /// POST /api/v1/dependencies/analyze - Analyze dependency files (synchronous, batch support) /// -/// This endpoint accepts optional API key authentication via the `X-API-Key` header or `Authorization: ApiKey ` header. +/// This endpoint accepts optional API key authentication via the `X-API-Key` header. /// Authenticated requests have higher rate limits and batch size limits. /// Unauthenticated requests are limited to 10 analyzes per day and 10 files per batch. /// @@ -608,19 +612,59 @@ pub async fn analyze_dependencies( Query(query): Query, OptionalApiKeyAuth(maybe_api_key): OptionalApiKeyAuth, Json(request): Json, -) -> Result, (StatusCode, String)> { +) -> Result, (StatusCode, Json)> { + fn dependency_error( + status: StatusCode, + code: &str, + message: impl Into, + details: Option, + ) -> (StatusCode, Json) { + ( + status, + Json(ErrorResponse { + code: code.to_string(), + message: message.into(), + details, + request_id: Uuid::new_v4(), + timestamp: Utc::now(), + }), + ) + } + let start_time = std::time::Instant::now(); + let request_id = Uuid::new_v4().to_string(); let is_authenticated = maybe_api_key.is_some(); + let compact_mode = request.compact_mode; + let enable_cache = request.enable_cache; + let files = request.files; info!( + request_id = %request_id, authenticated = is_authenticated, - file_count = request.files.len(), + file_count = files.len(), + compact_mode, + enable_cache, "Starting batch dependency analysis" ); // Parse detail level - let detail_level = - 
DetailLevel::from_str(&query.detail_level).map_err(|e| (StatusCode::BAD_REQUEST, e))?; + let detail_level = DetailLevel::from_str(&query.detail_level).map_err(|e| { + dependency_error( + StatusCode::BAD_REQUEST, + "INVALID_DETAIL_LEVEL", + e, + Some(serde_json::json!({ + "detail_level": query.detail_level, + "allowed": ["minimal", "standard", "full"], + })), + ) + })?; + + let detail_level = if compact_mode { + DetailLevel::Minimal + } else { + detail_level + }; // Validate batch size limits let max_files = if is_authenticated { @@ -629,35 +673,51 @@ pub async fn analyze_dependencies( 10 // Stricter for unauthenticated }; - if request.files.len() > max_files { - return Err(( + if files.len() > max_files { + return Err(dependency_error( StatusCode::BAD_REQUEST, + "BATCH_SIZE_EXCEEDED", format!( - "Batch size exceeds limit: {} files (max: {} for {}, {} for authenticated)", - request.files.len(), - max_files, - if is_authenticated { - "authenticated" - } else { - "unauthenticated" - }, - state.config.analysis.max_concurrent_packages * 2 + "Batch size exceeds limit: {} files (max: {})", + files.len(), + max_files ), + Some(serde_json::json!({ + "provided_file_count": files.len(), + "max_file_count": max_files, + "authenticated": is_authenticated, + })), )); } // Process files in parallel - let mut join_set: JoinSet> = JoinSet::new(); + let mut join_set: JoinSet<(usize, FileAnalysisResult)> = JoinSet::new(); let use_case = state.dependencies.dependency_analysis_use_case.clone(); - let total_files = request.files.len(); + let total_files = files.len(); let version_resolution_service = state.dependencies.version_resolution_service.clone(); - for file_request in request.files { + for (file_index, file_request) in files.into_iter().enumerate() { let use_case_clone = use_case.clone(); let detail_level_clone = detail_level; let version_resolution_service_clone = version_resolution_service.clone(); + let file_id = file_request.file_id.clone(); + let filename = 
file_request.filename.clone(); + let ecosystem_name = file_request.ecosystem.clone(); let workspace_path = file_request.workspace_path.clone(); + let empty_metadata = AnalysisMetadataDto { + total_packages: 0, + vulnerable_packages: 0, + total_vulnerabilities: 0, + severity_breakdown: SeverityBreakdownDto { + critical: 0, + high: 0, + medium: 0, + low: 0, + }, + analysis_duration_ms: 0, + sources_queried: vec![], + }; join_set.spawn(async move { // Parse ecosystem @@ -669,7 +729,22 @@ pub async fn analyze_dependencies( "go" => Ecosystem::Go, "packagist" | "composer" | "php" => Ecosystem::Packagist, _ => { - return Err(format!("Invalid ecosystem: {}", file_request.ecosystem)); + return ( + file_index, + FileAnalysisResult { + file_id, + filename, + ecosystem: ecosystem_name, + vulnerabilities: vec![], + packages: None, + dependency_graph: None, + version_recommendations: None, + metadata: empty_metadata, + error: Some(format!("Invalid ecosystem: {}", file_request.ecosystem)), + cache_hit: if enable_cache { Some(false) } else { None }, + workspace_path, + }, + ); } }; @@ -686,8 +761,6 @@ pub async fn analyze_dependencies( .await { Ok((report, dependency_graph)) => { - let was_cached = - report.metadata.analysis_duration < std::time::Duration::from_millis(50); let mut result = convert_analysis_report_to_response( &report, filename_for_response, @@ -697,20 +770,47 @@ pub async fn analyze_dependencies( version_resolution_service_clone, ) .await; + + if compact_mode { + result.packages = None; + result.dependency_graph = None; + result.version_recommendations = None; + } + + result.file_id = file_id; result.workspace_path = workspace_path; - result.cache_hit = Some(was_cached); - Ok(result) + result.cache_hit = if enable_cache { + Some(result.cache_hit.unwrap_or(false)) + } else { + None + }; + + (file_index, result) } Err(e) => { error!("Analysis failed: {}", e); - Err(format!("Analysis failed: {}", e)) + ( + file_index, + FileAnalysisResult { + file_id, + filename, + 
ecosystem: ecosystem_name, + vulnerabilities: vec![], + packages: None, + dependency_graph: None, + version_recommendations: None, + metadata: empty_metadata, + error: Some(format!("Analysis failed: {}", e)), + cache_hit: if enable_cache { Some(false) } else { None }, + workspace_path, + }, + ) } } }); } // Collect results - let mut results = Vec::new(); let mut successful = 0; let mut failed = 0; let mut total_vulnerabilities = 0; @@ -718,9 +818,17 @@ pub async fn analyze_dependencies( let mut critical_count = 0; let mut high_count = 0; + let mut indexed_results = Vec::new(); + while let Some(result) = join_set.join_next().await { match result { - Ok(Ok(file_result)) => { + Ok((index, file_result)) => { + if file_result.error.is_some() { + failed += 1; + indexed_results.push((index, file_result)); + continue; + } + successful += 1; total_vulnerabilities += file_result.vulnerabilities.len(); @@ -739,74 +847,56 @@ pub async fn analyze_dependencies( // If packages not included, we can't count them // This is fine for minimal detail level } - results.push(file_result); - } - Ok(Err(error_msg)) => { - failed += 1; - results.push(FileAnalysisResult { - filename: None, - ecosystem: "unknown".to_string(), - vulnerabilities: vec![], - packages: None, - dependency_graph: None, - version_recommendations: None, - metadata: AnalysisMetadataDto { - total_packages: 0, - vulnerable_packages: 0, - total_vulnerabilities: 0, - severity_breakdown: SeverityBreakdownDto { - critical: 0, - high: 0, - medium: 0, - low: 0, - }, - analysis_duration_ms: 0, - sources_queried: vec![], - }, - error: Some(error_msg), - cache_hit: None, - workspace_path: None, - }); + indexed_results.push((index, file_result)); } Err(e) => { failed += 1; error!("Join error: {}", e); - results.push(FileAnalysisResult { - filename: None, - ecosystem: "unknown".to_string(), - vulnerabilities: vec![], - packages: None, - dependency_graph: None, - version_recommendations: None, - metadata: AnalysisMetadataDto { - 
total_packages: 0, - vulnerable_packages: 0, - total_vulnerabilities: 0, - severity_breakdown: SeverityBreakdownDto { - critical: 0, - high: 0, - medium: 0, - low: 0, + indexed_results.push(( + usize::MAX, + FileAnalysisResult { + file_id: None, + filename: None, + ecosystem: "unknown".to_string(), + vulnerabilities: vec![], + packages: None, + dependency_graph: None, + version_recommendations: None, + metadata: AnalysisMetadataDto { + total_packages: 0, + vulnerable_packages: 0, + total_vulnerabilities: 0, + severity_breakdown: SeverityBreakdownDto { + critical: 0, + high: 0, + medium: 0, + low: 0, + }, + analysis_duration_ms: 0, + sources_queried: vec![], }, - analysis_duration_ms: 0, - sources_queried: vec![], + error: Some(format!("Internal error: {}", e)), + cache_hit: None, + workspace_path: None, }, - error: Some(format!("Internal error: {}", e)), - cache_hit: None, - workspace_path: None, - }); + )); } } } + let results = ordered_batch_results(indexed_results); + let duration = start_time.elapsed(); - let cache_hits = { + let cache_hits = if enable_cache { let hits = results.iter().filter(|r| r.cache_hit == Some(true)).count(); if hits > 0 { Some(hits) } else { None } + } else { + None }; info!( + request_id = %request_id, authenticated = is_authenticated, successful, failed, @@ -817,6 +907,7 @@ pub async fn analyze_dependencies( Ok(Json(BatchDependencyAnalysisResponse { results, metadata: BatchAnalysisMetadata { + request_id: Some(request_id), total_files, successful, failed, @@ -835,3 +926,82 @@ pub use health::*; // Re-export repository controller(s) pub use repository::analyze_repository; + +fn ordered_batch_results( + mut indexed_results: Vec<(usize, FileAnalysisResult)>, +) -> Vec { + indexed_results.sort_by_key(|(index, _)| *index); + indexed_results + .into_iter() + .map(|(_, result)| result) + .collect() +} + +#[cfg(test)] +mod tests { + use super::{ + AnalysisMetadataDto, FileAnalysisResult, SeverityBreakdownDto, ordered_batch_results, + }; + + fn 
make_result(file_id: &str) -> FileAnalysisResult { + FileAnalysisResult { + file_id: Some(file_id.to_string()), + filename: Some("package.json".to_string()), + ecosystem: "npm".to_string(), + vulnerabilities: vec![], + packages: None, + dependency_graph: None, + version_recommendations: None, + metadata: AnalysisMetadataDto { + total_packages: 0, + vulnerable_packages: 0, + total_vulnerabilities: 0, + severity_breakdown: SeverityBreakdownDto { + critical: 0, + high: 0, + medium: 0, + low: 0, + }, + analysis_duration_ms: 0, + sources_queried: vec![], + }, + error: None, + cache_hit: Some(false), + workspace_path: None, + } + } + + #[test] + fn ordered_batch_results_restores_request_order() { + let indexed = vec![ + (2, make_result("file:///three")), + (0, make_result("file:///one")), + (1, make_result("file:///two")), + ]; + + let ordered = ordered_batch_results(indexed); + let ids = ordered + .iter() + .map(|r| r.file_id.as_deref().unwrap_or("")) + .collect::>(); + + assert_eq!(ids, vec!["file:///one", "file:///two", "file:///three"]); + } + + #[test] + fn ordered_batch_results_keeps_internal_errors_last() { + let indexed = vec![ + (1, make_result("file:///two")), + (usize::MAX, make_result("file:///internal-error")), + (0, make_result("file:///one")), + ]; + + let ordered = ordered_batch_results(indexed); + let last_id = ordered + .last() + .and_then(|r| r.file_id.as_deref()) + .unwrap_or(""); + + assert_eq!(last_id, "file:///internal-error"); + } +} diff --git a/vulnera-orchestrator/src/presentation/controllers/jobs.rs b/vulnera-orchestrator/src/presentation/controllers/jobs.rs index ecad5199..c9e675a2 100644 --- a/vulnera-orchestrator/src/presentation/controllers/jobs.rs +++ b/vulnera-orchestrator/src/presentation/controllers/jobs.rs @@ -41,20 +41,18 @@ pub async fn get_job( Ok(Some(snapshot)) => { // Validate job ownership: only the user who created the job can access it // Master key bypass: allow master key authentication to access any job - if 
!auth.is_master_key { - if let Some(ref ctx) = snapshot.invocation_context { - if let Some(ref job_user_id) = ctx.user_id { - if job_user_id != &auth.user_id { - warn!( - job_id = %id, - requesting_user = %auth.user_id.as_str(), - job_owner = %job_user_id.as_str(), - "Unauthorized job access attempt" - ); - return Err(StatusCode::FORBIDDEN); - } - } - } + if !auth.is_master_key + && let Some(ref ctx) = snapshot.invocation_context + && let Some(ref job_user_id) = ctx.user_id + && job_user_id != &auth.user_id + { + warn!( + job_id = %id, + requesting_user = %auth.user_id.as_str(), + job_owner = %job_user_id.as_str(), + "Unauthorized job access attempt" + ); + return Err(StatusCode::FORBIDDEN); } Ok(Json(JobStatusResponse::from(snapshot))) diff --git a/vulnera-orchestrator/src/presentation/controllers/llm.rs b/vulnera-orchestrator/src/presentation/controllers/llm.rs index 40b6d327..48c8ce88 100644 --- a/vulnera-orchestrator/src/presentation/controllers/llm.rs +++ b/vulnera-orchestrator/src/presentation/controllers/llm.rs @@ -239,10 +239,10 @@ pub async fn enrich_job_findings( } // Filter to specific finding IDs if provided - if let Some(ref finding_ids) = request.finding_ids { - if !finding_ids.is_empty() { - all_findings.retain(|f| finding_ids.contains(&f.id)); - } + if let Some(ref finding_ids) = request.finding_ids + && !finding_ids.is_empty() + { + all_findings.retain(|f| finding_ids.contains(&f.id)); } // Build code contexts map diff --git a/vulnera-orchestrator/src/presentation/middleware/mod.rs b/vulnera-orchestrator/src/presentation/middleware/mod.rs index b9742e1d..c29fd5cf 100644 --- a/vulnera-orchestrator/src/presentation/middleware/mod.rs +++ b/vulnera-orchestrator/src/presentation/middleware/mod.rs @@ -391,10 +391,9 @@ fn extract_api_key_from_headers(headers: &axum::http::HeaderMap) -> Option (Option, Option, bool) { // EarlyAuthInfo is set by early_auth_middleware before rate limiting. 
- if let Some(early_auth) = request.extensions().get::() { - if early_auth.user_id.is_some() { - return ( - early_auth.user_id, - early_auth.api_key_id, - early_auth.is_org_member, - ); - } + if let Some(early_auth) = request.extensions().get::() + && early_auth.user_id.is_some() + { + return ( + early_auth.user_id, + early_auth.api_key_id, + early_auth.is_org_member, + ); } // Anonymous user diff --git a/vulnera-orchestrator/src/presentation/models.rs b/vulnera-orchestrator/src/presentation/models.rs index 1515f866..12ccf414 100644 --- a/vulnera-orchestrator/src/presentation/models.rs +++ b/vulnera-orchestrator/src/presentation/models.rs @@ -625,6 +625,10 @@ pub struct RepositoryDescriptorDto { /// Request for a single dependency file in batch analysis #[derive(Deserialize, ToSchema)] pub struct DependencyFileRequest { + /// Optional client-side correlation ID for mapping batch responses to request files + #[schema(example = "file:///workspace/frontend/package.json")] + pub file_id: Option, + /// The dependency file content to analyze #[schema(example = r#"{"dependencies": {"express": "4.17.1", "lodash": "4.17.21"}}"#)] pub file_content: String, @@ -724,6 +728,10 @@ pub struct DependencyGraphDto { /// Result for a single file analysis #[derive(Serialize, ToSchema)] pub struct FileAnalysisResult { + /// Optional client-side correlation ID echoed from request + #[serde(skip_serializing_if = "Option::is_none")] + pub file_id: Option, + /// Optional filename #[schema(example = "package.json")] pub filename: Option, @@ -762,6 +770,11 @@ pub struct FileAnalysisResult { /// Batch analysis metadata #[derive(Serialize, ToSchema)] pub struct BatchAnalysisMetadata { + /// Correlation identifier for this batch request + #[serde(skip_serializing_if = "Option::is_none")] + #[schema(example = "3dd8a6d0-a6f6-4622-9f9c-53d84fd0c7ad")] + pub request_id: Option, + /// Total number of files analyzed #[schema(example = 5)] pub total_files: usize, diff --git 
a/vulnera-orchestrator/tests/test_api_endpoints.rs b/vulnera-orchestrator/tests/test_api_endpoints.rs index c5e7eaac..cb42ece9 100644 --- a/vulnera-orchestrator/tests/test_api_endpoints.rs +++ b/vulnera-orchestrator/tests/test_api_endpoints.rs @@ -1,17 +1,102 @@ //! Integration tests for API endpoints -// Note: This is a placeholder - actual implementation would require -// setting up the full application router +use vulnera_orchestrator::presentation::models::{ + BatchAnalysisMetadata, BatchDependencyAnalysisRequest, BatchDependencyAnalysisResponse, + FileAnalysisResult, SeverityBreakdownDto, +}; + #[tokio::test] async fn test_health_endpoint() { - // This would test the /health endpoint - // Placeholder for now + // Placeholder until full router harness is added. assert!(true); } #[tokio::test] async fn test_metrics_endpoint() { - // This would test the /metrics endpoint - // Placeholder for now + // Placeholder until full router harness is added. assert!(true); } + +#[test] +fn test_batch_request_supports_file_id_contract() { + let raw = serde_json::json!({ + "files": [ + { + "file_id": "file:///workspace/frontend/package.json", + "file_content": "{\"dependencies\":{\"lodash\":\"4.17.20\"}}", + "ecosystem": "npm", + "filename": "package.json", + "workspace_path": "/workspace/frontend/package.json" + } + ], + "enable_cache": true, + "compact_mode": false + }); + + let request: BatchDependencyAnalysisRequest = + serde_json::from_value(raw).expect("request deserialization should succeed"); + + assert_eq!(request.files.len(), 1); + assert_eq!( + request.files[0].file_id.as_deref(), + Some("file:///workspace/frontend/package.json") + ); +} + +#[test] +fn test_batch_response_supports_request_id_and_file_id_contract() { + let response = BatchDependencyAnalysisResponse { + results: vec![FileAnalysisResult { + file_id: Some("file:///workspace/frontend/package.json".to_string()), + filename: Some("package.json".to_string()), + ecosystem: "npm".to_string(), + 
vulnerabilities: vec![], + packages: None, + dependency_graph: None, + version_recommendations: None, + metadata: vulnera_orchestrator::presentation::models::AnalysisMetadataDto { + total_packages: 1, + vulnerable_packages: 0, + total_vulnerabilities: 0, + severity_breakdown: SeverityBreakdownDto { + critical: 0, + high: 0, + medium: 0, + low: 0, + }, + analysis_duration_ms: 3, + sources_queried: vec![], + }, + error: None, + cache_hit: Some(false), + workspace_path: Some("/workspace/frontend/package.json".to_string()), + }], + metadata: BatchAnalysisMetadata { + request_id: Some("3dd8a6d0-a6f6-4622-9f9c-53d84fd0c7ad".to_string()), + total_files: 1, + successful: 1, + failed: 0, + duration_ms: 3, + total_vulnerabilities: 0, + total_packages: 1, + cache_hits: Some(0), + critical_count: 0, + high_count: 0, + }, + }; + + let value = serde_json::to_value(&response).expect("response serialization should succeed"); + + let request_id = value + .get("metadata") + .and_then(|metadata| metadata.get("request_id")) + .and_then(|id| id.as_str()); + let file_id = value + .get("results") + .and_then(|results| results.get(0)) + .and_then(|result| result.get("file_id")) + .and_then(|id| id.as_str()); + + assert_eq!(request_id, Some("3dd8a6d0-a6f6-4622-9f9c-53d84fd0c7ad")); + assert_eq!(file_id, Some("file:///workspace/frontend/package.json")); +} diff --git a/vulnera-sast/examples/scale_benchmark.rs b/vulnera-sast/examples/scale_benchmark.rs index f3e33997..0f8dc987 100644 --- a/vulnera-sast/examples/scale_benchmark.rs +++ b/vulnera-sast/examples/scale_benchmark.rs @@ -61,14 +61,8 @@ async fn run() -> Result<(), String> { println!("baseline_parallelism={baseline_parallelism}"); println!("tuned_parallelism={tuned_parallelism}"); - let baseline = benchmark_profile( - "baseline", - &target, - iterations, - depth, - baseline_parallelism, - ) - .await?; + let baseline = + benchmark_profile("baseline", &target, iterations, depth, baseline_parallelism).await?; let tuned = 
benchmark_profile("tuned", &target, iterations, depth, tuned_parallelism).await?; @@ -150,7 +144,9 @@ async fn benchmark_profile( let avg_ms = durations.iter().copied().map(|v| v as f64).sum::() / iterations as f64; let p95_index = ((iterations as f64) * 0.95).ceil() as usize; - let p95_ms = durations[p95_index.saturating_sub(1).min(durations.len().saturating_sub(1))]; + let p95_ms = durations[p95_index + .saturating_sub(1) + .min(durations.len().saturating_sub(1))]; let avg_files_scanned = total_files_scanned as f64 / iterations as f64; let avg_findings = total_findings as f64 / iterations as f64; diff --git a/vulnera-sast/src/application/use_cases.rs b/vulnera-sast/src/application/use_cases.rs index c1d4e280..308ad6f8 100644 --- a/vulnera-sast/src/application/use_cases.rs +++ b/vulnera-sast/src/application/use_cases.rs @@ -121,11 +121,7 @@ fn default_js_ts_frontend() -> JavaScriptFrontend { } fn parse_js_ts_frontend(value: Option<&str>) -> JavaScriptFrontend { - match value - .map(str::trim) - .map(str::to_ascii_lowercase) - .as_deref() - { + match value.map(str::trim).map(str::to_ascii_lowercase).as_deref() { Some("tree_sitter") => JavaScriptFrontend::TreeSitter, Some("oxc_preferred") => JavaScriptFrontend::OxcPreferred, _ => default_js_ts_frontend(), @@ -490,10 +486,10 @@ impl ScanProjectUseCase { stage_analysis.files_scanned + stage_analysis.files_skipped, stage_analysis.files_skipped, ); - if let Some(ref state_path) = self.config.incremental_state_path { - if let Err(e) = t.save_to_file(state_path) { - warn!(error = %e, "Failed to save incremental state"); - } + if let Some(ref state_path) = self.config.incremental_state_path + && let Err(e) = t.save_to_file(state_path) + { + warn!(error = %e, "Failed to save incremental state"); } let stats = t.stats(); info!( @@ -567,7 +563,9 @@ impl ScanProjectUseCase { rules, effective_depth, effective_parallelism, - ast_cache_ttl: Duration::from_secs(self.config.ast_cache_ttl_hours.saturating_mul(3600)), + 
ast_cache_ttl: Duration::from_secs( + self.config.ast_cache_ttl_hours.saturating_mul(3600), + ), }) } @@ -591,20 +589,26 @@ impl ScanProjectUseCase { let results = stream::iter(files.iter().cloned()) .map(|file| async move { let file_path_str = file.path.display().to_string(); - let content = std::fs::read_to_string(&file.path) - .map_err(|e| format!("Failed to read {} in parse/index: {}", file.path.display(), e))?; + let content = std::fs::read_to_string(&file.path).map_err(|e| { + format!( + "Failed to read {} in parse/index: {}", + file.path.display(), + e + ) + })?; let selected_frontend = self.parser_frontend_selector.select(file.language); let mut warnings = Vec::new(); - if selected_frontend == ParserFrontend::Oxc && OxcFrontend::supports(file.language) { - if let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) { - warnings.push(format!( - "OXC parse warning for {}: {}", - file.path.display(), - err - )); - } + if selected_frontend == ParserFrontend::Oxc + && OxcFrontend::supports(file.language) + && let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) + { + warnings.push(format!( + "OXC parse warning for {}: {}", + file.path.display(), + err + )); } let tree = self @@ -633,16 +637,16 @@ impl ScanProjectUseCase { errors.push(warning); } - if self.config.enable_ast_cache { - if let Some(cache) = self.ast_cache.as_ref() { - let content_hash = Self::compute_content_hash(&content); - let ast = convert_tree_sitter_node(tree.root_node(), &content, None); - if let Err(e) = cache - .set(&content_hash, &language, &ast, Some(ast_cache_ttl)) - .await - { - warn!(error = %e, "Failed to write L2 AST cache"); - } + if self.config.enable_ast_cache + && let Some(cache) = self.ast_cache.as_ref() + { + let content_hash = Self::compute_content_hash(&content); + let ast = convert_tree_sitter_node(tree.root_node(), &content, None); + if let Err(e) = cache + .set(&content_hash, &language, &ast, Some(ast_cache_ttl)) + .await + { + warn!(error = %e, 
"Failed to write L2 AST cache"); } } @@ -740,20 +744,21 @@ impl ScanProjectUseCase { } }; - if selected_frontend == ParserFrontend::Oxc && OxcFrontend::supports(file.language) { - if let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) { - warn!( - file = %file.path.display(), - language = %file.language, - error = %err, - "OXC parse failed, continuing with Tree-sitter compatibility lane" - ); - stage.errors.push(format!( - "OXC parse warning for {}: {}", - file.path.display(), - err - )); - } + if selected_frontend == ParserFrontend::Oxc + && OxcFrontend::supports(file.language) + && let Err(err) = self.oxc_frontend.parse_file(&file.path, &content) + { + warn!( + file = %file.path.display(), + language = %file.language, + error = %err, + "OXC parse failed, continuing with Tree-sitter compatibility lane" + ); + stage.errors.push(format!( + "OXC parse warning for {}: {}", + file.path.display(), + err + )); } let file_path_str = file.path.display().to_string(); @@ -782,7 +787,9 @@ impl ScanProjectUseCase { } } - let mut cached_tree = parsed_files.get(&file_path_str).map(|(tree, _)| tree.clone()); + let mut cached_tree = parsed_files + .get(&file_path_str) + .map(|(tree, _)| tree.clone()); let mut l2_hit = false; if cached_tree.is_some() { @@ -806,15 +813,16 @@ impl ScanProjectUseCase { } if let Ok(tree) = self.sast_engine.parse(&content, file.language).await { - if self.config.enable_ast_cache && !l2_hit { - if let Some(cache) = self.ast_cache.as_ref() { - let ast = convert_tree_sitter_node(tree.root_node(), &content, None); - if let Err(e) = cache - .set(&content_hash, &file.language, &ast, Some(ast_cache_ttl)) - .await - { - warn!(error = %e, "Failed to write L2 AST cache"); - } + if self.config.enable_ast_cache + && !l2_hit + && let Some(cache) = self.ast_cache.as_ref() + { + let ast = convert_tree_sitter_node(tree.root_node(), &content, None); + if let Err(e) = cache + .set(&content_hash, &file.language, &ast, Some(ast_cache_ttl)) + .await + { + 
warn!(error = %e, "Failed to write L2 AST cache"); } } cached_tree = Some(tree); @@ -848,9 +856,11 @@ impl ScanProjectUseCase { .await { warn!(file = %file.path.display(), error = %e, "Tree-sitter analysis failed"); - stage - .errors - .push(format!("Analysis failed for {}: {}", file.path.display(), e)); + stage.errors.push(format!( + "Analysis failed for {}: {}", + file.path.display(), + e + )); } if self.config.enable_data_flow && effective_depth != AnalysisDepth::Quick { @@ -879,24 +889,23 @@ impl ScanProjectUseCase { ); } - if let Some(max_total) = self.config.max_total_findings { - if stage.findings.len() >= max_total { - info!( - total_findings = stage.findings.len(), - max_total, - "Max total findings limit reached, stopping scan early" + if let Some(max_total) = self.config.max_total_findings + && stage.findings.len() >= max_total + { + info!( + total_findings = stage.findings.len(), + max_total, "Max total findings limit reached, stopping scan early" + ); + let mut tracker = self.incremental_tracker.lock().unwrap(); + if let Some(ref mut t) = *tracker { + t.record_file( + &file_path_str, + content_hash, + content.len() as u64, + file_finding_count, ); - let mut tracker = self.incremental_tracker.lock().unwrap(); - if let Some(ref mut t) = *tracker { - t.record_file( - &file_path_str, - content_hash, - content.len() as u64, - file_finding_count, - ); - } - break; } + break; } { @@ -1484,17 +1493,17 @@ impl ScanProjectUseCase { let sink_var = sink.variable_name.as_deref().unwrap_or(&sink.matched_text); - if analyzer.is_tainted(sink_var) { - if let Some(data_flow_finding) = analyzer.check_sink( + if analyzer.is_tainted(sink_var) + && let Some(data_flow_finding) = analyzer.check_sink( sink_var, &sink.pattern_name, file_str, sink.line as u32 + 1, sink.column as u32, - ) { - Self::add_finding(findings, &data_flow_finding, sink, file_str, language); - continue; - } + ) + { + Self::add_finding(findings, &data_flow_finding, sink, file_str, language); + continue; } 
let active_taints: Vec = analyzer @@ -1514,16 +1523,16 @@ impl ScanProjectUseCase { .or_else(|_| regex_cache::get_regex(tainted_var)) .unwrap(); - if re.is_match(&sink.matched_text) { - if let Some(data_flow_finding) = analyzer.check_sink( + if re.is_match(&sink.matched_text) + && let Some(data_flow_finding) = analyzer.check_sink( tainted_var, &sink.pattern_name, file_str, sink.line as u32 + 1, sink.column as u32, - ) { - Self::add_finding(findings, &data_flow_finding, sink, file_str, language); - } + ) + { + Self::add_finding(findings, &data_flow_finding, sink, file_str, language); } } } diff --git a/vulnera-sast/src/infrastructure/call_graph.rs b/vulnera-sast/src/infrastructure/call_graph.rs index f415cedd..6b9ce279 100644 --- a/vulnera-sast/src/infrastructure/call_graph.rs +++ b/vulnera-sast/src/infrastructure/call_graph.rs @@ -118,12 +118,12 @@ impl CallGraph { let mut queue = vec![start_id.to_string()]; while let Some(current) = queue.pop() { - if visited.insert(current.clone()) { - if let Some(calls) = self.edges.get(¤t) { - for call in calls { - if !visited.contains(&call.target_id) { - queue.push(call.target_id.clone()); - } + if visited.insert(current.clone()) + && let Some(calls) = self.edges.get(¤t) + { + for call in calls { + if !visited.contains(&call.target_id) { + queue.push(call.target_id.clone()); } } } @@ -138,12 +138,12 @@ impl CallGraph { let mut queue = vec![target_id.to_string()]; while let Some(current) = queue.pop() { - if visited.insert(current.clone()) { - if let Some(callers) = self.reverse_edges.get(¤t) { - for caller in callers { - if !visited.contains(caller) { - queue.push(caller.clone()); - } + if visited.insert(current.clone()) + && let Some(callers) = self.reverse_edges.get(¤t) + { + for caller in callers { + if !visited.contains(caller) { + queue.push(caller.clone()); } } } @@ -315,9 +315,7 @@ impl CallGraph { .collect(); ranked.sort_by(|(lhs_id, lhs_score), (rhs_id, rhs_score)| { - rhs_score - .cmp(lhs_score) - .then_with(|| 
lhs_id.cmp(rhs_id)) + rhs_score.cmp(lhs_score).then_with(|| lhs_id.cmp(rhs_id)) }); if let Some((best_id, _)) = ranked.first() { @@ -335,29 +333,30 @@ impl CallGraph { score += 120; } - if let Some(caller_dir) = std::path::Path::new(&call.caller_file_path).parent() { - if node.file_path.starts_with(caller_dir.to_string_lossy().as_ref()) { - score += 70; - } + if let Some(caller_dir) = std::path::Path::new(&call.caller_file_path).parent() + && node + .file_path + .starts_with(caller_dir.to_string_lossy().as_ref()) + { + score += 70; } - if let Some(ref hint) = call.module_hint { - if !hint.is_empty() - && (node.file_path.contains(hint) - || std::path::Path::new(&node.file_path) - .file_stem() - .and_then(|s| s.to_str()) - .map(|stem| stem == hint) - .unwrap_or(false)) - { - score += 90; - } + if let Some(ref hint) = call.module_hint + && !hint.is_empty() + && (node.file_path.contains(hint) + || std::path::Path::new(&node.file_path) + .file_stem() + .and_then(|s| s.to_str()) + .map(|stem| stem == hint) + .unwrap_or(false)) + { + score += 90; } - if let Some(ref scope) = call.caller_scope { - if node.id.contains(&format!("::{}::", scope)) { - score += 100; - } + if let Some(ref scope) = call.caller_scope + && node.id.contains(&format!("::{}::", scope)) + { + score += 100; } score + Self::path_similarity_bonus(&call.caller_file_path, &node.file_path) @@ -582,64 +581,61 @@ impl CallGraphBuilder { // 1. 
Extract class/struct contexts first let mut class_contexts: Vec = Vec::new(); - if let Some(class_query) = class_query_str { - if let Ok(query) = + if let Some(class_query) = class_query_str + && let Ok(query) = crate::infrastructure::query_engine::compile_query(class_query, language) - { - let matches = - crate::infrastructure::query_engine::execute_query(&query, tree, source_bytes); - for m in matches { - // Look for class.name, type.name, or struct.name depending on language - let class_name = m - .captures - .get("class.name") - .or_else(|| m.captures.get("type.name")) - .or_else(|| m.captures.get("struct.name")); - - if let Some(name_node) = class_name { - let name = source[name_node.start_byte..name_node.end_byte].to_string(); - class_contexts.push(ClassContext { - name, - start_byte: m.start_byte, - end_byte: m.end_byte, - }); - } + { + let matches = + crate::infrastructure::query_engine::execute_query(&query, tree, source_bytes); + for m in matches { + // Look for class.name, type.name, or struct.name depending on language + let class_name = m + .captures + .get("class.name") + .or_else(|| m.captures.get("type.name")) + .or_else(|| m.captures.get("struct.name")); + + if let Some(name_node) = class_name { + let name = source[name_node.start_byte..name_node.end_byte].to_string(); + class_contexts.push(ClassContext { + name, + start_byte: m.start_byte, + end_byte: m.end_byte, + }); } } } // 2. 
Extract parameters for functions (build a map: func_name -> params) let mut function_params: HashMap> = HashMap::new(); - if let Some(param_query) = param_query_str { - if let Ok(query) = + if let Some(param_query) = param_query_str + && let Ok(query) = crate::infrastructure::query_engine::compile_query(param_query, language) - { - let matches = - crate::infrastructure::query_engine::execute_query(&query, tree, source_bytes); - for m in matches { - let name_node = m.captures.get("name"); - let param_node = m.captures.get("param.name"); - let type_node = m.captures.get("param.type"); - - if let (Some(name_n), Some(param_n)) = (name_node, param_node) { - let func_name = source[name_n.start_byte..name_n.end_byte].to_string(); - let param_name = source[param_n.start_byte..param_n.end_byte].to_string(); - - // Extract type if available - let type_hint = - type_node.map(|t| source[t.start_byte..t.end_byte].to_string()); - - let param_info = ParameterInfo { - name: param_name, - type_hint, - default_value: None, // Default values not extracted currently - }; - - function_params - .entry(func_name) - .or_default() - .push(param_info); - } + { + let matches = + crate::infrastructure::query_engine::execute_query(&query, tree, source_bytes); + for m in matches { + let name_node = m.captures.get("name"); + let param_node = m.captures.get("param.name"); + let type_node = m.captures.get("param.type"); + + if let (Some(name_n), Some(param_n)) = (name_node, param_node) { + let func_name = source[name_n.start_byte..name_n.end_byte].to_string(); + let param_name = source[param_n.start_byte..param_n.end_byte].to_string(); + + // Extract type if available + let type_hint = type_node.map(|t| source[t.start_byte..t.end_byte].to_string()); + + let param_info = ParameterInfo { + name: param_name, + type_hint, + default_value: None, // Default values not extracted currently + }; + + function_params + .entry(func_name) + .or_default() + .push(param_info); } } } diff --git 
a/vulnera-sast/src/infrastructure/data_flow.rs b/vulnera-sast/src/infrastructure/data_flow.rs index 16f18594..9804a5ae 100644 --- a/vulnera-sast/src/infrastructure/data_flow.rs +++ b/vulnera-sast/src/infrastructure/data_flow.rs @@ -478,10 +478,10 @@ impl InterProceduralContext { for label in &return_state.labels { // Check if this label's source indicates a parameter // Parameter sources are encoded as "param:N" in the source field - if let Some(param_str) = label.source.strip_prefix("param:") { - if let Ok(param_idx) = param_str.parse::() { - summary.params_to_return.insert(param_idx); - } + if let Some(param_str) = label.source.strip_prefix("param:") + && let Ok(param_idx) = param_str.parse::() + { + summary.params_to_return.insert(param_idx); } } } @@ -517,23 +517,22 @@ impl InterProceduralContext { // If param is tainted and return is tainted, check for flow // by examining if any taint state has labels from this param - if summary.return_tainted { - if let Some(analyzer) = self.function_contexts.get(function_id) { - let param_source = format!("param:{}", param_idx); - - for (_, taint_state) in - analyzer.symbol_table().get_all_tainted_in_all_scopes() - { - // Check if this state has labels from the parameter - let from_param = - taint_state.labels.iter().any(|l| l.source == param_source); - - // If any taint state from this param exists, and return is tainted, - // mark parameter as flowing to return - if from_param { - summary.params_to_return.insert(param_idx); - break; - } + if summary.return_tainted + && let Some(analyzer) = self.function_contexts.get(function_id) + { + let param_source = format!("param:{}", param_idx); + + for (_, taint_state) in analyzer.symbol_table().get_all_tainted_in_all_scopes() + { + // Check if this state has labels from the parameter + let from_param = + taint_state.labels.iter().any(|l| l.source == param_source); + + // If any taint state from this param exists, and return is tainted, + // mark parameter as flowing to return + 
if from_param { + summary.params_to_return.insert(param_idx); + break; } } } diff --git a/vulnera-sast/src/infrastructure/metavar_patterns.rs b/vulnera-sast/src/infrastructure/metavar_patterns.rs index 18908ca1..4a8a2a94 100644 --- a/vulnera-sast/src/infrastructure/metavar_patterns.rs +++ b/vulnera-sast/src/infrastructure/metavar_patterns.rs @@ -216,34 +216,35 @@ fn detect_structure(tokens: &[MetavarToken]) -> PatternStructure { } // Check for function call: name(...) or $VAR(...) - if filtered.len() >= 3 { - if let (first, MetavarToken::OpenParen) = (&filtered[0], &filtered[1]) { - match first { - MetavarToken::Identifier(name) => { - return PatternStructure::FunctionCall { - name: name.clone(), - is_metavar: false, - }; - } - MetavarToken::Metavar(name) => { - return PatternStructure::FunctionCall { - name: name.clone(), - is_metavar: true, - }; - } - _ => {} + if filtered.len() >= 3 + && let (first, MetavarToken::OpenParen) = (&filtered[0], &filtered[1]) + { + match first { + MetavarToken::Identifier(name) => { + return PatternStructure::FunctionCall { + name: name.clone(), + is_metavar: false, + }; + } + MetavarToken::Metavar(name) => { + return PatternStructure::FunctionCall { + name: name.clone(), + is_metavar: true, + }; } + _ => {} } } // Check for binary expression: expr op expr for (i, token) in filtered.iter().enumerate() { - if let MetavarToken::Operator(op) = token { - if i > 0 && i < filtered.len() - 1 { - return PatternStructure::BinaryExpression { - operator: op.clone(), - }; - } + if let MetavarToken::Operator(op) = token + && i > 0 + && i < filtered.len() - 1 + { + return PatternStructure::BinaryExpression { + operator: op.clone(), + }; } } @@ -268,11 +269,11 @@ fn extract_metavar_indices(tokens: &[MetavarToken]) -> HashMap { let mut counter = 0; for token in tokens { - if let MetavarToken::Metavar(name) = token { - if !indices.contains_key(name) { - indices.insert(name.clone(), counter); - counter += 1; - } + if let MetavarToken::Metavar(name) = 
token + && !indices.contains_key(name) + { + indices.insert(name.clone(), counter); + counter += 1; } } diff --git a/vulnera-sast/src/infrastructure/oxc_frontend.rs b/vulnera-sast/src/infrastructure/oxc_frontend.rs index 5bdc6526..0e5d0f21 100644 --- a/vulnera-sast/src/infrastructure/oxc_frontend.rs +++ b/vulnera-sast/src/infrastructure/oxc_frontend.rs @@ -30,10 +30,7 @@ impl OxcFrontend { let parser_return = Parser::new(&allocator, source, source_type).parse(); if parser_return.panicked { - return Err(format!( - "OXC parser panicked for '{}'", - file_path.display() - )); + return Err(format!("OXC parser panicked for '{}'", file_path.display())); } if parser_return.errors.is_empty() { diff --git a/vulnera-sast/src/infrastructure/sarif.rs b/vulnera-sast/src/infrastructure/sarif.rs index a8c85393..4829ede1 100644 --- a/vulnera-sast/src/infrastructure/sarif.rs +++ b/vulnera-sast/src/infrastructure/sarif.rs @@ -273,18 +273,18 @@ impl SarifExporter { /// Extract code snippet from finding description fn extract_snippet_from_description(&self, description: &str) -> Option { // Look for code blocks in the description - if let Some(start) = description.find("```") { - if let Some(end) = description[start + 3..].find("```") { - let code = &description[start + 3..start + 3 + end]; - // Remove language identifier if present - let code = code - .lines() - .skip_while(|l| l.chars().all(|c| c.is_alphanumeric())) - .collect::>() - .join("\n"); - if !code.trim().is_empty() { - return Some(code.trim().to_string()); - } + if let Some(start) = description.find("```") + && let Some(end) = description[start + 3..].find("```") + { + let code = &description[start + 3..start + 3 + end]; + // Remove language identifier if present + let code = code + .lines() + .skip_while(|l| l.chars().all(|c| c.is_alphanumeric())) + .collect::>() + .join("\n"); + if !code.trim().is_empty() { + return Some(code.trim().to_string()); } } diff --git a/vulnera-sast/src/infrastructure/scanner.rs 
b/vulnera-sast/src/infrastructure/scanner.rs index a10b2b26..6e74d275 100644 --- a/vulnera-sast/src/infrastructure/scanner.rs +++ b/vulnera-sast/src/infrastructure/scanner.rs @@ -50,24 +50,23 @@ impl DirectoryScanner { let path = entry.path(); // Skip excluded directories - if entry.file_type().is_dir() { - if let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) { - if self.exclude_patterns.iter().any(|p| dir_name.contains(p)) { - trace!(directory = %dir_name, "Excluding directory"); - excluded_count += 1; - continue; - } - } + if entry.file_type().is_dir() + && let Some(dir_name) = path.file_name().and_then(|n| n.to_str()) + && self.exclude_patterns.iter().any(|p| dir_name.contains(p)) + { + trace!(directory = %dir_name, "Excluding directory"); + excluded_count += 1; + continue; } - if entry.file_type().is_file() { - if let Some(language) = Language::from_filename(path.to_string_lossy().as_ref()) { - trace!(file = %path.display(), language = ?language, "Found scannable file"); - files.push(ScanFile { - path: path.to_path_buf(), - language, - }); - } + if entry.file_type().is_file() + && let Some(language) = Language::from_filename(path.to_string_lossy().as_ref()) + { + trace!(file = %path.display(), language = ?language, "Found scannable file"); + files.push(ScanFile { + path: path.to_path_buf(), + language, + }); } } diff --git a/vulnera-sast/src/infrastructure/symbol_table.rs b/vulnera-sast/src/infrastructure/symbol_table.rs index e735ccfa..16c0411a 100644 --- a/vulnera-sast/src/infrastructure/symbol_table.rs +++ b/vulnera-sast/src/infrastructure/symbol_table.rs @@ -431,10 +431,10 @@ impl SymbolTable { /// Get taint state by name across all scopes (ignores current scope) pub fn get_taint_any_scope(&self, name: &str) -> Option<&TaintState> { for scope in &self.scopes { - if let Some(symbol) = scope.resolve(name) { - if let Some(taint) = symbol.taint_state() { - return Some(taint); - } + if let Some(symbol) = scope.resolve(name) + && let Some(taint) = 
symbol.taint_state() + { + return Some(taint); } } None @@ -499,12 +499,11 @@ impl SymbolTable { /// Record a use of a symbol at a location pub fn record_use(&mut self, name: &str, location: Location) { - if let Some(scope_id) = self.find_symbol_scope(name) { - if let Some(scope) = self.scopes.get_mut(scope_id) { - if let Some(symbol) = scope.resolve_mut(name) { - symbol.record_use(location); - } - } + if let Some(scope_id) = self.find_symbol_scope(name) + && let Some(scope) = self.scopes.get_mut(scope_id) + && let Some(symbol) = scope.resolve_mut(name) + { + symbol.record_use(location); } } @@ -1366,21 +1365,21 @@ impl<'a> SymbolTableBuilder<'a> { // import { a, b } from "module" let mut cursor = node.walk(); for child in node.children(&mut cursor) { - if child.kind() == "import_specifier" { - if let Some(name_node) = child.child_by_field_name("name") { - let name = self.node_text(name_node); - let location = self.node_location(name_node); + if child.kind() == "import_specifier" + && let Some(name_node) = child.child_by_field_name("name") + { + let name = self.node_text(name_node); + let location = self.node_location(name_node); - let symbol = Symbol::new( - name, - SymbolKind::Import, - self.table.current_scope_id(), - location, - ) - .with_mutable(false); + let symbol = Symbol::new( + name, + SymbolKind::Import, + self.table.current_scope_id(), + location, + ) + .with_mutable(false); - let _ = self.table.declare(symbol); - } + let _ = self.table.declare(symbol); } } } @@ -2589,13 +2588,13 @@ impl<'a> SymbolTableBuilder<'a> { fn handle_c_function_parameters(&mut self, node: Node) { // Extract parameter declarations from function declarator - if node.kind() == "function_declarator" { - if let Some(params) = node.child_by_field_name("parameters") { - let mut cursor = params.walk(); - for child in params.children(&mut cursor) { - if child.kind() == "parameter_declaration" { - self.handle_c_parameter(child); - } + if node.kind() == "function_declarator" + && let 
Some(params) = node.child_by_field_name("parameters") + { + let mut cursor = params.walk(); + for child in params.children(&mut cursor) { + if child.kind() == "parameter_declaration" { + self.handle_c_parameter(child); } } } @@ -2822,23 +2821,22 @@ impl<'a> SymbolTableBuilder<'a> { if let Some(params) = node.child_by_field_name("parameters") { let mut cursor = params.walk(); for child in params.children(&mut cursor) { - if child.kind() == "type_parameter_declaration" - || child.kind() == "parameter_declaration" + if (child.kind() == "type_parameter_declaration" + || child.kind() == "parameter_declaration") + && let Some(name_node) = child.child_by_field_name("name") { - if let Some(name_node) = child.child_by_field_name("name") { - let name = self.node_text(name_node); - let location = self.node_location(name_node); + let name = self.node_text(name_node); + let location = self.node_location(name_node); - let symbol = Symbol::new( - name, - SymbolKind::TypeAlias, - self.table.current_scope_id(), - location, - ) - .with_mutable(false); + let symbol = Symbol::new( + name, + SymbolKind::TypeAlias, + self.table.current_scope_id(), + location, + ) + .with_mutable(false); - let _ = self.table.declare(symbol); - } + let _ = self.table.declare(symbol); } } } @@ -2906,15 +2904,15 @@ impl<'a> SymbolTableBuilder<'a> { fn handle_cpp_function_parameters(&mut self, node: Node) { // Extract parameter declarations - if node.kind() == "function_declarator" { - if let Some(params) = node.child_by_field_name("parameters") { - let mut cursor = params.walk(); - for child in params.children(&mut cursor) { - if child.kind() == "parameter_declaration" - || child.kind() == "optional_parameter_declaration" - { - self.handle_cpp_parameter(child); - } + if node.kind() == "function_declarator" + && let Some(params) = node.child_by_field_name("parameters") + { + let mut cursor = params.walk(); + for child in params.children(&mut cursor) { + if child.kind() == "parameter_declaration" + || 
child.kind() == "optional_parameter_declaration" + { + self.handle_cpp_parameter(child); } } } diff --git a/vulnera-sast/src/module.rs b/vulnera-sast/src/module.rs index 3316072b..963a2093 100644 --- a/vulnera-sast/src/module.rs +++ b/vulnera-sast/src/module.rs @@ -82,7 +82,8 @@ impl SastModuleBuilder { let use_case = if let Some(uc) = use_case_override { uc } else { - let analysis_cfg = analysis_config.unwrap_or_else(|| AnalysisConfig::from(&sast_config)); + let analysis_cfg = + analysis_config.unwrap_or_else(|| AnalysisConfig::from(&sast_config)); let uc = ScanProjectUseCase::with_config(&sast_config, analysis_cfg); let uc = if let Some(cache) = ast_cache { diff --git a/vulnera-secrets/src/application/use_cases.rs b/vulnera-secrets/src/application/use_cases.rs index 21fb6841..0bb5d21e 100644 --- a/vulnera-secrets/src/application/use_cases.rs +++ b/vulnera-secrets/src/application/use_cases.rs @@ -455,12 +455,12 @@ impl ScanForSecretsUseCase { }; // Optionally strip markdown code blocks if Markdown files are being scanned - if !scan_markdown_codeblocks { - if let Some(ext) = file.path.extension().and_then(|e| e.to_str()) { - let ext_lower = ext.to_lowercase(); - if ext_lower == "md" || ext_lower == "markdown" { - content = Self::strip_markdown_code_blocks(&content); - } + if !scan_markdown_codeblocks + && let Some(ext) = file.path.extension().and_then(|e| e.to_str()) + { + let ext_lower = ext.to_lowercase(); + if ext_lower == "md" || ext_lower == "markdown" { + content = Self::strip_markdown_code_blocks(&content); } } @@ -503,13 +503,12 @@ impl ScanForSecretsUseCase { return Some("allowlist:global".to_string()); } - if let Some(patterns) = self.rule_allowlist_patterns.get(&finding.rule_id) { - if patterns + if let Some(patterns) = self.rule_allowlist_patterns.get(&finding.rule_id) + && patterns .iter() .any(|pattern| pattern.is_match(&finding.matched_secret)) - { - return Some(format!("allowlist:rule:{}", finding.rule_id)); - } + { + return 
Some(format!("allowlist:rule:{}", finding.rule_id)); } None diff --git a/vulnera-secrets/src/domain/value_objects.rs b/vulnera-secrets/src/domain/value_objects.rs index 26395413..260c4578 100644 --- a/vulnera-secrets/src/domain/value_objects.rs +++ b/vulnera-secrets/src/domain/value_objects.rs @@ -78,7 +78,7 @@ impl Entropy { || c == '-' || c == '_' }) - && data.len() % 4 == 0 + && data.len().is_multiple_of(4) } /// Check if a string looks like hex diff --git a/vulnera-secrets/src/infrastructure/baseline/repository.rs b/vulnera-secrets/src/infrastructure/baseline/repository.rs index f631b3a0..a61480f8 100644 --- a/vulnera-secrets/src/infrastructure/baseline/repository.rs +++ b/vulnera-secrets/src/infrastructure/baseline/repository.rs @@ -150,13 +150,12 @@ impl FileBaselineRepository { } // Update modification time - if self.file_path.exists() { - if let Ok(metadata) = std::fs::metadata(&self.file_path) { - if let Ok(modified) = metadata.modified() { - let mut last_mod = self.last_modified.write().unwrap(); - *last_mod = Some(modified); - } - } + if self.file_path.exists() + && let Ok(metadata) = std::fs::metadata(&self.file_path) + && let Ok(modified) = metadata.modified() + { + let mut last_mod = self.last_modified.write().unwrap(); + *last_mod = Some(modified); } Ok(baseline) @@ -255,11 +254,11 @@ impl BaselineRepository for FileBaselineRepository { } // Update modification time - if let Ok(metadata) = std::fs::metadata(&self.file_path) { - if let Ok(modified) = metadata.modified() { - let mut last_mod = self.last_modified.write().unwrap(); - *last_mod = Some(modified); - } + if let Ok(metadata) = std::fs::metadata(&self.file_path) + && let Ok(modified) = metadata.modified() + { + let mut last_mod = self.last_modified.write().unwrap(); + *last_mod = Some(modified); } info!( diff --git a/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs b/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs index 60ee3893..4814a95b 100644 --- 
a/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs +++ b/vulnera-secrets/src/infrastructure/detectors/ast_extractor.rs @@ -230,18 +230,16 @@ impl AstContextExtractor { | "short_var_declaration" ) { - if let Some(first) = parent.named_child(0) { - if first.start_byte() < node.start_byte() { - lhs_variable = - Some(source[first.start_byte()..first.end_byte()].to_string()); - } + if let Some(first) = parent.named_child(0) + && first.start_byte() < node.start_byte() + { + lhs_variable = Some(source[first.start_byte()..first.end_byte()].to_string()); } if let Some(last) = parent.named_child(parent.named_child_count().saturating_sub(1) as u32) + && last.start_byte() >= node.start_byte() { - if last.start_byte() >= node.start_byte() { - rhs_value = Some(source[last.start_byte()..last.end_byte()].to_string()); - } + rhs_value = Some(source[last.start_byte()..last.end_byte()].to_string()); } if lhs_variable.is_some() || rhs_value.is_some() { break; diff --git a/vulnera-secrets/src/infrastructure/git/scanner.rs b/vulnera-secrets/src/infrastructure/git/scanner.rs index 2f172dda..2c5b4760 100644 --- a/vulnera-secrets/src/infrastructure/git/scanner.rs +++ b/vulnera-secrets/src/infrastructure/git/scanner.rs @@ -82,11 +82,11 @@ impl GitScanner { for oid in oids { // Check max commits limit - if let Some(max) = self.max_commits { - if commit_count >= max { - debug!("Reached max commits limit: {}", max); - break; - } + if let Some(max) = self.max_commits + && commit_count >= max + { + debug!("Reached max commits limit: {}", max); + break; } // Extract commit data and lines to scan synchronously @@ -286,23 +286,23 @@ impl GitScanner { // Build a scanable hunk content stream using context and added lines. // Removed lines are skipped because they do not exist in the new file version. 
- if line.origin() == '+' || line.origin() == ' ' { - if let Ok(content) = std::str::from_utf8(line.content()) { - builder.content.push_str(content); - if !content.ends_with('\n') { - builder.content.push('\n'); - } - - if line.origin() == '+' { - if let Some(actual_line) = line.new_lineno() { - builder - .added_line_map - .insert(builder.snippet_line, actual_line); - } - } + if (line.origin() == '+' || line.origin() == ' ') + && let Ok(content) = std::str::from_utf8(line.content()) + { + builder.content.push_str(content); + if !content.ends_with('\n') { + builder.content.push('\n'); + } - builder.snippet_line = builder.snippet_line.saturating_add(1); + if line.origin() == '+' + && let Some(actual_line) = line.new_lineno() + { + builder + .added_line_map + .insert(builder.snippet_line, actual_line); } + + builder.snippet_line = builder.snippet_line.saturating_add(1); } true diff --git a/vulnera-secrets/src/infrastructure/scanner.rs b/vulnera-secrets/src/infrastructure/scanner.rs index 84fccf0b..e973038a 100644 --- a/vulnera-secrets/src/infrastructure/scanner.rs +++ b/vulnera-secrets/src/infrastructure/scanner.rs @@ -65,15 +65,14 @@ impl DirectoryScanner { // Use filter_entry to skip excluded directories efficiently let it = walker.into_iter().filter_entry(|e| { - if e.file_type().is_dir() { - if let Some(dir_name) = e.file_name().to_str() { - if self.exclude_patterns.iter().any(|p| { - dir_name.contains(p) - || p.contains('*') && dir_name.matches(&p.replace('*', "")).count() > 0 - }) { - return false; - } - } + if e.file_type().is_dir() + && let Some(dir_name) = e.file_name().to_str() + && self.exclude_patterns.iter().any(|p| { + dir_name.contains(p) + || p.contains('*') && dir_name.matches(&p.replace('*', "")).count() > 0 + }) + { + return false; } true }); diff --git a/vulnera-secrets/src/infrastructure/verification/aws_verifier.rs b/vulnera-secrets/src/infrastructure/verification/aws_verifier.rs index 75b585f3..e656081c 100644 --- 
a/vulnera-secrets/src/infrastructure/verification/aws_verifier.rs +++ b/vulnera-secrets/src/infrastructure/verification/aws_verifier.rs @@ -118,17 +118,17 @@ impl SecretVerifier for AwsVerifier { if let Some(ctx) = context { match secret_type { SecretType::AwsAccessKey => { - if let Some(secret_key) = ctx.get(&SecretType::AwsSecretKey) { - if Self::validate_secret_key_format(secret_key) { - debug!("Found both AWS access key and secret key with valid formats"); - } + if let Some(secret_key) = ctx.get(&SecretType::AwsSecretKey) + && Self::validate_secret_key_format(secret_key) + { + debug!("Found both AWS access key and secret key with valid formats"); } } SecretType::AwsSecretKey => { - if let Some(access_key) = ctx.get(&SecretType::AwsAccessKey) { - if Self::validate_access_key_format(access_key) { - debug!("Found both AWS access key and secret key with valid formats"); - } + if let Some(access_key) = ctx.get(&SecretType::AwsAccessKey) + && Self::validate_access_key_format(access_key) + { + debug!("Found both AWS access key and secret key with valid formats"); } } _ => {} From d484c66c78a82f32930c249948922705076f048a Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Sat, 14 Feb 2026 18:02:14 +0200 Subject: [PATCH 5/9] sast: add quality gates and CI tests - Introduce SAST `quality_gates` in config/default.toml for CI/benchmark enforcement - Add `SastQualityGatesConfig` to vulnera-core::config and include in `SastConfig` with defaults - Update vulnera-sast tests to read gates from config and enforce: - aggregate precision/recall and CWE coverage - primary language gates for Python and combined JS/TS - incremental scan latency ratio test - deep-scan resident memory budget (Linux-only, reads /proc for RSS/HWM) - Centralize thresholds so CI can be tuned via config; failing gates will fail the accuracy tests --- config/default.toml | 17 ++ vulnera-core/src/config/mod.rs | 48 +++++ vulnera-sast/tests/test_accuracy_report.rs | 240 
++++++++++++++++++++- 3 files changed, 299 insertions(+), 6 deletions(-) diff --git a/config/default.toml b/config/default.toml index 9137c063..0b4bcc96 100644 --- a/config/default.toml +++ b/config/default.toml @@ -190,6 +190,23 @@ min_finding_confidence = "low" # high | medium | low require_data_flow_evidence_for_dataflow = false require_recommendation = false +[sast.quality_gates] +# Minimum aggregate precision/recall for fixture-based SAST accuracy tests +min_precision = 0.70 +min_recall = 0.50 +# Minimum unique CWE identifiers represented by CVE fixtures +min_cwe_coverage = 12 +# Maximum allowed second/first scan duration ratio for incremental mode +max_incremental_duration_ratio = 1.20 +# Maximum resident memory (RSS, MB) during deep-scan quality-gate tests +max_resident_memory_mb = 2048 +# Enforce focused rollout gates for Python and combined JS/TS quality +enforce_primary_language_gates = true +python_min_precision = 0.75 +python_min_recall = 0.60 +js_ts_min_precision = 0.75 +js_ts_min_recall = 0.60 + # Cache TTL for popular package vulnerability listings (in hours) cache_ttl_hours = 24 diff --git a/vulnera-core/src/config/mod.rs b/vulnera-core/src/config/mod.rs index 4859c7c4..30defe18 100644 --- a/vulnera-core/src/config/mod.rs +++ b/vulnera-core/src/config/mod.rs @@ -654,6 +654,51 @@ impl Default for RulePackConfig { } } +/// SAST quality gate thresholds used for CI and benchmark enforcement. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct SastQualityGatesConfig { + /// Minimum aggregate precision required by the SAST accuracy report. + pub min_precision: f64, + /// Minimum aggregate recall required by the SAST accuracy report. + pub min_recall: f64, + /// Minimum unique CWE IDs required across CVE fixtures. + pub min_cwe_coverage: usize, + /// Maximum allowed ratio for incremental scan duration (second_run / first_run). + /// + /// Values < 1.0 require speedup. Values > 1.0 allow some variance. 
+ pub max_incremental_duration_ratio: f64, + /// Maximum allowed resident set size (RSS) in MB during deep-scan quality tests. + pub max_resident_memory_mb: u64, + /// Enforce focused quality gates for primary rollout languages (Python + JS/TS). + pub enforce_primary_language_gates: bool, + /// Minimum precision required for Python fixtures. + pub python_min_precision: f64, + /// Minimum recall required for Python fixtures. + pub python_min_recall: f64, + /// Minimum precision required for combined JavaScript + TypeScript fixtures. + pub js_ts_min_precision: f64, + /// Minimum recall required for combined JavaScript + TypeScript fixtures. + pub js_ts_min_recall: f64, +} + +impl Default for SastQualityGatesConfig { + fn default() -> Self { + Self { + min_precision: 0.70, + min_recall: 0.50, + min_cwe_coverage: 12, + max_incremental_duration_ratio: 1.20, + max_resident_memory_mb: 2048, + enforce_primary_language_gates: true, + python_min_precision: 0.75, + python_min_recall: 0.60, + js_ts_min_precision: 0.75, + js_ts_min_recall: 0.60, + } + } +} + /// SAST (Static Application Security Testing) configuration #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] @@ -719,6 +764,8 @@ pub struct SastConfig { pub enable_incremental: Option, /// Path to store incremental analysis state (file hashes) pub incremental_state_path: Option, + /// CI quality gate thresholds for SAST benchmark and regression tests + pub quality_gates: SastQualityGatesConfig, } impl Default for SastConfig { @@ -771,6 +818,7 @@ impl Default for SastConfig { require_recommendation: false, enable_incremental: Some(false), // Disabled by default incremental_state_path: None, + quality_gates: SastQualityGatesConfig::default(), } } } diff --git a/vulnera-sast/tests/test_accuracy_report.rs b/vulnera-sast/tests/test_accuracy_report.rs index 9962fd51..78df488f 100644 --- a/vulnera-sast/tests/test_accuracy_report.rs +++ b/vulnera-sast/tests/test_accuracy_report.rs @@ -15,6 +15,8 @@ use 
common::accuracy::AccuracyReport; use common::fixture_runner; use common::fixture_types::CveFixture; use std::path::PathBuf; +use vulnera_core::config::{AnalysisDepth, SastConfig}; +use vulnera_sast::{AnalysisConfig, ScanProjectUseCase}; /// Discover all YAML fixture files under `tests/data/cve-fixtures/`. fn discover_fixtures() -> Vec { @@ -36,6 +38,25 @@ fn discover_fixtures() -> Vec { .collect() } +fn read_linux_status_value_kb(field_name: &str) -> Option { + if !cfg!(target_os = "linux") { + return None; + } + + let status = std::fs::read_to_string("/proc/self/status").ok()?; + status.lines().find_map(|line| { + if !line.starts_with(field_name) { + return None; + } + + let value_kb = line + .split_whitespace() + .nth(1) + .and_then(|v| v.parse::().ok()); + value_kb + }) +} + #[tokio::test] async fn accuracy_report_all_fixtures() { let fixture_paths = discover_fixtures(); @@ -44,8 +65,10 @@ async fn accuracy_report_all_fixtures() { return; } + let quality_gates = SastConfig::default().quality_gates; let mut report = AccuracyReport::new(); let mut fixture_failures: Vec = Vec::new(); + let mut covered_cwes: std::collections::HashSet = std::collections::HashSet::new(); for path in &fixture_paths { let fixture = match CveFixture::from_file(path) { @@ -56,6 +79,10 @@ async fn accuracy_report_all_fixtures() { } }; + for cwe in &fixture.cwe { + covered_cwes.insert(cwe.trim().to_string()); + } + let result = fixture_runner::run_fixture(&fixture).await; let metrics = report.metrics_for(&fixture.language); result.accumulate_metrics(metrics); @@ -90,13 +117,12 @@ async fn accuracy_report_all_fixtures() { "\nAggregate: Precision={precision:.3} Recall={recall:.3} F1={f1:.3} (TP={} TN={} FP={} FN={})", agg.true_positives, agg.true_negatives, agg.false_positives, agg.false_negatives ); + eprintln!("CWE coverage: {} unique IDs", covered_cwes.len()); - // CI Gate thresholds — lenient during bootstrap, tighten over time: - // Phase 4 initial: P ≥ 0.70, R ≥ 0.50 - // Phase 5 
target: P ≥ 0.85, R ≥ 0.70 - // Phase 6 target: P ≥ 0.90, R ≥ 0.80 - let min_precision = 0.70; - let min_recall = 0.50; + // CI Gate thresholds are centralized in SastConfig::quality_gates and can be + // tightened over time without changing test logic. + let min_precision = quality_gates.min_precision; + let min_recall = quality_gates.min_recall; assert!( precision >= min_precision, @@ -106,6 +132,74 @@ async fn accuracy_report_all_fixtures() { recall >= min_recall, "CI GATE: Aggregate recall {recall:.3} < {min_recall} threshold" ); + assert!( + covered_cwes.len() >= quality_gates.min_cwe_coverage, + "CI GATE: CWE coverage {} < {} threshold", + covered_cwes.len(), + quality_gates.min_cwe_coverage + ); + + if quality_gates.enforce_primary_language_gates { + let python = report + .per_language + .get("python") + .expect("CI GATE: missing Python fixtures/metrics in accuracy report"); + let python_precision = python.precision().unwrap_or(0.0); + let python_recall = python.recall().unwrap_or(0.0); + + assert!( + python_precision >= quality_gates.python_min_precision, + "CI GATE: Python precision {python_precision:.3} < {:.3}", + quality_gates.python_min_precision + ); + assert!( + python_recall >= quality_gates.python_min_recall, + "CI GATE: Python recall {python_recall:.3} < {:.3}", + quality_gates.python_min_recall + ); + + let js = report.per_language.get("javascript"); + let ts = report.per_language.get("typescript"); + + let js_ts_tp = + js.map_or(0usize, |m| m.true_positives) + ts.map_or(0usize, |m| m.true_positives); + let js_ts_fp = + js.map_or(0usize, |m| m.false_positives) + ts.map_or(0usize, |m| m.false_positives); + let js_ts_fn = + js.map_or(0usize, |m| m.false_negatives) + ts.map_or(0usize, |m| m.false_negatives); + + let js_ts_precision = if js_ts_tp + js_ts_fp == 0 { + 0.0 + } else { + js_ts_tp as f64 / (js_ts_tp + js_ts_fp) as f64 + }; + let js_ts_recall = if js_ts_tp + js_ts_fn == 0 { + 0.0 + } else { + js_ts_tp as f64 / (js_ts_tp + js_ts_fn) as f64 + 
}; + + assert!( + js_ts_tp + js_ts_fp + js_ts_fn > 0, + "CI GATE: missing JavaScript/TypeScript fixtures for primary language gate" + ); + + eprintln!( + "Primary language gates: python(P={:.3},R={:.3}) js_ts(P={:.3},R={:.3})", + python_precision, python_recall, js_ts_precision, js_ts_recall + ); + + assert!( + js_ts_precision >= quality_gates.js_ts_min_precision, + "CI GATE: JS/TS precision {js_ts_precision:.3} < {:.3}", + quality_gates.js_ts_min_precision + ); + assert!( + js_ts_recall >= quality_gates.js_ts_min_recall, + "CI GATE: JS/TS recall {js_ts_recall:.3} < {:.3}", + quality_gates.js_ts_min_recall + ); + } // Soft check: warn (but don't fail) if any single language is below threshold for (lang, metrics) in &report.per_language { @@ -177,3 +271,137 @@ fn accuracy_metrics_unit_test_aggregation() { // R = 18/(18+5) ≈ 0.783 assert!((agg.recall().unwrap() - 18.0 / 23.0).abs() < 0.001); } + +#[tokio::test] +async fn incremental_scan_latency_quality_gate() { + let temp_dir = tempfile::tempdir().expect("Failed to create temp dir"); + let state_path = temp_dir.path().join("incremental-state.json"); + + // Create a moderately-sized synthetic repo to exercise incremental skipping. 
+ for idx in 0..120usize { + let file_path = temp_dir.path().join(format!("module_{idx}.py")); + std::fs::write( + &file_path, + format!( + "def fn_{idx}(x):\n y = x + 1\n return y\n\nvalue_{idx} = fn_{idx}(41)\n" + ), + ) + .expect("Failed to write synthetic source file"); + } + + let sast_config = SastConfig { + analysis_depth: AnalysisDepth::Quick, + enable_data_flow: false, + enable_call_graph: false, + enable_incremental: Some(true), + incremental_state_path: Some(state_path), + ..Default::default() + }; + let quality_gates = sast_config.quality_gates.clone(); + + let first_use_case = + ScanProjectUseCase::with_config(&sast_config, AnalysisConfig::from(&sast_config)); + + let first = first_use_case + .execute(temp_dir.path()) + .await + .expect("First scan should succeed"); + // Create a fresh use-case so incremental state is reloaded from disk and becomes + // the previous-state baseline for the second run. + let second_use_case = + ScanProjectUseCase::with_config(&sast_config, AnalysisConfig::from(&sast_config)); + let second = second_use_case + .execute(temp_dir.path()) + .await + .expect("Second scan should succeed"); + + let ratio = if first.duration_ms == 0 { + 0.0 + } else { + second.duration_ms as f64 / first.duration_ms as f64 + }; + + eprintln!( + "Incremental timing: first={}ms second={}ms ratio={:.3}", + first.duration_ms, second.duration_ms, ratio + ); + + assert!( + second.files_skipped >= first.files_scanned.saturating_sub(1), + "Expected second incremental scan to skip most files. 
first_scanned={}, second_skipped={}", + first.files_scanned, + second.files_skipped + ); + + assert!( + ratio <= quality_gates.max_incremental_duration_ratio, + "CI GATE: incremental scan ratio {:.3} exceeds threshold {:.3}", + ratio, + quality_gates.max_incremental_duration_ratio + ); +} + +#[tokio::test] +async fn deep_scan_memory_budget_quality_gate() { + if !cfg!(target_os = "linux") { + eprintln!("⚠ deep_scan_memory_budget_quality_gate skipped: non-Linux target"); + return; + } + + let temp_dir = tempfile::tempdir().expect("Failed to create temp dir"); + + // Build a moderate synthetic project to exercise deep-scan memory behavior. + for idx in 0..180usize { + let file_path = temp_dir.path().join(format!("deep_{idx}.py")); + std::fs::write( + &file_path, + format!( + "import os\n\ndef source_{idx}():\n return os.environ.get('USER_INPUT')\n\ndef sink_{idx}(v):\n eval(v)\n\nx_{idx} = source_{idx}()\nsink_{idx}(x_{idx})\n" + ), + ) + .expect("Failed to write deep scan fixture file"); + } + + let sast_config = SastConfig { + analysis_depth: AnalysisDepth::Deep, + enable_data_flow: true, + enable_call_graph: true, + ..Default::default() + }; + let quality_gates = sast_config.quality_gates.clone(); + + let baseline_rss_kb = read_linux_status_value_kb("VmRSS:").unwrap_or(0); + + let use_case = + ScanProjectUseCase::with_config(&sast_config, AnalysisConfig::from(&sast_config)); + let result = use_case + .execute(temp_dir.path()) + .await + .expect("Deep scan should succeed"); + + let final_rss_kb = read_linux_status_value_kb("VmRSS:").unwrap_or(0); + let peak_hwm_kb = read_linux_status_value_kb("VmHWM:").unwrap_or(final_rss_kb); + + let baseline_rss_mb = baseline_rss_kb as f64 / 1024.0; + let final_rss_mb = final_rss_kb as f64 / 1024.0; + let peak_hwm_mb = peak_hwm_kb as f64 / 1024.0; + let delta_rss_mb = ((final_rss_kb.saturating_sub(baseline_rss_kb)) as f64) / 1024.0; + + eprintln!( + "Deep memory gate: scanned={} findings={} baseline_rss={:.1}MB final_rss={:.1}MB 
delta={:.1}MB peak_hwm={:.1}MB budget={}MB", + result.files_scanned, + result.findings.len(), + baseline_rss_mb, + final_rss_mb, + delta_rss_mb, + peak_hwm_mb, + quality_gates.max_resident_memory_mb + ); + + assert!( + peak_hwm_mb <= quality_gates.max_resident_memory_mb as f64, + "CI GATE: peak resident memory {:.1}MB exceeds threshold {}MB", + peak_hwm_mb, + quality_gates.max_resident_memory_mb + ); +} From 3a5e667f6a7bba49e50ad7a52c5f12bb89db62d4 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Sat, 14 Feb 2026 18:21:21 +0200 Subject: [PATCH 6/9] feat(sast): per-language quality gates and typing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Introduce per-language precision/recall gates and min_languages_with_fixtures threshold, replacing the focused primary-language gates - Add validation for SAST quality gate fields (precision/recall in [0,1], non-zero memory, non-negative duration ratio) to fail fast on invalid config - Improve semantic type matching: normalize type names, add types_compatible helper, handle identifier aliasing and TypeScript annotations, and use the TypeScript tree-sitter grammar separately from JavaScript - Add a C++ fixture to broaden language coverage and update tests: accuracy report now enforces per-language thresholds and new unit tests cover alias/type annotation semantic constraints BREAKING CHANGE: config keys renamed — update config/default.toml (enforce_primary_language_gates → enforce_per_language_gates, python/js_ts_* keys → per_language_min_precision/ per_language_min_recall) and add min_languages_with_fixtures or rely on new defaults --- config/default.toml | 12 +- vulnera-core/src/config/mod.rs | 109 +++++++++++++++--- .../src/infrastructure/sast_engine.rs | 24 +++- vulnera-sast/src/infrastructure/semantic.rs | 60 +++++++++- .../c_cpp/cpp-command-injection.yaml | 30 +++++ vulnera-sast/tests/test_accuracy_report.rs | 81 ++++--------- 
vulnera-sast/tests/test_semantic_rules.rs | 109 ++++++++++++++++++ 7 files changed, 341 insertions(+), 84 deletions(-) create mode 100644 vulnera-sast/tests/data/cve-fixtures/c_cpp/cpp-command-injection.yaml diff --git a/config/default.toml b/config/default.toml index 0b4bcc96..88f39b25 100644 --- a/config/default.toml +++ b/config/default.toml @@ -200,12 +200,12 @@ min_cwe_coverage = 12 max_incremental_duration_ratio = 1.20 # Maximum resident memory (RSS, MB) during deep-scan quality-gate tests max_resident_memory_mb = 2048 -# Enforce focused rollout gates for Python and combined JS/TS quality -enforce_primary_language_gates = true -python_min_precision = 0.75 -python_min_recall = 0.60 -js_ts_min_precision = 0.75 -js_ts_min_recall = 0.60 +# Enforce precision/recall gates for each language represented in fixtures +enforce_per_language_gates = true +per_language_min_precision = 0.70 +per_language_min_recall = 0.50 +# Require broad multi-language coverage for benchmark validity +min_languages_with_fixtures = 7 # Cache TTL for popular package vulnerability listings (in hours) diff --git a/vulnera-core/src/config/mod.rs b/vulnera-core/src/config/mod.rs index 30defe18..e78db475 100644 --- a/vulnera-core/src/config/mod.rs +++ b/vulnera-core/src/config/mod.rs @@ -670,16 +670,14 @@ pub struct SastQualityGatesConfig { pub max_incremental_duration_ratio: f64, /// Maximum allowed resident set size (RSS) in MB during deep-scan quality tests. pub max_resident_memory_mb: u64, - /// Enforce focused quality gates for primary rollout languages (Python + JS/TS). - pub enforce_primary_language_gates: bool, - /// Minimum precision required for Python fixtures. - pub python_min_precision: f64, - /// Minimum recall required for Python fixtures. - pub python_min_recall: f64, - /// Minimum precision required for combined JavaScript + TypeScript fixtures. - pub js_ts_min_precision: f64, - /// Minimum recall required for combined JavaScript + TypeScript fixtures. 
- pub js_ts_min_recall: f64, + /// Enforce precision/recall gates for every language represented in fixtures. + pub enforce_per_language_gates: bool, + /// Minimum precision required per language in fixture-based tests. + pub per_language_min_precision: f64, + /// Minimum recall required per language in fixture-based tests. + pub per_language_min_recall: f64, + /// Minimum number of distinct fixture languages required for benchmark validity. + pub min_languages_with_fixtures: usize, } impl Default for SastQualityGatesConfig { @@ -690,11 +688,10 @@ impl Default for SastQualityGatesConfig { min_cwe_coverage: 12, max_incremental_duration_ratio: 1.20, max_resident_memory_mb: 2048, - enforce_primary_language_gates: true, - python_min_precision: 0.75, - python_min_recall: 0.60, - js_ts_min_precision: 0.75, - js_ts_min_recall: 0.60, + enforce_per_language_gates: true, + per_language_min_precision: 0.70, + per_language_min_recall: 0.50, + min_languages_with_fixtures: 7, } } } @@ -1455,6 +1452,43 @@ impl Validate for Config { "Analytics cleanup_interval_hours must be > 0", )); } + + let gates = &self.sast.quality_gates; + if !(0.0..=1.0).contains(&gates.min_precision) { + return Err(ValidationError::api( + "SAST quality gate min_precision must be in [0.0, 1.0]", + )); + } + if !(0.0..=1.0).contains(&gates.min_recall) { + return Err(ValidationError::api( + "SAST quality gate min_recall must be in [0.0, 1.0]", + )); + } + if !(0.0..=1.0).contains(&gates.per_language_min_precision) { + return Err(ValidationError::api( + "SAST quality gate per_language_min_precision must be in [0.0, 1.0]", + )); + } + if !(0.0..=1.0).contains(&gates.per_language_min_recall) { + return Err(ValidationError::api( + "SAST quality gate per_language_min_recall must be in [0.0, 1.0]", + )); + } + if gates.min_languages_with_fixtures == 0 { + return Err(ValidationError::api( + "SAST quality gate min_languages_with_fixtures must be > 0", + )); + } + if gates.max_incremental_duration_ratio < 0.0 { + 
return Err(ValidationError::api( + "SAST quality gate max_incremental_duration_ratio must be >= 0.0", + )); + } + if gates.max_resident_memory_mb == 0 { + return Err(ValidationError::api( + "SAST quality gate max_resident_memory_mb must be > 0", + )); + } Ok(()) } } @@ -1499,3 +1533,48 @@ pub enum ConfigLoadError { #[error("Configuration validation error: {0}")] Validation(#[from] ValidationError), } + +#[cfg(test)] +mod tests { + use super::*; + + fn valid_config_for_tests() -> Config { + let mut config = Config::default(); + config.auth.jwt_secret = "a9f7b2c4e6h8k1m3p5r7t9v2x4z6b8d0q1w3e5r7t9y2u4i6".to_string(); + config + } + + #[test] + fn test_quality_gates_validation_accepts_valid_defaults() { + let config = valid_config_for_tests(); + assert!(config.validate().is_ok()); + } + + #[test] + fn test_quality_gates_validation_rejects_invalid_per_language_precision() { + let mut config = valid_config_for_tests(); + config.sast.quality_gates.per_language_min_precision = 1.5; + + let err = config + .validate() + .expect_err("config validation should fail"); + assert!( + err.to_string().contains("per_language_min_precision"), + "unexpected validation error: {err}" + ); + } + + #[test] + fn test_quality_gates_validation_rejects_zero_language_coverage_requirement() { + let mut config = valid_config_for_tests(); + config.sast.quality_gates.min_languages_with_fixtures = 0; + + let err = config + .validate() + .expect_err("config validation should fail"); + assert!( + err.to_string().contains("min_languages_with_fixtures"), + "unexpected validation error: {err}" + ); + } +} diff --git a/vulnera-sast/src/infrastructure/sast_engine.rs b/vulnera-sast/src/infrastructure/sast_engine.rs index e5a7902a..2f1165fc 100755 --- a/vulnera-sast/src/infrastructure/sast_engine.rs +++ b/vulnera-sast/src/infrastructure/sast_engine.rs @@ -412,7 +412,10 @@ impl SastEngine { match inferred { Some(ty) => { - if !allowed_types.iter().any(|allowed| allowed == &ty) { + if !allowed_types + .iter() 
+ .any(|allowed| types_compatible(allowed, &ty)) + { return false; } } @@ -964,6 +967,25 @@ fn normalize_identifier(text: &str) -> String { .to_string() } +fn normalize_type_name(raw: &str) -> String { + let trimmed = raw.trim(); + let base = trimmed + .split(['<', '[', '|', '&']) + .next() + .unwrap_or(trimmed); + base.rsplit(['.', ':']) + .next() + .unwrap_or(base) + .trim() + .to_ascii_lowercase() +} + +fn types_compatible(expected: &str, inferred: &str) -> bool { + let expected_norm = normalize_type_name(expected); + let inferred_norm = normalize_type_name(inferred); + expected_norm == inferred_norm +} + #[cfg(test)] mod tests { use super::*; diff --git a/vulnera-sast/src/infrastructure/semantic.rs b/vulnera-sast/src/infrastructure/semantic.rs index 46e4c453..d1e0ae46 100644 --- a/vulnera-sast/src/infrastructure/semantic.rs +++ b/vulnera-sast/src/infrastructure/semantic.rs @@ -39,8 +39,12 @@ fn infer_types(tree: &Tree, source_bytes: &[u8], language: Language) -> HashMap< left: (identifier) @var right: (call function: (attribute attribute: (identifier) @type)) )"#, + r#"(assignment + left: (identifier) @var + right: (identifier) @alias + )"#, ], - Language::JavaScript | Language::TypeScript => vec![ + Language::JavaScript => vec![ r#"(variable_declarator name: (identifier) @var value: (new_expression constructor: (identifier) @type) @@ -49,6 +53,44 @@ fn infer_types(tree: &Tree, source_bytes: &[u8], language: Language) -> HashMap< left: (identifier) @var right: (new_expression constructor: (identifier) @type) )"#, + r#"(variable_declarator + name: (identifier) @var + value: (identifier) @alias + )"#, + r#"(assignment_expression + left: (identifier) @var + right: (identifier) @alias + )"#, + ], + Language::TypeScript => vec![ + r#"(variable_declarator + name: (identifier) @var + type: (type_annotation (type_identifier) @type) + )"#, + r#"(required_parameter + pattern: (identifier) @var + type: (type_annotation (type_identifier) @type) + )"#, + 
r#"(optional_parameter + pattern: (identifier) @var + type: (type_annotation (type_identifier) @type) + )"#, + r#"(variable_declarator + name: (identifier) @var + value: (new_expression constructor: (identifier) @type) + )"#, + r#"(assignment_expression + left: (identifier) @var + right: (new_expression constructor: (identifier) @type) + )"#, + r#"(variable_declarator + name: (identifier) @var + value: (identifier) @alias + )"#, + r#"(assignment_expression + left: (identifier) @var + right: (identifier) @alias + )"#, ], _ => Vec::new(), }; @@ -59,7 +101,8 @@ fn infer_types(tree: &Tree, source_bytes: &[u8], language: Language) -> HashMap< let ts_language = match language { Language::Python => tree_sitter_python::LANGUAGE.into(), - Language::JavaScript | Language::TypeScript => tree_sitter_javascript::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), Language::Go => tree_sitter_go::LANGUAGE.into(), Language::Rust => tree_sitter_rust::LANGUAGE.into(), Language::C => tree_sitter_c::LANGUAGE.into(), @@ -81,6 +124,7 @@ fn infer_types(tree: &Tree, source_bytes: &[u8], language: Language) -> HashMap< } { let mut var: Option = None; let mut ty: Option = None; + let mut alias: Option = None; for capture in m.captures { let capture_name = query.capture_names()[capture.index as usize]; @@ -93,12 +137,20 @@ fn infer_types(tree: &Tree, source_bytes: &[u8], language: Language) -> HashMap< match capture_name { "var" => var = Some(text), "type" => ty = Some(text), + "alias" => alias = Some(text), _ => {} } } - if let (Some(var), Some(ty)) = (var, ty) { - types.insert(var, ty); + if let (Some(var_name), Some(type_name)) = (var.as_ref(), ty.as_ref()) { + types.insert(var_name.clone(), type_name.clone()); + continue; + } + + if let (Some(var_name), Some(alias_name)) = (var.as_ref(), alias.as_ref()) + && let Some(alias_ty) = types.get(alias_name.as_str()).cloned() + { + 
types.insert(var_name.clone(), alias_ty); } } } diff --git a/vulnera-sast/tests/data/cve-fixtures/c_cpp/cpp-command-injection.yaml b/vulnera-sast/tests/data/cve-fixtures/c_cpp/cpp-command-injection.yaml new file mode 100644 index 00000000..b0ef05d2 --- /dev/null +++ b/vulnera-sast/tests/data/cve-fixtures/c_cpp/cpp-command-injection.yaml @@ -0,0 +1,30 @@ +# C++ Command Injection baseline fixture +# Ensures cpp fixture language is represented in generic per-language quality gates. + +id: "CPP-RAW-POINTER" +name: "C++ Raw Pointer Safety" +language: "cpp" +vulnerability_type: "memory_safety" +severity: "medium" +cwe: + - "CWE-401" +impact: "Raw pointers increase memory management risk" + +test_cases: + - name: "raw pointer allocation with new" + vulnerable: true + code: | + int* make_ptr() { + int* value = new int(42); + return value; + } + expected_findings: + - rule_id: "cpp-raw-ptr" + + - name: "stack allocation without raw new (safe)" + vulnerable: false + code: | + void run_safe() { + int value = 42; + (void)value; + } diff --git a/vulnera-sast/tests/test_accuracy_report.rs b/vulnera-sast/tests/test_accuracy_report.rs index 78df488f..455af9e9 100644 --- a/vulnera-sast/tests/test_accuracy_report.rs +++ b/vulnera-sast/tests/test_accuracy_report.rs @@ -139,66 +139,29 @@ async fn accuracy_report_all_fixtures() { quality_gates.min_cwe_coverage ); - if quality_gates.enforce_primary_language_gates { - let python = report - .per_language - .get("python") - .expect("CI GATE: missing Python fixtures/metrics in accuracy report"); - let python_precision = python.precision().unwrap_or(0.0); - let python_recall = python.recall().unwrap_or(0.0); - - assert!( - python_precision >= quality_gates.python_min_precision, - "CI GATE: Python precision {python_precision:.3} < {:.3}", - quality_gates.python_min_precision - ); - assert!( - python_recall >= quality_gates.python_min_recall, - "CI GATE: Python recall {python_recall:.3} < {:.3}", - quality_gates.python_min_recall - ); - - let 
js = report.per_language.get("javascript"); - let ts = report.per_language.get("typescript"); - - let js_ts_tp = - js.map_or(0usize, |m| m.true_positives) + ts.map_or(0usize, |m| m.true_positives); - let js_ts_fp = - js.map_or(0usize, |m| m.false_positives) + ts.map_or(0usize, |m| m.false_positives); - let js_ts_fn = - js.map_or(0usize, |m| m.false_negatives) + ts.map_or(0usize, |m| m.false_negatives); - - let js_ts_precision = if js_ts_tp + js_ts_fp == 0 { - 0.0 - } else { - js_ts_tp as f64 / (js_ts_tp + js_ts_fp) as f64 - }; - let js_ts_recall = if js_ts_tp + js_ts_fn == 0 { - 0.0 - } else { - js_ts_tp as f64 / (js_ts_tp + js_ts_fn) as f64 - }; - + if quality_gates.enforce_per_language_gates { assert!( - js_ts_tp + js_ts_fp + js_ts_fn > 0, - "CI GATE: missing JavaScript/TypeScript fixtures for primary language gate" + report.per_language.len() >= quality_gates.min_languages_with_fixtures, + "CI GATE: fixture language coverage {} < {} threshold", + report.per_language.len(), + quality_gates.min_languages_with_fixtures ); - eprintln!( - "Primary language gates: python(P={:.3},R={:.3}) js_ts(P={:.3},R={:.3})", - python_precision, python_recall, js_ts_precision, js_ts_recall - ); + for (lang, metrics) in &report.per_language { + let lang_precision = metrics.precision().unwrap_or(0.0); + let lang_recall = metrics.recall().unwrap_or(0.0); - assert!( - js_ts_precision >= quality_gates.js_ts_min_precision, - "CI GATE: JS/TS precision {js_ts_precision:.3} < {:.3}", - quality_gates.js_ts_min_precision - ); - assert!( - js_ts_recall >= quality_gates.js_ts_min_recall, - "CI GATE: JS/TS recall {js_ts_recall:.3} < {:.3}", - quality_gates.js_ts_min_recall - ); + assert!( + lang_precision >= quality_gates.per_language_min_precision, + "CI GATE: language '{lang}' precision {lang_precision:.3} < {:.3}", + quality_gates.per_language_min_precision + ); + assert!( + lang_recall >= quality_gates.per_language_min_recall, + "CI GATE: language '{lang}' recall {lang_recall:.3} < {:.3}", + 
quality_gates.per_language_min_recall + ); + } } // Soft check: warn (but don't fail) if any single language is below threshold @@ -216,6 +179,7 @@ async fn accuracy_report_all_fixtures() { #[tokio::test] async fn accuracy_report_has_minimum_fixture_coverage() { let fixture_paths = discover_fixtures(); + let quality_gates = SastConfig::default().quality_gates; // We expect at least fixtures for the primary languages assert!( @@ -236,8 +200,9 @@ async fn accuracy_report_has_minimum_fixture_coverage() { .collect(); assert!( - languages.len() >= 3, - "Expected fixtures covering at least 3 languages, found {}: {:?}", + languages.len() >= quality_gates.min_languages_with_fixtures, + "Expected fixtures covering at least {} languages, found {}: {:?}", + quality_gates.min_languages_with_fixtures, languages.len(), languages ); diff --git a/vulnera-sast/tests/test_semantic_rules.rs b/vulnera-sast/tests/test_semantic_rules.rs index 23793f1f..af245391 100644 --- a/vulnera-sast/tests/test_semantic_rules.rs +++ b/vulnera-sast/tests/test_semantic_rules.rs @@ -112,3 +112,112 @@ async fn test_semantic_constraints_fail_for_mismatched_type() { "semantic constraint should reject mismatched type" ); } + +#[tokio::test] +async fn test_semantic_constraints_pass_for_python_alias_type() { + let source = "client = Client()\nalias = client\n"; + let engine = SastEngine::new(); + let rule = PatternRule { + id: "semantic-python-alias-check".to_string(), + name: "Semantic alias type constraint".to_string(), + description: "Ensure $X alias resolves to Client".to_string(), + severity: Severity::High, + languages: vec![Language::Python], + pattern: Pattern::TreeSitterQuery( + "(assignment left: (identifier) @mv_X right: (identifier) @rhs)".to_string(), + ), + options: RuleOptions::default(), + cwe_ids: vec![], + owasp_categories: vec![], + tags: vec![], + message: None, + fix: None, + metavariable_constraints: vec![], + semantic: Some(SemanticRuleOptions { + required_types: [("$X".to_string(), 
vec!["Client".to_string()])] + .into_iter() + .collect(), + allow_unknown_types: false, + }), + }; + + let results = engine.query_batch(source, Language::Python, &[&rule]).await; + let matches = results + .into_iter() + .find(|(rule_id, _)| rule_id == &rule.id) + .map(|(_, m)| m) + .unwrap_or_default(); + + assert_eq!(matches.len(), 1); + + let tree = engine.parse(source, Language::Python).await.unwrap(); + let semantic_context = SemanticContext::from_tree(&tree, source, Language::Python); + + let allowed = engine + .metavariable_constraints_pass( + &rule, + &matches[0], + Language::Python, + Some(&semantic_context), + ) + .await; + + assert!(allowed, "semantic constraint should pass for typed alias"); +} + +#[tokio::test] +async fn test_semantic_constraints_pass_for_typescript_annotation() { + let source = "let client: Client = createClient();\n"; + let engine = SastEngine::new(); + let rule = PatternRule { + id: "semantic-typescript-annotation-check".to_string(), + name: "Semantic TS type constraint".to_string(), + description: "Ensure $X has annotated type Client".to_string(), + severity: Severity::High, + languages: vec![Language::TypeScript], + pattern: Pattern::TreeSitterQuery( + "(variable_declarator name: (identifier) @mv_X)".to_string(), + ), + options: RuleOptions::default(), + cwe_ids: vec![], + owasp_categories: vec![], + tags: vec![], + message: None, + fix: None, + metavariable_constraints: vec![], + semantic: Some(SemanticRuleOptions { + required_types: [("$X".to_string(), vec!["Client".to_string()])] + .into_iter() + .collect(), + allow_unknown_types: false, + }), + }; + + let results = engine + .query_batch(source, Language::TypeScript, &[&rule]) + .await; + let matches = results + .into_iter() + .find(|(rule_id, _)| rule_id == &rule.id) + .map(|(_, m)| m) + .unwrap_or_default(); + + assert_eq!(matches.len(), 1); + + let tree = engine.parse(source, Language::TypeScript).await.unwrap(); + let semantic_context = SemanticContext::from_tree(&tree, 
source, Language::TypeScript); + + let allowed = engine + .metavariable_constraints_pass( + &rule, + &matches[0], + Language::TypeScript, + Some(&semantic_context), + ) + .await; + + assert!( + allowed, + "semantic constraint should pass for TypeScript type annotation" + ); +} From 9d40866c62759d7b1638e425cce42ae0e3ec38e4 Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Sun, 15 Feb 2026 04:17:15 +0200 Subject: [PATCH 7/9] Add intelligence, sharding, and CFG utilities - Add application intelligence utilities (correlation, ranking, correlation index) using deterministic heuristics as a safe baseline for future ML ranking - Add infrastructure sharding (ShardCandidate, WorkShard, build_balanced_shards) for deterministic, balanced partitioning of files across workers/executors - Add SAST CFG foundations (ControlFlowGraph, CfgBuilder, path enumeration, path constraints) to support future path-sensitive data-flow analysis - Export intelligence module and register new infra modules; add tests and interprocedural callback propagation tests to validate taint flow through callbacks - No breaking changes; new functionality is covered by unit tests --- .../src/application/intelligence.rs | 241 +++++++++++ vulnera-orchestrator/src/application/mod.rs | 2 + .../src/infrastructure/mod.rs | 2 + .../src/infrastructure/sharding.rs | 130 ++++++ vulnera-sast/src/application/use_cases.rs | 42 ++ vulnera-sast/src/infrastructure/cfg.rs | 401 ++++++++++++++++++ vulnera-sast/src/infrastructure/mod.rs | 2 + .../tests/test_interprocedural_taint.rs | 80 ++++ 8 files changed, 900 insertions(+) create mode 100644 vulnera-orchestrator/src/application/intelligence.rs create mode 100644 vulnera-orchestrator/src/infrastructure/sharding.rs create mode 100644 vulnera-sast/src/infrastructure/cfg.rs diff --git a/vulnera-orchestrator/src/application/intelligence.rs b/vulnera-orchestrator/src/application/intelligence.rs new file mode 100644 index 00000000..04bc8a9d --- 
/dev/null +++ b/vulnera-orchestrator/src/application/intelligence.rs @@ -0,0 +1,241 @@ +//! Optional intelligence utilities: ranking and cross-module correlation. +//! +//! This module intentionally uses deterministic heuristics as a safe baseline +//! until ML-assisted ranking is introduced. + +use std::collections::{BTreeMap, HashSet}; + +use vulnera_core::domain::module::{ + Finding, FindingConfidence, FindingSeverity, ModuleResult, ModuleType, +}; + +/// Stable correlation key for findings that likely refer to the same issue. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct CorrelationKey { + pub path: String, + pub line: Option, + pub rule_id: Option, +} + +/// Correlation cluster across modules. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CorrelatedFinding { + pub key: CorrelationKey, + pub modules: Vec, + pub finding_ids: Vec, +} + +/// Ranked finding with deterministic priority score. +#[derive(Debug, Clone)] +pub struct RankedFinding { + pub score: u32, + pub finding: Finding, +} + +/// Build correlation clusters from module results. +pub fn correlate_findings(module_results: &[ModuleResult]) -> Vec { + let mut grouped: BTreeMap, Vec)> = BTreeMap::new(); + + for result in module_results { + for finding in &result.findings { + let key = CorrelationKey { + path: finding.location.path.clone(), + line: finding.location.line, + rule_id: finding.rule_id.clone(), + }; + + let entry = grouped + .entry(key) + .or_insert_with(|| (HashSet::new(), Vec::new())); + entry.0.insert(result.module_type.clone()); + entry.1.push(finding.id.clone()); + } + } + + grouped + .into_iter() + .map(|(key, (modules, finding_ids))| { + let mut modules: Vec = modules.into_iter().collect(); + modules.sort_by_key(|m| format!("{:?}", m)); + + CorrelatedFinding { + key, + modules, + finding_ids, + } + }) + .collect() +} + +/// Deterministic heuristic ranking (safe baseline for future ML ranking). 
+/// +/// Score model: +/// - Severity: critical=100, high=80, medium=50, low=20, info=5 +/// - Confidence: high=15, medium=8, low=3 +/// - Correlation boost: +10 per additional corroborating module (capped +30) +pub fn rank_findings( + findings: Vec, + correlation_index: &BTreeMap, +) -> Vec { + let mut ranked: Vec = findings + .into_iter() + .map(|finding| { + let base = severity_score(&finding.severity) + confidence_score(&finding.confidence); + let corroboration = correlation_index.get(&finding.id).copied().unwrap_or(1); + let boost = ((corroboration.saturating_sub(1) as u32) * 10).min(30); + + RankedFinding { + score: base + boost, + finding, + } + }) + .collect(); + + ranked.sort_by(|lhs, rhs| { + rhs.score + .cmp(&lhs.score) + .then_with(|| lhs.finding.id.cmp(&rhs.finding.id)) + }); + + ranked +} + +/// Build an index: finding_id -> number of corroborating modules. +pub fn build_correlation_index(clusters: &[CorrelatedFinding]) -> BTreeMap { + let mut index = BTreeMap::new(); + for cluster in clusters { + let corroborating_modules = cluster.modules.len(); + for finding_id in &cluster.finding_ids { + index.insert(finding_id.clone(), corroborating_modules); + } + } + index +} + +fn severity_score(severity: &FindingSeverity) -> u32 { + match severity { + FindingSeverity::Critical => 100, + FindingSeverity::High => 80, + FindingSeverity::Medium => 50, + FindingSeverity::Low => 20, + FindingSeverity::Info => 5, + } +} + +fn confidence_score(confidence: &FindingConfidence) -> u32 { + match confidence { + FindingConfidence::High => 15, + FindingConfidence::Medium => 8, + FindingConfidence::Low => 3, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use uuid::Uuid; + use vulnera_core::domain::module::{ + FindingType, Location, ModuleResultMetadata, VulnerabilityFindingMetadata, + }; + + fn mk_finding( + id: &str, + path: &str, + line: u32, + rule_id: Option<&str>, + severity: FindingSeverity, + confidence: FindingConfidence, + ) -> Finding { + Finding { + id: 
id.to_string(), + r#type: FindingType::Vulnerability, + rule_id: rule_id.map(ToString::to_string), + location: Location { + path: path.to_string(), + line: Some(line), + column: None, + end_line: Some(line), + end_column: None, + }, + severity, + confidence, + description: "desc".to_string(), + recommendation: None, + secret_metadata: None, + vulnerability_metadata: VulnerabilityFindingMetadata::default(), + enrichment: None, + } + } + + fn mk_result(module_type: ModuleType, findings: Vec) -> ModuleResult { + ModuleResult { + job_id: Uuid::new_v4(), + module_type, + findings, + metadata: ModuleResultMetadata::default(), + error: None, + } + } + + #[test] + fn correlation_groups_by_location_and_rule() { + let shared_a = mk_finding( + "sast-1", + "src/app.py", + 12, + Some("rule-a"), + FindingSeverity::High, + FindingConfidence::High, + ); + let shared_b = mk_finding( + "sec-1", + "src/app.py", + 12, + Some("rule-a"), + FindingSeverity::Medium, + FindingConfidence::Medium, + ); + + let module_results = vec![ + mk_result(ModuleType::SAST, vec![shared_a]), + mk_result(ModuleType::SecretDetection, vec![shared_b]), + ]; + + let clusters = correlate_findings(&module_results); + assert_eq!(clusters.len(), 1); + assert_eq!(clusters[0].modules.len(), 2); + assert_eq!(clusters[0].finding_ids.len(), 2); + } + + #[test] + fn ranking_prefers_higher_severity_and_corroboration() { + let critical = mk_finding( + "f-critical", + "src/a.py", + 10, + Some("r1"), + FindingSeverity::Critical, + FindingConfidence::Medium, + ); + let high = mk_finding( + "f-high", + "src/b.py", + 11, + Some("r2"), + FindingSeverity::High, + FindingConfidence::High, + ); + + let index = BTreeMap::from([ + ("f-critical".to_string(), 1usize), + ("f-high".to_string(), 4usize), + ]); + + let ranked = rank_findings(vec![high.clone(), critical.clone()], &index); + + // high: 80 + 15 + 30(max boost) = 125 + // critical: 100 + 8 + 0 = 108 + assert_eq!(ranked[0].finding.id, "f-high"); + 
assert_eq!(ranked[1].finding.id, "f-critical"); + } +} diff --git a/vulnera-orchestrator/src/application/mod.rs b/vulnera-orchestrator/src/application/mod.rs index 795f78f0..6a0ac0c5 100644 --- a/vulnera-orchestrator/src/application/mod.rs +++ b/vulnera-orchestrator/src/application/mod.rs @@ -1,7 +1,9 @@ //! Orchestrator application layer +pub mod intelligence; pub mod use_cases; pub mod workflow; +pub use intelligence::*; pub use use_cases::*; pub use workflow::JobWorkflow; diff --git a/vulnera-orchestrator/src/infrastructure/mod.rs b/vulnera-orchestrator/src/infrastructure/mod.rs index 0fc2c2ad..d7c699df 100644 --- a/vulnera-orchestrator/src/infrastructure/mod.rs +++ b/vulnera-orchestrator/src/infrastructure/mod.rs @@ -7,6 +7,7 @@ pub mod module_registry; pub mod module_selector; pub mod project_detection; pub mod s3; +pub mod sharding; pub use git::*; pub use job_queue::*; @@ -15,3 +16,4 @@ pub use module_registry::*; pub use module_selector::*; pub use project_detection::*; pub use s3::*; +pub use sharding::*; diff --git a/vulnera-orchestrator/src/infrastructure/sharding.rs b/vulnera-orchestrator/src/infrastructure/sharding.rs new file mode 100644 index 00000000..b69b1abd --- /dev/null +++ b/vulnera-orchestrator/src/infrastructure/sharding.rs @@ -0,0 +1,130 @@ +//! Sharding utilities for distributed analysis execution. +//! +//! Provides deterministic partitioning of files into balanced shards that can be +//! dispatched across worker pools or remote executors. + +use std::path::PathBuf; + +/// A file candidate with optional estimated size for balancing. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ShardCandidate { + pub path: PathBuf, + pub estimated_size_bytes: u64, +} + +impl ShardCandidate { + pub fn new(path: impl Into, estimated_size_bytes: u64) -> Self { + Self { + path: path.into(), + estimated_size_bytes, + } + } +} + +/// A deterministic shard payload. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WorkShard { + pub id: usize, + pub total_size_bytes: u64, + pub files: Vec, +} + +/// Build approximately balanced shards using a deterministic best-fit strategy. +/// +/// - `max_shards`: maximum number of shards to generate (>0) +/// - returns empty vector when input is empty +pub fn build_balanced_shards( + mut candidates: Vec, + max_shards: usize, +) -> Vec { + if candidates.is_empty() || max_shards == 0 { + return Vec::new(); + } + + // Deterministic ordering: largest files first, then lexical path tie-breaker. + candidates.sort_by(|a, b| { + b.estimated_size_bytes + .cmp(&a.estimated_size_bytes) + .then_with(|| a.path.cmp(&b.path)) + }); + + let shard_count = max_shards.min(candidates.len()).max(1); + let mut shards: Vec = (0..shard_count) + .map(|id| WorkShard { + id, + total_size_bytes: 0, + files: Vec::new(), + }) + .collect(); + + for candidate in candidates { + // Pick lightest shard; tie-break on shard id for deterministic placement. 
+ let target_idx = shards + .iter() + .enumerate() + .min_by(|(_, lhs), (_, rhs)| { + lhs.total_size_bytes + .cmp(&rhs.total_size_bytes) + .then_with(|| lhs.id.cmp(&rhs.id)) + }) + .map(|(idx, _)| idx) + .unwrap_or(0); + + let shard = &mut shards[target_idx]; + shard.total_size_bytes = shard + .total_size_bytes + .saturating_add(candidate.estimated_size_bytes); + shard.files.push(candidate.path); + } + + shards +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_balanced_shards_is_deterministic() { + let input = vec![ + ShardCandidate::new("a.py", 120), + ShardCandidate::new("b.py", 100), + ShardCandidate::new("c.py", 80), + ShardCandidate::new("d.py", 60), + ]; + + let left = build_balanced_shards(input.clone(), 2); + let right = build_balanced_shards(input, 2); + + assert_eq!(left, right); + assert_eq!(left.len(), 2); + } + + #[test] + fn build_balanced_shards_spreads_large_items() { + let input = vec![ + ShardCandidate::new("big-1.py", 1_000), + ShardCandidate::new("big-2.py", 900), + ShardCandidate::new("small-1.py", 100), + ShardCandidate::new("small-2.py", 100), + ]; + + let shards = build_balanced_shards(input, 2); + assert_eq!(shards.len(), 2); + + let delta = shards[0] + .total_size_bytes + .abs_diff(shards[1].total_size_bytes); + assert!(delta <= 200, "Expected reasonable balancing, delta={delta}"); + } + + #[test] + fn build_balanced_shards_handles_edge_inputs() { + assert!(build_balanced_shards(Vec::new(), 3).is_empty()); + + let single = build_balanced_shards(vec![ShardCandidate::new("one.rs", 10)], 8); + assert_eq!(single.len(), 1); + assert_eq!(single[0].files, vec![PathBuf::from("one.rs")]); + assert_eq!(single[0].total_size_bytes, 10); + } +} diff --git a/vulnera-sast/src/application/use_cases.rs b/vulnera-sast/src/application/use_cases.rs index 308ad6f8..1400f1e7 100644 --- a/vulnera-sast/src/application/use_cases.rs +++ b/vulnera-sast/src/application/use_cases.rs @@ -1237,6 +1237,10 @@ impl ScanProjectUseCase { target_id, 
argument_taints, )); + } else { + Self::propagate_unresolved_callback_call( + analyzer, range, &call, &file_str, + ); } } @@ -1860,6 +1864,44 @@ impl ScanProjectUseCase { None } + fn propagate_unresolved_callback_call( + analyzer: &mut DataFlowAnalyzer, + function_range: &FunctionRange, + call: &CallAssignment, + file: &str, + ) { + let is_callback_param = function_range + .parameters + .iter() + .any(|parameter| parameter.name == call.callee); + + if !is_callback_param { + return; + } + + let first_tainted_argument = call + .args + .iter() + .find_map(|arg| Self::resolve_taint_for_expr(analyzer, arg)); + + if let Some(state) = first_tainted_argument { + analyzer.set_taint_state( + &call.target, + state, + file, + call.line as u32 + 1, + call.column as u32, + ); + + debug!( + callback = %call.callee, + target = %call.target, + line = call.line + 1, + "Propagated taint through unresolved callback parameter call" + ); + } + } + fn extract_call_assignments( tree: &tree_sitter::Tree, source_code: &[u8], diff --git a/vulnera-sast/src/infrastructure/cfg.rs b/vulnera-sast/src/infrastructure/cfg.rs new file mode 100644 index 00000000..778bb61c --- /dev/null +++ b/vulnera-sast/src/infrastructure/cfg.rs @@ -0,0 +1,401 @@ +//! Control-Flow Graph (CFG) foundations for path-sensitive analysis. +//! +//! This module provides a lightweight, language-agnostic CFG representation +//! intended as a stable substrate for future path-sensitive data-flow lanes. + +use std::collections::{BTreeMap, BTreeSet}; + +/// A node identifier inside a CFG. +pub type CfgNodeId = usize; + +/// Basic block kind for path-sensitive reasoning. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CfgNodeKind { + Entry, + Exit, + Statement, + Branch, + Merge, +} + +/// A single CFG node. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CfgNode { + pub id: CfgNodeId, + pub kind: CfgNodeKind, + pub line: Option, + pub label: Option, +} + +/// Directed edge type in a CFG. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CfgEdgeKind { + Normal, + TrueBranch, + FalseBranch, +} + +/// Edge between CFG nodes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CfgEdge { + pub from: CfgNodeId, + pub to: CfgNodeId, + pub kind: CfgEdgeKind, +} + +/// Constraint collected when traversing a branch edge. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PathConstraint { + /// Branch node where this constraint originated. + pub branch_node: CfgNodeId, + /// Optional source-code line for diagnostics. + pub line: Option, + /// Human-friendly condition label (if known). + pub condition_label: Option, + /// Whether the branch was taken as true or false. + pub expected_truth: bool, +} + +/// A single path state through CFG with accumulated constraints. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CfgPath { + pub nodes: Vec, + pub constraints: Vec, +} + +/// Immutable control-flow graph. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ControlFlowGraph { + nodes: BTreeMap, + outgoing: BTreeMap>, + incoming: BTreeMap>, + entry_id: Option, + exit_id: Option, +} + +impl ControlFlowGraph { + pub fn entry_id(&self) -> Option { + self.entry_id + } + + pub fn exit_id(&self) -> Option { + self.exit_id + } + + pub fn node(&self, id: CfgNodeId) -> Option<&CfgNode> { + self.nodes.get(&id) + } + + pub fn nodes(&self) -> impl Iterator { + self.nodes.values() + } + + pub fn outgoing(&self, id: CfgNodeId) -> &[CfgEdge] { + self.outgoing.get(&id).map(Vec::as_slice).unwrap_or(&[]) + } + + pub fn incoming(&self, id: CfgNodeId) -> &[CfgEdge] { + self.incoming.get(&id).map(Vec::as_slice).unwrap_or(&[]) + } + + /// Conservative branch count used as a quick path-sensitivity signal. + pub fn branch_count(&self) -> usize { + self.nodes + .values() + .filter(|node| node.kind == CfgNodeKind::Branch) + .count() + } + + /// Acyclic upper-bound estimate for number of path splits. 
+ /// + /// This computes $2^b$ where $b$ is number of branch nodes, capped at 1<<20. + pub fn path_split_upper_bound(&self) -> usize { + let branches = self.branch_count().min(20) as u32; + 1usize << branches + } + + /// Enumerate execution paths from entry to exit while collecting branch constraints. + /// + /// The search is bounded by `max_paths` to prevent combinatorial explosion. + pub fn enumerate_paths(&self, max_paths: usize) -> Vec { + if max_paths == 0 { + return Vec::new(); + } + + let Some(entry) = self.entry_id else { + return Vec::new(); + }; + let Some(exit) = self.exit_id else { + return Vec::new(); + }; + + let mut collected = Vec::new(); + let mut stack = vec![CfgPath { + nodes: vec![entry], + constraints: Vec::new(), + }]; + + while let Some(path) = stack.pop() { + let Some(¤t) = path.nodes.last() else { + continue; + }; + + if current == exit { + collected.push(path); + if collected.len() >= max_paths { + break; + } + continue; + } + + for edge in self.outgoing(current).iter().rev() { + // Keep traversal loop-safe without requiring full cycle summaries yet. + if path.nodes.contains(&edge.to) { + continue; + } + + let mut next = path.clone(); + next.nodes.push(edge.to); + + if let Some(constraint) = self.constraint_from_edge(edge) { + next.constraints.push(constraint); + } + + stack.push(next); + } + } + + collected + } + + fn constraint_from_edge(&self, edge: &CfgEdge) -> Option { + let expected_truth = match edge.kind { + CfgEdgeKind::TrueBranch => Some(true), + CfgEdgeKind::FalseBranch => Some(false), + CfgEdgeKind::Normal => None, + }?; + + let source_node = self.node(edge.from)?; + if source_node.kind != CfgNodeKind::Branch { + return None; + } + + Some(PathConstraint { + branch_node: source_node.id, + line: source_node.line, + condition_label: source_node.label.clone(), + expected_truth, + }) + } +} + +/// Builder for constructing CFGs. 
+#[derive(Debug, Default)] +pub struct CfgBuilder { + graph: ControlFlowGraph, + next_id: CfgNodeId, +} + +impl CfgBuilder { + pub fn new() -> Self { + Self::default() + } + + pub fn add_node( + &mut self, + kind: CfgNodeKind, + line: Option, + label: Option, + ) -> CfgNodeId { + let id = self.next_id; + self.next_id = self.next_id.saturating_add(1); + + let node = CfgNode { + id, + kind, + line, + label, + }; + + if kind == CfgNodeKind::Entry { + self.graph.entry_id = Some(id); + } + if kind == CfgNodeKind::Exit { + self.graph.exit_id = Some(id); + } + + self.graph.nodes.insert(id, node); + id + } + + pub fn add_edge(&mut self, from: CfgNodeId, to: CfgNodeId, kind: CfgEdgeKind) { + let edge = CfgEdge { from, to, kind }; + self.graph + .outgoing + .entry(from) + .or_default() + .push(edge.clone()); + self.graph.incoming.entry(to).or_default().push(edge); + } + + pub fn build(self) -> ControlFlowGraph { + self.graph + } + + /// Create a simple linear CFG from statement lines. + pub fn linear_from_lines(lines: &[u32]) -> ControlFlowGraph { + let mut builder = Self::new(); + let entry = builder.add_node(CfgNodeKind::Entry, None, Some("entry".to_string())); + + let mut prev = entry; + for line in lines { + let stmt = builder.add_node(CfgNodeKind::Statement, Some(*line), None); + builder.add_edge(prev, stmt, CfgEdgeKind::Normal); + prev = stmt; + } + + let exit = builder.add_node(CfgNodeKind::Exit, None, Some("exit".to_string())); + builder.add_edge(prev, exit, CfgEdgeKind::Normal); + + builder.build() + } + + /// Validate graph connectivity from entry for basic soundness checks. 
+ pub fn reachable_from_entry(graph: &ControlFlowGraph) -> BTreeSet { + let mut visited = BTreeSet::new(); + let Some(entry) = graph.entry_id() else { + return visited; + }; + + let mut stack = vec![entry]; + while let Some(node) = stack.pop() { + if !visited.insert(node) { + continue; + } + + for edge in graph.outgoing(node) { + stack.push(edge.to); + } + } + + visited + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn linear_cfg_has_entry_and_exit() { + let cfg = CfgBuilder::linear_from_lines(&[10, 11, 12]); + assert!(cfg.entry_id().is_some()); + assert!(cfg.exit_id().is_some()); + assert_eq!(cfg.branch_count(), 0); + assert_eq!(cfg.path_split_upper_bound(), 1); + } + + #[test] + fn branch_cfg_counts_splits() { + let mut builder = CfgBuilder::new(); + let entry = builder.add_node(CfgNodeKind::Entry, None, None); + let branch = builder.add_node(CfgNodeKind::Branch, Some(20), Some("if".to_string())); + let then_n = builder.add_node(CfgNodeKind::Statement, Some(21), None); + let else_n = builder.add_node(CfgNodeKind::Statement, Some(23), None); + let merge = builder.add_node(CfgNodeKind::Merge, Some(24), None); + let exit = builder.add_node(CfgNodeKind::Exit, None, None); + + builder.add_edge(entry, branch, CfgEdgeKind::Normal); + builder.add_edge(branch, then_n, CfgEdgeKind::TrueBranch); + builder.add_edge(branch, else_n, CfgEdgeKind::FalseBranch); + builder.add_edge(then_n, merge, CfgEdgeKind::Normal); + builder.add_edge(else_n, merge, CfgEdgeKind::Normal); + builder.add_edge(merge, exit, CfgEdgeKind::Normal); + + let cfg = builder.build(); + assert_eq!(cfg.branch_count(), 1); + assert_eq!(cfg.path_split_upper_bound(), 2); + + let reachable = CfgBuilder::reachable_from_entry(&cfg); + assert_eq!(reachable.len(), 6); + } + + #[test] + fn enumerate_paths_collects_true_false_constraints() { + let mut builder = CfgBuilder::new(); + let entry = builder.add_node(CfgNodeKind::Entry, None, None); + let branch = builder.add_node( + CfgNodeKind::Branch, + 
Some(40), + Some("user_is_admin".to_string()), + ); + let then_n = builder.add_node(CfgNodeKind::Statement, Some(41), None); + let else_n = builder.add_node(CfgNodeKind::Statement, Some(42), None); + let merge = builder.add_node(CfgNodeKind::Merge, Some(43), None); + let exit = builder.add_node(CfgNodeKind::Exit, None, None); + + builder.add_edge(entry, branch, CfgEdgeKind::Normal); + builder.add_edge(branch, then_n, CfgEdgeKind::TrueBranch); + builder.add_edge(branch, else_n, CfgEdgeKind::FalseBranch); + builder.add_edge(then_n, merge, CfgEdgeKind::Normal); + builder.add_edge(else_n, merge, CfgEdgeKind::Normal); + builder.add_edge(merge, exit, CfgEdgeKind::Normal); + + let cfg = builder.build(); + let paths = cfg.enumerate_paths(8); + + assert_eq!(paths.len(), 2, "Expected both true/false execution paths"); + + let truth_values: BTreeSet = paths + .iter() + .flat_map(|p| p.constraints.iter().map(|c| c.expected_truth)) + .collect(); + assert_eq!(truth_values, [false, true].into_iter().collect()); + + for path in paths { + assert_eq!(path.constraints.len(), 1); + let c = &path.constraints[0]; + assert_eq!(c.branch_node, branch); + assert_eq!(c.line, Some(40)); + assert_eq!(c.condition_label.as_deref(), Some("user_is_admin")); + } + } + + #[test] + fn enumerate_paths_respects_max_paths_bound() { + let mut builder = CfgBuilder::new(); + let entry = builder.add_node(CfgNodeKind::Entry, None, None); + let branch_a = builder.add_node(CfgNodeKind::Branch, Some(10), Some("a".to_string())); + let branch_b = builder.add_node(CfgNodeKind::Branch, Some(20), Some("b".to_string())); + let a_true = builder.add_node(CfgNodeKind::Statement, Some(11), None); + let a_false = builder.add_node(CfgNodeKind::Statement, Some(12), None); + let b_true = builder.add_node(CfgNodeKind::Statement, Some(21), None); + let b_false = builder.add_node(CfgNodeKind::Statement, Some(22), None); + let merge_a = builder.add_node(CfgNodeKind::Merge, Some(13), None); + let merge_b = 
builder.add_node(CfgNodeKind::Merge, Some(23), None); + let exit = builder.add_node(CfgNodeKind::Exit, None, None); + + builder.add_edge(entry, branch_a, CfgEdgeKind::Normal); + builder.add_edge(branch_a, a_true, CfgEdgeKind::TrueBranch); + builder.add_edge(branch_a, a_false, CfgEdgeKind::FalseBranch); + builder.add_edge(a_true, merge_a, CfgEdgeKind::Normal); + builder.add_edge(a_false, merge_a, CfgEdgeKind::Normal); + builder.add_edge(merge_a, branch_b, CfgEdgeKind::Normal); + builder.add_edge(branch_b, b_true, CfgEdgeKind::TrueBranch); + builder.add_edge(branch_b, b_false, CfgEdgeKind::FalseBranch); + builder.add_edge(b_true, merge_b, CfgEdgeKind::Normal); + builder.add_edge(b_false, merge_b, CfgEdgeKind::Normal); + builder.add_edge(merge_b, exit, CfgEdgeKind::Normal); + + let cfg = builder.build(); + let all_paths = cfg.enumerate_paths(16); + assert_eq!(all_paths.len(), 4, "Two branches should produce 4 paths"); + + let bounded_paths = cfg.enumerate_paths(3); + assert_eq!( + bounded_paths.len(), + 3, + "Path enumeration should honor max_paths" + ); + } +} diff --git a/vulnera-sast/src/infrastructure/mod.rs b/vulnera-sast/src/infrastructure/mod.rs index 673e1101..f85d59a9 100644 --- a/vulnera-sast/src/infrastructure/mod.rs +++ b/vulnera-sast/src/infrastructure/mod.rs @@ -11,6 +11,7 @@ pub mod ast_cache; pub mod call_graph; pub mod call_graph_queries; +pub mod cfg; pub mod data_flow; pub mod incremental; pub mod metavar_patterns; @@ -29,6 +30,7 @@ pub mod taint_queries; pub use ast_cache::*; pub use call_graph::*; +pub use cfg::*; pub use data_flow::*; pub use incremental::*; pub use oxc_frontend::*; diff --git a/vulnera-sast/tests/test_interprocedural_taint.rs b/vulnera-sast/tests/test_interprocedural_taint.rs index aec094eb..2345f454 100644 --- a/vulnera-sast/tests/test_interprocedural_taint.rs +++ b/vulnera-sast/tests/test_interprocedural_taint.rs @@ -180,6 +180,86 @@ runCommand(input); ); } +#[tokio::test] +async fn 
test_python_callback_parameter_propagates_to_return() { + let result = scan_files(&[( + "app.py", + r#" +import os + +def apply(callback, value): + result = callback(value) + return result + +def identity(v): + return v + +user = os.environ.get("USER_INPUT") +command = apply(identity, user) +eval(command) +"#, + )]) + .await; + + let has_eval_or_dataflow = result.findings.iter().any(|f| { + f.rule_id + .as_deref() + .map(|r| r.contains("unsafe-function-call") || r.contains("data-flow")) + .unwrap_or(false) + }); + + assert!( + has_eval_or_dataflow, + "Expected callback parameter taint to flow through apply() return into eval(). Findings: {:?}", + result + .findings + .iter() + .map(|f| f.rule_id.as_deref().unwrap_or("(none)")) + .collect::>() + ); +} + +#[tokio::test] +async fn test_js_callback_parameter_propagates_to_return() { + let result = scan_files(&[( + "app.js", + r#" +const child_process = require('child_process'); + +function apply(callback, value) { + const result = callback(value); + return result; +} + +function passthrough(v) { + return v; +} + +const user = process.env.USER_INPUT; +const cmd = apply(passthrough, user); +child_process.exec(cmd); +"#, + )]) + .await; + + let has_command_or_dataflow = result.findings.iter().any(|f| { + f.rule_id + .as_deref() + .map(|r| r.contains("child-process") || r.contains("data-flow")) + .unwrap_or(false) + }); + + assert!( + has_command_or_dataflow, + "Expected callback parameter taint to flow through apply() return into child_process.exec(). 
Findings: {:?}", + result + .findings + .iter() + .map(|f| f.rule_id.as_deref().unwrap_or("(none)")) + .collect::>() + ); +} + // ========================================================================= // Call graph construction: multi-file // ========================================================================= From ce7b982e79c929fa0ce3c6f190d49a4a9979ccbe Mon Sep 17 00:00:00 2001 From: k5602 <188656344+k5602@users.noreply.github.com> Date: Sun, 15 Feb 2026 04:50:04 +0200 Subject: [PATCH 8/9] refactor(sast): dataflow to semantic path - Rename VulnerabilityDataFlowPath/Node -> VulnerabilitySemanticPath/Node and vulnerability metadata field data_flow_path -> semantic_path in vulnera-core to better represent taint evidence as a semantic trace - Update vulnera-sast domain and infra: DataFlowPath/Node -> SemanticPath/Node; build and propagate semantic_path in SAST engine, query engine, match-to-finding, SARIF export, and tests; adjust severity escalation to check semantic_path - Change SastModule builder to keep AnalysisConfig and AST cache, call execute_scan_with_depth_override, and map semantic_path into module results - In vulnera-orchestrator, inject "sast.analysis_depth" into module config based on job.analysis_depth via sast_analysis_depth_for_job and add unit tests for the mapping BREAKING CHANGE: Public types and field names changed (VulnerabilityDataFlowPath / VulnerabilityDataFlowNode, DataFlowPath / DataFlowNode, and data_flow_path -> semantic_path). Update any external consumers/implementations to the new names and the new module config key "sast.analysis_depth" if relying on SAST depth behavior. 
--- vulnera-core/src/domain/module/entities.rs | 18 +-- .../src/application/use_cases.rs | 36 ++++- vulnera-sast/src/application/use_cases.rs | 14 +- vulnera-sast/src/domain/finding.rs | 18 +-- .../src/infrastructure/query_engine.rs | 2 +- vulnera-sast/src/infrastructure/sarif.rs | 26 ++-- .../src/infrastructure/sast_engine.rs | 14 +- vulnera-sast/src/module.rs | 140 ++++++++++++++++-- 8 files changed, 206 insertions(+), 62 deletions(-) diff --git a/vulnera-core/src/domain/module/entities.rs b/vulnera-core/src/domain/module/entities.rs index 15a42465..34e968a8 100644 --- a/vulnera-core/src/domain/module/entities.rs +++ b/vulnera-core/src/domain/module/entities.rs @@ -86,24 +86,24 @@ pub struct VulnerabilityFindingMetadata { pub snippet: Option, /// Rule metavariable bindings captured during matching pub bindings: Option>, - /// Optional data-flow trace for taint/dataflow findings - pub data_flow_path: Option, + /// Optional semantic trace for taint/dataflow findings + pub semantic_path: Option, } -/// Data flow path showing source-to-sink propagation +/// Semantic path showing source-to-sink propagation #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -pub struct VulnerabilityDataFlowPath { +pub struct VulnerabilitySemanticPath { /// Source location where taint originated - pub source: VulnerabilityDataFlowNode, + pub source: VulnerabilitySemanticNode, /// Intermediate propagation steps - pub steps: Vec, + pub steps: Vec, /// Sink location where taint is consumed - pub sink: VulnerabilityDataFlowNode, + pub sink: VulnerabilitySemanticNode, } -/// Data flow node metadata for vulnerability traces +/// Semantic node metadata for vulnerability traces #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] -pub struct VulnerabilityDataFlowNode { +pub struct VulnerabilitySemanticNode { /// Location in source code pub location: Location, /// Description of the node operation diff --git a/vulnera-orchestrator/src/application/use_cases.rs 
b/vulnera-orchestrator/src/application/use_cases.rs index 427c84b4..3927e914 100644 --- a/vulnera-orchestrator/src/application/use_cases.rs +++ b/vulnera-orchestrator/src/application/use_cases.rs @@ -196,7 +196,7 @@ impl ExecuteAnalysisJobUseCase { for module_type in &job.modules_to_run { if let Some(module) = self.module_registry.get_module(module_type) { // Prepare module-specific configuration from project metadata - let config_map = match module.prepare_config(project).await { + let mut config_map = match module.prepare_config(project).await { Ok(map) => map, Err(e) => { warn!( @@ -209,6 +209,15 @@ impl ExecuteAnalysisJobUseCase { } }; + if *module_type == ModuleType::SAST { + config_map.insert( + "sast.analysis_depth".to_string(), + serde_json::Value::String( + sast_analysis_depth_for_job(&job.analysis_depth).to_string(), + ), + ); + } + let config = ModuleConfig { job_id: job.job_id, project_id: job.project_id.clone(), @@ -440,6 +449,13 @@ impl ExecuteAnalysisJobUseCase { } } +fn sast_analysis_depth_for_job(depth: &AnalysisDepth) -> &'static str { + match depth { + AnalysisDepth::Full => "deep", + AnalysisDepth::FastScan | AnalysisDepth::DependenciesOnly => "quick", + } +} + fn module_policy_profile( module_type: &ModuleType, sandbox_config: &SandboxConfig, @@ -716,3 +732,21 @@ impl Default for AggregateResultsUseCase { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_sast_analysis_depth_for_job_mapping() { + assert_eq!(sast_analysis_depth_for_job(&AnalysisDepth::Full), "deep"); + assert_eq!( + sast_analysis_depth_for_job(&AnalysisDepth::FastScan), + "quick" + ); + assert_eq!( + sast_analysis_depth_for_job(&AnalysisDepth::DependenciesOnly), + "quick" + ); + } +} diff --git a/vulnera-sast/src/application/use_cases.rs b/vulnera-sast/src/application/use_cases.rs index 1400f1e7..330bc450 100644 --- a/vulnera-sast/src/application/use_cases.rs +++ b/vulnera-sast/src/application/use_cases.rs @@ -20,7 +20,7 @@ use 
vulnera_core::config::{AnalysisDepth, SastConfig}; use crate::domain::call_graph::ParameterInfo; use crate::domain::finding::{ - DataFlowFinding, DataFlowNode, DataFlowPath, Finding as SastFinding, Location, Severity, + DataFlowFinding, Finding as SastFinding, Location, SemanticNode, SemanticPath, Severity, }; use crate::domain::pattern_types::PatternRule; use crate::domain::suppression::FileSuppressions; @@ -1552,7 +1552,7 @@ impl ScanProjectUseCase { let language_tag = language.to_string().to_lowercase(); let severity = Self::data_flow_severity_for_category(&sink.category); // Build the finding with data flow path - let source_node = DataFlowNode { + let source_node = SemanticNode { location: Location { file_path: data_flow_finding.source.file.clone(), line: data_flow_finding.source.line, @@ -1568,7 +1568,7 @@ impl ScanProjectUseCase { expression: data_flow_finding.source.expression.clone(), }; - let sink_node = DataFlowNode { + let sink_node = SemanticNode { location: Location { file_path: data_flow_finding.sink.file.clone(), line: data_flow_finding.sink.line, @@ -1584,10 +1584,10 @@ impl ScanProjectUseCase { expression: data_flow_finding.sink.expression.clone(), }; - let steps: Vec = data_flow_finding + let steps: Vec = data_flow_finding .intermediate_steps .iter() - .map(|step| DataFlowNode { + .map(|step| SemanticNode { location: Location { file_path: step.file.clone(), line: step.line, @@ -1625,7 +1625,7 @@ impl ScanProjectUseCase { Consider using appropriate escaping for {} context.", sink.pattern_name, sink.category )), - data_flow_path: Some(DataFlowPath { + semantic_path: Some(SemanticPath { source: source_node, sink: sink_node, steps, @@ -1661,7 +1661,7 @@ impl ScanProjectUseCase { /// Adjust severity for findings confirmed by data flow analysis fn adjust_severity_for_data_flow(findings: &mut [SastFinding]) { for finding in findings.iter_mut() { - if finding.data_flow_path.is_some() { + if finding.semantic_path.is_some() { // Escalate severity when data 
flow confirms the vulnerability match finding.severity { Severity::Low => finding.severity = Severity::Medium, diff --git a/vulnera-sast/src/domain/finding.rs b/vulnera-sast/src/domain/finding.rs index 8c4252f4..850ecc70 100644 --- a/vulnera-sast/src/domain/finding.rs +++ b/vulnera-sast/src/domain/finding.rs @@ -16,9 +16,9 @@ pub struct Finding { pub confidence: Confidence, pub description: String, pub recommendation: Option, - /// Data flow path if this is a taint finding + /// Semantic path if this finding includes taint/dataflow evidence #[serde(default)] - pub data_flow_path: Option, + pub semantic_path: Option, /// Code snippet at the finding location #[serde(default)] pub snippet: Option, @@ -83,20 +83,20 @@ impl std::fmt::Display for Severity { } } -/// Data flow path showing how taint propagates +/// Semantic path showing source-to-sink evidence. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DataFlowPath { +pub struct SemanticPath { /// Source location where taint originated - pub source: DataFlowNode, + pub source: SemanticNode, /// Intermediate steps in the flow - pub steps: Vec, + pub steps: Vec, /// Sink location where taint is consumed - pub sink: DataFlowNode, + pub sink: SemanticNode, } -/// A node in the data flow path +/// A node in a semantic path #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DataFlowNode { +pub struct SemanticNode { /// Location in source code pub location: Location, /// Description of what happens at this node diff --git a/vulnera-sast/src/infrastructure/query_engine.rs b/vulnera-sast/src/infrastructure/query_engine.rs index 09f6eb1b..04f57096 100644 --- a/vulnera-sast/src/infrastructure/query_engine.rs +++ b/vulnera-sast/src/infrastructure/query_engine.rs @@ -392,7 +392,7 @@ pub fn match_to_finding( confidence: calculate_confidence(rule, match_result), description: format_description(rule, match_result, &snippet), recommendation, - data_flow_path: None, + semantic_path: None, snippet: Some(snippet), 
bindings, } diff --git a/vulnera-sast/src/infrastructure/sarif.rs b/vulnera-sast/src/infrastructure/sarif.rs index 4829ede1..5861d74d 100644 --- a/vulnera-sast/src/infrastructure/sarif.rs +++ b/vulnera-sast/src/infrastructure/sarif.rs @@ -208,27 +208,27 @@ impl SarifExporter { .map(|r| SarifLevel::from(&r.severity)) .unwrap_or(SarifLevel::from(&finding.severity)); - // Build code flows from data flow path if available - let code_flows = finding.data_flow_path.as_ref().map(|path| { + // Build code flows from semantic path if available + let code_flows = finding.semantic_path.as_ref().map(|path| { vec![SarifCodeFlow { thread_flows: vec![SarifThreadFlow { - locations: path - .steps - .iter() - .map(|step| SarifThreadFlowLocation { + locations: std::iter::once(&path.source) + .chain(path.steps.iter()) + .chain(std::iter::once(&path.sink)) + .map(|node| SarifThreadFlowLocation { location: SarifLocation { physical_location: SarifPhysicalLocation { artifact_location: SarifArtifactLocation { - uri: step.location.file_path.clone(), + uri: node.location.file_path.clone(), uri_base_id: self.config.uri_base_id.clone(), }, region: Some(SarifRegion { - start_line: step.location.line, - start_column: step.location.column, - end_line: step.location.end_line, - end_column: step.location.end_column, + start_line: node.location.line, + start_column: node.location.column, + end_line: node.location.end_line, + end_column: node.location.end_column, snippet: Some(SarifSnippet { - text: step.expression.clone(), + text: node.expression.clone(), }), }), }, @@ -348,7 +348,7 @@ mod tests { description: "SQL injection vulnerability.\n\nMatched code:\ncursor.execute(query)" .to_string(), recommendation: Some("Use parameterized queries".to_string()), - data_flow_path: None, + semantic_path: None, snippet: None, bindings: None, } diff --git a/vulnera-sast/src/infrastructure/sast_engine.rs b/vulnera-sast/src/infrastructure/sast_engine.rs index 2f1165fc..60c7556d 100755 --- 
a/vulnera-sast/src/infrastructure/sast_engine.rs +++ b/vulnera-sast/src/infrastructure/sast_engine.rs @@ -27,7 +27,7 @@ use tracing::{debug, instrument, trace}; use tree_sitter::{Query, QueryPredicateArg, Tree}; use crate::domain::finding::{ - DataFlowFinding, DataFlowNode, DataFlowPath, Finding, Location, Severity, + DataFlowFinding, Finding, Location, SemanticNode, SemanticPath, Severity, }; use crate::domain::pattern_types::{Pattern, PatternRule}; use crate::domain::value_objects::{Confidence, Language}; @@ -581,8 +581,8 @@ impl SastEngine { /// Convert a data flow finding to a regular Finding fn dataflow_to_finding(&self, df: &DataFlowFinding, file_path: &str) -> Finding { // Build data flow path from the data flow finding - let data_flow_path = Some(DataFlowPath { - source: DataFlowNode { + let semantic_path = Some(SemanticPath { + source: SemanticNode { location: Location { file_path: df.source.file.clone(), line: df.source.line, @@ -596,7 +596,7 @@ impl SastEngine { steps: df .intermediate_steps .iter() - .map(|step| DataFlowNode { + .map(|step| SemanticNode { location: Location { file_path: step.file.clone(), line: step.line, @@ -608,7 +608,7 @@ impl SastEngine { expression: step.expression.clone(), }) .collect(), - sink: DataFlowNode { + sink: SemanticNode { location: Location { file_path: df.sink.file.clone(), line: df.sink.line, @@ -638,7 +638,7 @@ impl SastEngine { df.sink.note.clone().unwrap_or_default() ), recommendation: Some("Sanitize input before using in sensitive operations".to_string()), - data_flow_path, + semantic_path, snippet: Some(df.sink.expression.clone()), bindings: None, } @@ -687,7 +687,7 @@ impl SastEngine { confidence: Confidence::High, description, recommendation, - data_flow_path: None, + semantic_path: None, snippet: Some(snippet), bindings, } diff --git a/vulnera-sast/src/module.rs b/vulnera-sast/src/module.rs index 963a2093..55d3d35e 100644 --- a/vulnera-sast/src/module.rs +++ b/vulnera-sast/src/module.rs @@ -4,11 +4,11 @@ use 
async_trait::async_trait; use std::path::Path; use std::sync::Arc; -use vulnera_core::config::SastConfig; +use vulnera_core::config::{AnalysisDepth as SastAnalysisDepth, SastConfig}; use vulnera_core::domain::module::{ AnalysisModule, Finding, FindingConfidence, FindingSeverity, FindingType, Location, ModuleConfig, ModuleExecutionError, ModuleResult, ModuleResultMetadata, ModuleType, - VulnerabilityDataFlowNode, VulnerabilityDataFlowPath, VulnerabilityFindingMetadata, + VulnerabilityFindingMetadata, VulnerabilitySemanticNode, VulnerabilitySemanticPath, }; use crate::application::use_cases::{AnalysisConfig, ScanProjectUseCase}; @@ -25,6 +25,8 @@ use crate::infrastructure::ast_cache::AstCacheService; pub struct SastModule { use_case: Arc, sast_config: SastConfig, + analysis_config: AnalysisConfig, + ast_cache: Option>, } /// Builder for [`SastModule`]. @@ -78,15 +80,14 @@ impl SastModuleBuilder { } = self; let sast_config = sast_config.unwrap_or_default(); + let analysis_config = analysis_config.unwrap_or_else(|| AnalysisConfig::from(&sast_config)); + let ast_cache_for_use_case = ast_cache.clone(); let use_case = if let Some(uc) = use_case_override { uc } else { - let analysis_cfg = - analysis_config.unwrap_or_else(|| AnalysisConfig::from(&sast_config)); - - let uc = ScanProjectUseCase::with_config(&sast_config, analysis_cfg); - let uc = if let Some(cache) = ast_cache { + let uc = ScanProjectUseCase::with_config(&sast_config, analysis_config.clone()); + let uc = if let Some(cache) = ast_cache_for_use_case { uc.with_ast_cache(cache) } else { uc @@ -97,6 +98,8 @@ impl SastModuleBuilder { SastModule { use_case, sast_config, + analysis_config, + ast_cache, } } } @@ -143,10 +146,8 @@ impl AnalysisModule for SastModule { ))); } - // Execute scan let scan_result = self - .use_case - .execute(source_path) + .execute_scan_with_depth_override(config, source_path) .await .map_err(|e| ModuleExecutionError::ExecutionFailed(e.to_string()))?; @@ -187,7 +188,7 @@ impl AnalysisModule 
for SastModule { if policy.require_data_flow_evidence_for_dataflow && is_data_flow_rule(&f.rule_id) - && f.data_flow_path.is_none() + && f.semantic_path.is_none() { filtered_by_dataflow += 1; return false; @@ -227,8 +228,8 @@ impl AnalysisModule for SastModule { vulnerability_metadata: VulnerabilityFindingMetadata { snippet: f.snippet, bindings: f.bindings, - data_flow_path: f.data_flow_path.map(|path| VulnerabilityDataFlowPath { - source: VulnerabilityDataFlowNode { + semantic_path: f.semantic_path.map(|path| VulnerabilitySemanticPath { + source: VulnerabilitySemanticNode { location: Location { path: path.source.location.file_path, line: Some(path.source.location.line), @@ -242,7 +243,7 @@ impl AnalysisModule for SastModule { steps: path .steps .into_iter() - .map(|step| VulnerabilityDataFlowNode { + .map(|step| VulnerabilitySemanticNode { location: Location { path: step.location.file_path, line: Some(step.location.line), @@ -254,7 +255,7 @@ impl AnalysisModule for SastModule { expression: step.expression, }) .collect(), - sink: VulnerabilityDataFlowNode { + sink: VulnerabilitySemanticNode { location: Location { path: path.sink.location.file_path, line: Some(path.sink.location.line), @@ -321,6 +322,33 @@ impl AnalysisModule for SastModule { } } +impl SastModule { + async fn execute_scan_with_depth_override( + &self, + module_config: &ModuleConfig, + source_path: &Path, + ) -> Result + { + let Some(depth_override) = parse_analysis_depth_override(module_config) + .map_err(|e| crate::application::use_cases::ScanError::Config(e.to_string()))? 
+ else { + return self.use_case.execute(source_path).await; + }; + + let mut effective_analysis = self.analysis_config.clone(); + effective_analysis.analysis_depth = depth_override; + + let use_case = if let Some(cache) = &self.ast_cache { + ScanProjectUseCase::with_config(&self.sast_config, effective_analysis) + .with_ast_cache(Arc::clone(cache)) + } else { + ScanProjectUseCase::with_config(&self.sast_config, effective_analysis) + }; + + use_case.execute(source_path).await + } +} + #[derive(Debug, Clone)] struct SastFindingPolicy { min_severity: SastSeverity, @@ -441,8 +469,90 @@ fn is_data_flow_rule(rule_id: &str) -> bool { rule_id.starts_with("data-flow-") || rule_id.contains("dataflow") || rule_id.contains("taint") } +fn parse_analysis_depth_override( + module_config: &ModuleConfig, +) -> Result, ModuleExecutionError> { + let Some(raw) = module_config + .config + .get("sast.analysis_depth") + .and_then(serde_json::Value::as_str) + else { + return Ok(None); + }; + + let parsed = match raw.trim().to_ascii_lowercase().as_str() { + "deep" => SastAnalysisDepth::Deep, + "standard" => SastAnalysisDepth::Standard, + "quick" => SastAnalysisDepth::Quick, + other => { + return Err(ModuleExecutionError::InvalidConfig(format!( + "Invalid SAST analysis depth override: {other}" + ))); + } + }; + + Ok(Some(parsed)) +} + impl Default for SastModule { fn default() -> Self { Self::new() } } + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + fn module_config_with_depth(value: Option<&str>) -> ModuleConfig { + let mut config = HashMap::new(); + if let Some(v) = value { + config.insert( + "sast.analysis_depth".to_string(), + serde_json::Value::String(v.to_string()), + ); + } + + ModuleConfig { + job_id: uuid::Uuid::new_v4(), + project_id: "test-project".to_string(), + source_uri: ".".to_string(), + config, + } + } + + #[test] + fn parse_analysis_depth_override_accepts_known_values() { + let deep = 
parse_analysis_depth_override(&module_config_with_depth(Some("deep"))) + .expect("deep should parse"); + let standard = parse_analysis_depth_override(&module_config_with_depth(Some("standard"))) + .expect("standard should parse"); + let quick = parse_analysis_depth_override(&module_config_with_depth(Some("quick"))) + .expect("quick should parse"); + + assert_eq!(deep, Some(SastAnalysisDepth::Deep)); + assert_eq!(standard, Some(SastAnalysisDepth::Standard)); + assert_eq!(quick, Some(SastAnalysisDepth::Quick)); + } + + #[test] + fn parse_analysis_depth_override_absent_returns_none() { + let parsed = parse_analysis_depth_override(&module_config_with_depth(None)) + .expect("absence should not fail"); + assert_eq!(parsed, None); + } + + #[test] + fn parse_analysis_depth_override_rejects_invalid_value() { + let err = parse_analysis_depth_override(&module_config_with_depth(Some("ultra"))) + .expect_err("invalid value should fail"); + + match err { + ModuleExecutionError::InvalidConfig(message) => { + assert!(message.contains("Invalid SAST analysis depth override")); + } + other => panic!("unexpected error variant: {other}"), + } + } +} From c1406fb8e018ffd4690d9867fa524e8f4be925af Mon Sep 17 00:00:00 2001 From: Zero <188656344+k5602@users.noreply.github.com> Date: Tue, 17 Feb 2026 06:05:34 +0200 Subject: [PATCH 9/9] chore(license): Relicense project under AGPL-3.0-or-later Replace BUSL-1.1 with the GNU Affero General Public License v3 in Cargo.toml and commit the full AGPL-3.0-or-later LICENSE text. Add robust LLM response parsing utilities: - New vulnera-llm::infrastructure::response_parser with JSON extraction from fenced code blocks, any fenced code, or first JSON value. - Unit tests for parsing strategies. Wire parser and related changes into the LLM module: - Import ResponseParser in enrich/findings and code-fix use cases. - Export response_parser in vulnera-llm::infrastructure mod. 
LLM provider and test updates: - Make provider timeouts configurable via timeout_seconds local var. - Add ResilienceConfig wrapper and ResilientProvider wiring in registry. - Update mock provider, tests and domain types to CompletionRequest / CompletionResponse / StreamChunk shapes and streaming helpers. - Adjust integration tests to exercise GoogleAIProvider semantics. Keep changes scoped to licensing and LLM module refactor/adaptations; ensure new parser and tests improve robustness of model output handling. --- Cargo.toml | 2 +- LICENSE | 700 ++++++++++++++++-- README.md | 22 - vulnera-api/src/domain/value_objects.rs | 2 +- .../application/use_cases/enrich_findings.rs | 71 +- .../use_cases/generate_code_fix.rs | 19 +- vulnera-llm/src/infrastructure/mod.rs | 1 + .../src/infrastructure/providers/google_ai.rs | 15 +- .../src/infrastructure/providers/openai.rs | 18 +- vulnera-llm/src/infrastructure/registry.rs | 40 +- .../src/infrastructure/response_parser.rs | 187 +++++ vulnera-llm/tests/common/mod.rs | 166 +++-- .../tests/integration/test_gemini_provider.rs | 282 +++---- .../test_streaming_chunk_parsing.rs | 133 ---- .../tests/unit/test_explain_vulnerability.rs | 31 +- .../tests/unit/test_generate_code_fix.rs | 7 +- .../tests/unit/test_natural_language_query.rs | 56 +- 17 files changed, 1186 insertions(+), 566 deletions(-) create mode 100644 vulnera-llm/src/infrastructure/response_parser.rs delete mode 100644 vulnera-llm/tests/integration/test_streaming_chunk_parsing.rs diff --git a/Cargo.toml b/Cargo.toml index 3be78ece..59c6b82a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -104,7 +104,7 @@ version = "0.5.1" edition = "2024" rust-version = "1.91" authors = ["Vulnera-Team"] -license = "BUSL-1.1" +license = "AGPL-3.0-or-later" license-file = "LICENSE" repository = "https://github.com/k5602/vulnera" keywords = ["security", "vulnerability", "analysis", "dependencies"] diff --git a/LICENSE b/LICENSE index 73863491..be3f7b28 100644 --- a/LICENSE +++ b/LICENSE @@ -1,77 
+1,661 @@ -Business Source License 1.1 + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 -Licensor: Vulnera Labs, Inc. -Licensed Work: Vulnera (all code in this repository except where otherwise noted) -Additional Use Grant: You may use, copy, modify, and redistribute the Licensed Work for non-commercial purposes only. Non-commercial purposes include use by individuals and non-commercial organizations. Any commercial use, including use by for-profit entities in production or for providing services to third parties, requires a commercial license from Vulnera Labs, Inc. -Change Date: 2029-02-11 -Change License: GPL-3.0-or-later + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. -Terms + Preamble -The Licensor hereby grants you the right to copy, modify, create derivative works, -redistribute, and make non-production use of the Licensed Work. The Licensor may -make an Additional Use Grant, above, permitting limited production use. + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. -Effective on the Change Date, or the fourth anniversary of the first publicly -available distribution of a specific version of the Licensed Work under this -License, whichever comes first, the Licensor hereby grants you rights under the -terms of the Change License, and the rights granted in the paragraph above -terminate. + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. 
-If your use of the Licensed Work does not comply with the requirements currently -in effect as described in this License, you must purchase a commercial license -from the Licensor, its affiliated entities, or authorized resellers, or you must -refrain from using the Licensed Work. + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. -All copies of the original and modified Licensed Work, and derivative works of -the Licensed Work, are subject to this License. This License applies separately -for each version of the Licensed Work and the Change Date may vary for each -version of the Licensed Work released by Licensor. + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. -You must conspicuously display this License on each original or modified copy of -the Licensed Work. If you receive the Licensed Work in original or modified form -from a third party, the terms and conditions set forth in this License apply to -your use of that work. + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. 
+The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. -Any use of the Licensed Work in violation of this License will automatically -terminate your rights under this License for the current and all other versions -of the Licensed Work. + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. -This License does not grant you any right in any trademark or logo of Licensor or -its affiliates (provided that you may use a trademark or logo of Licensor as -expressly required by this License). + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. -TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON AN -“AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS -OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND TITLE. + The precise terms and conditions for copying, distribution and +modification follow. -MariaDB hereby grants you permission to use this License’s text to license your -works, and to refer to it using the trademark “Business Source License”, as long -as you comply with the Covenants of Licensor below. + TERMS AND CONDITIONS -Covenants of Licensor + 0. Definitions. 
-In consideration of the right to use this License’s text and the “Business Source -License” name and trademark, Licensor covenants to MariaDB, and to all other -recipients of the licensed work to be provided by Licensor: + "This License" refers to version 3 of the GNU Affero General Public License. -1. To specify as the Change License the GPL Version 2.0 or any later version, or - a license that is compatible with GPL Version 2.0 or a later version, where - “compatible” means that software provided under the Change License can be - included in a program with software provided under GPL Version 2.0 or a later - version. Licensor may specify additional Change Licenses without limitation. + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. -2. To either: (a) specify an additional grant of rights to use that does not - impose any additional restriction on the right granted in this License, as the - Additional Use Grant; or (b) insert the text “None” to specify a Change Date. + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. -3. Not to modify this License in any other way. + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. -Notice + A "covered work" means either the unmodified Program or a work based +on the Program. -The Business Source License (this document, or the “License”) is not an Open -Source license. However, the Licensed Work will eventually be made available -under an Open Source License, as stated in this License. 
+ To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. -Copyright © 2026 Vulnera Labs, Inc. + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. 
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +. diff --git a/README.md b/README.md index 9035ffd2..1f63e9ca 100644 --- a/README.md +++ b/README.md @@ -18,30 +18,9 @@ _Multi-ecosystem vulnerability analysis with dependency scanning, SAST, secrets --- -## 🌐 Web Dashboard & Team Collaboration - -**[Vulnera Studio](https://vulnera.studio)** — Central web platform for team collaboration, security insights, and reporting: - -| Feature | Description | -| --------------------------- | ---------------------------------------------------------------------------------- | -| **Organization Management** | Create teams, manage members with role-based access (Owner, Admin, Member, Viewer) | -| **Shared Quota Pools** | Team members share token allocations; no per-user limits | -| **API Key Management** | Generate and rotate keys for CLI and CI/CD integration | -| **Integrations** | Connect GitHub, GitLab, Slack, webhooks for automated workflows | -| **Team Analytics** | Track usage by member, module, and project; export reports | -| **Compliance Reports** | Generate SOC2, ISO27001, GDPR, HIPAA-aligned reports | -| **Audit Logs** | Full history of member actions, configuration changes, and scans | -| **Webhooks** | Real-time notifications for scan events and findings | - -**Get started:** Visit [vulnera.studio](https://vulnera.studio) → Sign up → Create organization → Invite team. 
- ---- - ## Architecture - - | [Quick Start](https://k5602.github.io/Vulnera/getting-started/quick-start.html) | Installation and first scan | | [CLI Reference](https://k5602.github.io/Vulnera/guide/cli-reference.html) | Command-line usage | | [Configuration](https://k5602.github.io/Vulnera/guide/configuration.html) | Environment variables and TOML config | @@ -119,7 +98,6 @@ cargo run **Verify:** `curl http://localhost:3000/health` • **API Docs:** - --- ## ⚙️ Configuration diff --git a/vulnera-api/src/domain/value_objects.rs b/vulnera-api/src/domain/value_objects.rs index 4bdd135d..3eb9ea6d 100644 --- a/vulnera-api/src/domain/value_objects.rs +++ b/vulnera-api/src/domain/value_objects.rs @@ -46,7 +46,7 @@ pub enum ApiVulnerabilityType { MissingErrorHandling, InformationDisclosure, MissingPagination, - ResourceExhaustion, // NEW: No pagination/limits + ResourceExhaustion, // NEW: No pagination/limits ,the only new that not tested. // OAuth/OIDC InsecureOAuthFlow, diff --git a/vulnera-llm/src/application/use_cases/enrich_findings.rs b/vulnera-llm/src/application/use_cases/enrich_findings.rs index c3cd5314..f18b0f02 100644 --- a/vulnera-llm/src/application/use_cases/enrich_findings.rs +++ b/vulnera-llm/src/application/use_cases/enrich_findings.rs @@ -2,6 +2,7 @@ use crate::domain::{CompletionRequest, LlmProvider}; use crate::infrastructure::prompts::PromptBuilder; +use crate::infrastructure::response_parser::ResponseParser; use anyhow::Result; use futures::stream::{self, StreamExt}; use std::sync::Arc; @@ -173,62 +174,38 @@ impl EnrichFindingsUseCase { /// Parse LLM response into FindingEnrichment fn parse_enrichment_response(content: &str) -> Result { - // Try to parse as JSON first - if let Ok(parsed) = serde_json::from_str::(content) { - return Ok(FindingEnrichment { + match ResponseParser::parse_json::(content) { + Ok(parsed) => Ok(FindingEnrichment { explanation: Some(parsed.explanation), remediation_suggestion: Some(parsed.remediation), risk_summary: 
Some(parsed.risk_summary), enrichment_successful: true, error: None, enriched_at: Some(chrono::Utc::now()), - }); - } - - // Try to extract JSON from markdown code block - if let Some(json_content) = Self::extract_json_from_markdown(content) - && let Ok(parsed) = serde_json::from_str::(&json_content) - { - return Ok(FindingEnrichment { - explanation: Some(parsed.explanation), - remediation_suggestion: Some(parsed.remediation), - risk_summary: Some(parsed.risk_summary), - enrichment_successful: true, - error: None, - enriched_at: Some(chrono::Utc::now()), - }); - } - - // Fallback: treat entire response as explanation - if !content.trim().is_empty() { - Ok(FindingEnrichment { - explanation: Some(content.to_string()), - remediation_suggestion: None, - risk_summary: None, - enrichment_successful: true, - error: None, - enriched_at: Some(chrono::Utc::now()), - }) - } else { - error!("Empty response from LLM"); - Ok(FindingEnrichment { - enrichment_successful: false, - error: Some("Empty response from LLM".to_string()), - enriched_at: Some(chrono::Utc::now()), - ..Default::default() - }) + }), + Err(parse_error) => { + if !content.trim().is_empty() { + Ok(FindingEnrichment { + explanation: Some(content.to_string()), + remediation_suggestion: None, + risk_summary: None, + enrichment_successful: true, + error: Some(parse_error.to_string()), + enriched_at: Some(chrono::Utc::now()), + }) + } else { + error!("Empty response from LLM"); + Ok(FindingEnrichment { + enrichment_successful: false, + error: Some("Empty response from LLM".to_string()), + enriched_at: Some(chrono::Utc::now()), + ..Default::default() + }) + } + } } } - /// Extract JSON from markdown code block - fn extract_json_from_markdown(content: &str) -> Option { - let json_start = content.find("```json").or_else(|| content.find("```"))?; - let content_after_start = &content[json_start..]; - let actual_start = content_after_start.find('\n')? 
+ 1; - let json_end = content_after_start[actual_start..].find("```")?; - Some(content_after_start[actual_start..actual_start + json_end].to_string()) - } - /// Get priority value for severity (higher = more critical) fn severity_priority(severity: &FindingSeverity) -> u8 { match severity { diff --git a/vulnera-llm/src/application/use_cases/generate_code_fix.rs b/vulnera-llm/src/application/use_cases/generate_code_fix.rs index 27ed1732..bcb6baf4 100644 --- a/vulnera-llm/src/application/use_cases/generate_code_fix.rs +++ b/vulnera-llm/src/application/use_cases/generate_code_fix.rs @@ -2,6 +2,7 @@ use crate::domain::{CodeFix, CompletionRequest, LlmError, LlmProvider}; use crate::infrastructure::prompts::CODE_FIX_SYSTEM_PROMPT; +use crate::infrastructure::response_parser::ResponseParser; use std::sync::Arc; use vulnera_core::config::LlmConfig; @@ -43,22 +44,6 @@ impl GenerateCodeFixUseCase { let response = self.provider.complete(request).await?; let content = response.text(); - // Parse JSON from content (handling potential markdown code blocks) - let json_str = if let Some(start) = content.find("```json") { - if let Some(_end) = content[start..].find("```") { - let start_brace = content.find('{').unwrap_or(0); - let end_brace = content.rfind('}').unwrap_or(content.len()); - &content[start_brace..=end_brace] - } else { - &content - } - } else if let Some(start) = content.find('{') { - let end = content.rfind('}').unwrap_or(content.len()); - &content[start..=end] - } else { - &content - }; - #[derive(serde::Deserialize)] struct LlmOutput { explanation: String, @@ -66,7 +51,7 @@ impl GenerateCodeFixUseCase { diff: String, } - let output: LlmOutput = serde_json::from_str(json_str).map_err(|e| { + let output: LlmOutput = ResponseParser::parse_json(&content).map_err(|e| { LlmError::InvalidResponse(format!("Failed to parse code fix response: {}", e)) })?; diff --git a/vulnera-llm/src/infrastructure/mod.rs b/vulnera-llm/src/infrastructure/mod.rs index fd99819e..2a98f1f1 
100644 --- a/vulnera-llm/src/infrastructure/mod.rs +++ b/vulnera-llm/src/infrastructure/mod.rs @@ -1,3 +1,4 @@ pub mod prompts; pub mod providers; pub mod registry; +pub mod response_parser; diff --git a/vulnera-llm/src/infrastructure/providers/google_ai.rs b/vulnera-llm/src/infrastructure/providers/google_ai.rs index 8ecaa2dc..6905700d 100644 --- a/vulnera-llm/src/infrastructure/providers/google_ai.rs +++ b/vulnera-llm/src/infrastructure/providers/google_ai.rs @@ -25,8 +25,9 @@ pub struct GoogleAIProvider { impl GoogleAIProvider { /// Create a new Google AI provider pub fn new(api_key: impl Into, model: impl Into) -> Self { + let timeout_seconds = 120; let client = Client::builder() - .timeout(Duration::from_secs(120)) + .timeout(Duration::from_secs(timeout_seconds)) .build() .unwrap_or_else(|e| { error!(error = %e, "Failed to build HTTP client with custom timeout, using default client"); @@ -47,6 +48,18 @@ impl GoogleAIProvider { self } + /// Configure request timeout (in seconds) + pub fn with_timeout(mut self, timeout_seconds: u64) -> Self { + self.client = Client::builder() + .timeout(Duration::from_secs(timeout_seconds)) + .build() + .unwrap_or_else(|e| { + error!(error = %e, "Failed to build HTTP client with custom timeout, using default client"); + Client::new() + }); + self + } + /// Build the API URL for a model endpoint fn build_url(&self, model: &str, endpoint: &str) -> String { format!( diff --git a/vulnera-llm/src/infrastructure/providers/openai.rs b/vulnera-llm/src/infrastructure/providers/openai.rs index 41519f7a..86236953 100644 --- a/vulnera-llm/src/infrastructure/providers/openai.rs +++ b/vulnera-llm/src/infrastructure/providers/openai.rs @@ -35,8 +35,9 @@ pub struct OpenAIProvider { impl OpenAIProvider { /// Create a new OpenAI provider pub fn new(api_key: impl Into, model: impl Into) -> Self { + let timeout_seconds = 120; let client = Client::builder() - .timeout(Duration::from_secs(120)) + .timeout(Duration::from_secs(timeout_seconds)) 
.build() .unwrap_or_else(|e| { error!(error = %e, "Failed to build HTTP client with custom timeout, using default client"); @@ -61,8 +62,9 @@ impl OpenAIProvider { deployment: impl Into, api_version: impl Into, ) -> Self { + let timeout_seconds = 120; let client = Client::builder() - .timeout(Duration::from_secs(120)) + .timeout(Duration::from_secs(timeout_seconds)) .build() .unwrap_or_else(|e| { error!(error = %e, "Failed to build Azure HTTP client with custom timeout, using default client"); @@ -88,6 +90,18 @@ impl OpenAIProvider { self } + /// Configure request timeout (in seconds) + pub fn with_timeout(mut self, timeout_seconds: u64) -> Self { + self.client = Client::builder() + .timeout(Duration::from_secs(timeout_seconds)) + .build() + .unwrap_or_else(|e| { + error!(error = %e, "Failed to build HTTP client with custom timeout, using default client"); + Client::new() + }); + self + } + /// Set organization ID pub fn with_organization(mut self, org_id: impl Into) -> Self { self.organization_id = Some(org_id.into()); diff --git a/vulnera-llm/src/infrastructure/registry.rs b/vulnera-llm/src/infrastructure/registry.rs index c2b6aa67..1034a08b 100644 --- a/vulnera-llm/src/infrastructure/registry.rs +++ b/vulnera-llm/src/infrastructure/registry.rs @@ -267,6 +267,29 @@ impl ProviderRegistry { pub fn from_llm_config(config: &vulnera_core::config::LlmConfig) -> Result { let mut registry = Self::new(); + let resilience = if config.resilience.enabled { + Some(ResilienceConfig { + max_retries: config.resilience.max_retries, + initial_backoff_ms: config.resilience.initial_backoff_ms, + max_backoff_ms: config.resilience.max_backoff_ms, + circuit_breaker_threshold: config.resilience.circuit_breaker_threshold, + circuit_breaker_timeout_secs: config.resilience.circuit_breaker_timeout_secs, + ..Default::default() + }) + } else { + None + }; + + fn wrap_with_resilience( + provider: P, + resilience: &Option, + ) -> Arc { + match resilience { + Some(cfg) => 
Arc::new(ResilientProvider::new(provider, cfg.clone())), + None => Arc::new(provider), + } + } + let provider: Arc = match config.provider.to_lowercase().as_str() { "google_ai" | "gemini" | "google" => { let api_key = config @@ -280,11 +303,12 @@ impl ProviderRegistry { ) })?; - let mut provider = GoogleAIProvider::new(&api_key, &config.default_model); + let mut provider = GoogleAIProvider::new(&api_key, &config.default_model) + .with_timeout(config.timeout_seconds); if !config.google_ai.base_url.is_empty() { provider = provider.with_base_url(&config.google_ai.base_url); } - Arc::new(provider) + wrap_with_resilience(provider, &resilience) } "openai" | "gpt" => { let api_key = config @@ -298,7 +322,8 @@ impl ProviderRegistry { ) })?; - let mut provider = OpenAIProvider::new(&api_key, &config.default_model); + let mut provider = OpenAIProvider::new(&api_key, &config.default_model) + .with_timeout(config.timeout_seconds); if !config.openai.base_url.is_empty() && config.openai.base_url != "https://api.openai.com/v1" { @@ -307,7 +332,7 @@ impl ProviderRegistry { if let Some(ref org) = config.openai.organization_id { provider = provider.with_organization(org); } - Arc::new(provider) + wrap_with_resilience(provider, &resilience) } "azure" | "azure_openai" => { let api_key = config @@ -332,12 +357,15 @@ impl ProviderRegistry { )); } - Arc::new(OpenAIProvider::azure( + let provider = OpenAIProvider::azure( &config.azure.endpoint, &api_key, &config.azure.deployment, &config.azure.api_version, - )) + ) + .with_timeout(config.timeout_seconds); + + wrap_with_resilience(provider, &resilience) } other => { return Err(LlmError::ProviderNotFound(format!( diff --git a/vulnera-llm/src/infrastructure/response_parser.rs b/vulnera-llm/src/infrastructure/response_parser.rs new file mode 100644 index 00000000..f8e6a1bc --- /dev/null +++ b/vulnera-llm/src/infrastructure/response_parser.rs @@ -0,0 +1,187 @@ +//! Shared response parsing utilities for LLM outputs +//! +//! 
Provides robust JSON extraction from model responses that may include +//! markdown code fences or surrounding narrative text. + +use serde::de::DeserializeOwned; + +use crate::domain::LlmError; + +/// Utilities for extracting and parsing JSON from LLM responses. +pub struct ResponseParser; + +impl ResponseParser { + /// Parse a JSON value from an LLM response. + /// + /// Strategy order: + /// 1) Try the full trimmed content as JSON. + /// 2) Extract a fenced JSON code block (```json ... ```). + /// 3) Extract any fenced code block (``` ... ```). + /// 4) Extract the first valid JSON object/array found in the text. + pub fn parse_json(content: &str) -> Result { + let trimmed = content.trim(); + if let Ok(parsed) = serde_json::from_str::(trimmed) { + return Ok(parsed); + } + + if let Some(json) = Self::extract_fenced_json(trimmed) + && let Ok(parsed) = serde_json::from_str::(&json) + { + return Ok(parsed); + } + + if let Some(json) = Self::extract_any_fenced_code(trimmed) + && let Ok(parsed) = serde_json::from_str::(&json) + { + return Ok(parsed); + } + + if let Some(json) = Self::extract_first_json_value(trimmed) + && let Ok(parsed) = serde_json::from_str::(&json) + { + return Ok(parsed); + } + + Err(LlmError::InvalidResponse( + "Failed to extract valid JSON from LLM response".to_string(), + )) + } + + /// Extract a ```json fenced code block. + pub fn extract_fenced_json(content: &str) -> Option { + Self::extract_fenced_block(content, Some("json")) + } + + /// Extract any fenced code block. + pub fn extract_any_fenced_code(content: &str) -> Option { + Self::extract_fenced_block(content, None) + } + + /// Extract the first valid JSON value (object or array) from text. + /// + /// Uses `serde_json::Deserializer` to detect a valid JSON prefix. 
+ pub fn extract_first_json_value(content: &str) -> Option { + for (idx, ch) in content.char_indices() { + if ch == '{' || ch == '[' { + let candidate = &content[idx..]; + let mut de = + serde_json::Deserializer::from_str(candidate).into_iter::(); + if let Some(Ok(_value)) = de.next() { + let end = de.byte_offset(); + if end > 0 && end <= candidate.len() { + return Some(candidate[..end].to_string()); + } + } + } + } + None + } + + fn extract_fenced_block(content: &str, language: Option<&str>) -> Option { + let fence = "```"; + let mut search = content; + + loop { + let start = search.find(fence)?; + let after_start = &search[start + fence.len()..]; + + // Determine language tag + let (lang_tag, rest) = if let Some(line_end) = after_start.find('\n') { + let tag = after_start[..line_end].trim(); + (tag, &after_start[line_end + 1..]) + } else { + return None; + }; + + if let Some(expected) = language + && !lang_tag.eq_ignore_ascii_case(expected) + { + // Continue scanning after this fence + search = after_start; + continue; + } + + let end = rest.find(fence)?; + let block = rest[..end].trim().to_string(); + return Some(block); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_json_direct() { + #[derive(serde::Deserialize, Debug, PartialEq)] + struct Payload { + key: String, + } + + let json = r#"{ "key": "value" }"#; + let parsed: Payload = ResponseParser::parse_json(json).unwrap(); + assert_eq!( + parsed, + Payload { + key: "value".into() + } + ); + } + + #[test] + fn test_parse_json_fenced_json() { + #[derive(serde::Deserialize, Debug, PartialEq)] + struct Payload { + ok: bool, + } + + let content = r#" +Here is the result: +```json +{ "ok": true } +``` +"#; + let parsed: Payload = ResponseParser::parse_json(content).unwrap(); + assert_eq!(parsed, Payload { ok: true }); + } + + #[test] + fn test_parse_json_any_fence() { + #[derive(serde::Deserialize, Debug, PartialEq)] + struct Payload { + count: u32, + } + + let content = r#" 
+```text +{ "count": 7 } +``` +"#; + let parsed: Payload = ResponseParser::parse_json(content).unwrap(); + assert_eq!(parsed, Payload { count: 7 }); + } + + #[test] + fn test_parse_json_first_value() { + #[derive(serde::Deserialize, Debug, PartialEq)] + struct Payload { + status: String, + } + + let content = "Some text before {\"status\":\"ok\"} trailing text"; + let parsed: Payload = ResponseParser::parse_json(content).unwrap(); + assert_eq!( + parsed, + Payload { + status: "ok".into() + } + ); + } + + #[test] + fn test_extract_fenced_json_none() { + let content = "no fences here"; + assert!(ResponseParser::extract_fenced_json(content).is_none()); + } +} diff --git a/vulnera-llm/tests/common/mod.rs b/vulnera-llm/tests/common/mod.rs index d5013bb8..4b4d0209 100644 --- a/vulnera-llm/tests/common/mod.rs +++ b/vulnera-llm/tests/common/mod.rs @@ -1,128 +1,171 @@ //! Common test utilities and mock implementations use async_trait::async_trait; +use futures::stream::{self, BoxStream}; use std::sync::Arc; -use tokio::sync::mpsc; -use vulnera_llm::domain::{Choice, LlmRequest, LlmResponse, Message, Usage}; -use vulnera_llm::infrastructure::providers::LlmProvider; +use tokio::sync::Mutex; +use vulnera_llm::domain::{ + CompletionRequest, CompletionResponse, ContentBlock, LlmError, LlmProvider, + ProviderCapabilities, ProviderInfo, StopReason, StreamChunk, Usage, +}; /// Mock LLM provider for testing pub struct MockLlmProvider { - /// Response to return from generate() - pub response: Option, - /// Error message to return (if any) - pub error: Option, + /// Response to return from complete() + pub response: Option, + /// Streamed chunks to return from complete_stream() + pub stream_chunks: Option>, + /// Error to return (if any) + pub error: Option, /// Captured requests for verification - pub captured_requests: Arc>>, + pub captured_requests: Arc>>, } impl MockLlmProvider { pub fn new() -> Self { Self { response: None, + stream_chunks: None, error: None, - captured_requests: 
Arc::new(tokio::sync::Mutex::new(Vec::new())), + captured_requests: Arc::new(Mutex::new(Vec::new())), } } - pub fn with_response(mut self, response: LlmResponse) -> Self { + pub fn with_response(mut self, response: CompletionResponse) -> Self { self.response = Some(response); self } - pub fn with_error(mut self, error: &str) -> Self { - self.error = Some(error.to_string()); + pub fn with_stream_chunks(mut self, chunks: Vec) -> Self { + self.stream_chunks = Some(chunks); + self + } + + pub fn with_error(mut self, error: LlmError) -> Self { + self.error = Some(error); self } pub fn with_json_response(content: &str) -> Self { - Self::new().with_response(create_llm_response(content)) + Self::new().with_response(create_completion_response(content)) } } #[async_trait] impl LlmProvider for MockLlmProvider { - async fn generate(&self, request: LlmRequest) -> Result { - // Capture the request + fn info(&self) -> ProviderInfo { + ProviderInfo { + id: "mock", + name: "Mock Provider", + version: "test", + capabilities: ProviderCapabilities::text_only(8192, 2048), + } + } + + fn default_model(&self) -> &str { + "test-model" + } + + async fn complete(&self, request: CompletionRequest) -> Result { self.captured_requests.lock().await.push(request); if let Some(error) = &self.error { - return Err(anyhow::anyhow!("{}", error)); + return Err(error.clone()); } self.response .clone() - .ok_or_else(|| anyhow::anyhow!("No response configured")) + .ok_or_else(|| LlmError::Other("No response configured".to_string())) } - async fn generate_stream( + async fn complete_stream( &self, - request: LlmRequest, - ) -> Result>, anyhow::Error> { - // Capture the request + request: CompletionRequest, + ) -> Result>, LlmError> { self.captured_requests.lock().await.push(request); if let Some(error) = &self.error { - return Err(anyhow::anyhow!("{}", error)); + return Err(error.clone()); } - let (tx, rx) = mpsc::channel(10); - - // Send the response as a single chunk if available - if let Some(response) 
= &self.response { - let response_clone = response.clone(); - tokio::spawn(async move { - let _ = tx.send(Ok(response_clone)).await; - }); - } - - Ok(rx) + let chunks = if let Some(chunks) = &self.stream_chunks { + chunks.clone() + } else if let Some(response) = &self.response { + create_stream_chunks(&response.text()) + } else { + vec![StreamChunk { + index: 0, + delta: None, + is_final: true, + stop_reason: Some(StopReason::EndTurn), + usage: Some(Usage::default()), + }] + }; + + let stream = stream::iter(chunks.into_iter().map(Ok)); + Ok(Box::pin(stream)) } } /// Create a standard LLM response with given content -pub fn create_llm_response(content: &str) -> LlmResponse { - LlmResponse { +pub fn create_completion_response(content: &str) -> CompletionResponse { + CompletionResponse { id: "test-response-id".to_string(), - object: "chat.completion".to_string(), - created: 1234567890, model: "test-model".to_string(), - choices: vec![Choice { - index: 0, - message: Some(Message::new("assistant", content)), - delta: None, - finish_reason: Some("stop".to_string()), - }], - usage: Some(Usage { + content: vec![ContentBlock::text(content)], + stop_reason: StopReason::EndTurn, + usage: Usage { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150, - }), + cached_tokens: None, + }, + created: Some(1234567890), } } -/// Create a streaming LLM response with delta content -pub fn create_streaming_response(content: &str) -> LlmResponse { - LlmResponse { - id: "test-stream-id".to_string(), - object: "chat.completion.chunk".to_string(), - created: 1234567890, - model: "test-model".to_string(), - choices: vec![Choice { +/// Create streaming chunks from content +pub fn create_stream_chunks(content: &str) -> Vec { + if content.is_empty() { + return vec![StreamChunk { index: 0, - message: None, - delta: Some(Message::new("assistant", content)), - finish_reason: None, - }], - usage: None, + delta: None, + is_final: true, + stop_reason: Some(StopReason::EndTurn), + usage: 
Some(Usage::default()), + }]; } + + vec![StreamChunk { + index: 0, + delta: Some(ContentBlock::text(content)), + is_final: true, + stop_reason: Some(StopReason::EndTurn), + usage: Some(Usage::default()), + }] } /// Create default test LlmConfig pub fn create_test_config() -> vulnera_core::config::LlmConfig { - vulnera_core::config::LlmConfig { - gemini_api_url: "https://test.api.example.com".to_string(), - gemini_api_key: Some("test-api-key".to_string()), + use vulnera_core::config::{AzureOpenAIConfig, GoogleAIConfig, LlmConfig, OpenAIConfig}; + + LlmConfig { + provider: "google_ai".to_string(), + google_ai: GoogleAIConfig { + api_key: Some("test-api-key".to_string()), + base_url: "https://test.api.example.com".to_string(), + }, + openai: OpenAIConfig { + api_key: Some("test-openai-key".to_string()), + base_url: "https://api.openai.com/v1".to_string(), + organization_id: None, + }, + azure: AzureOpenAIConfig { + endpoint: String::new(), + api_key: None, + deployment: String::new(), + api_version: "2024-02-15-preview".to_string(), + }, default_model: "test-model".to_string(), explanation_model: Some("explanation-model".to_string()), code_fix_model: Some("code-fix-model".to_string()), @@ -131,6 +174,7 @@ pub fn create_test_config() -> vulnera_core::config::LlmConfig { max_tokens: 10240, timeout_seconds: 30, enable_streaming: false, + resilience: Default::default(), enrichment: Default::default(), } } diff --git a/vulnera-llm/tests/integration/test_gemini_provider.rs b/vulnera-llm/tests/integration/test_gemini_provider.rs index a5239ec8..a72ac39a 100644 --- a/vulnera-llm/tests/integration/test_gemini_provider.rs +++ b/vulnera-llm/tests/integration/test_gemini_provider.rs @@ -1,100 +1,70 @@ -//! Integration tests for GeminiLlmProvider using wiremock +//! 
Integration tests for GoogleAIProvider using wiremock -use wiremock::matchers::{header, method}; +use futures::StreamExt; +use wiremock::matchers::{method, path, query_param}; use wiremock::{Mock, MockServer, ResponseTemplate}; -use vulnera_core::config::LlmConfig; -use vulnera_llm::domain::{LlmRequest, Message}; -use vulnera_llm::infrastructure::providers::{GeminiLlmProvider, LlmProvider}; +use vulnera_llm::domain::{CompletionRequest, ContentBlock, LlmProvider, Message}; +use vulnera_llm::infrastructure::providers::GoogleAIProvider; -fn create_test_config(api_url: &str) -> LlmConfig { - LlmConfig { - gemini_api_url: api_url.to_string(), - gemini_api_key: Some("test-api-key".to_string()), - default_model: "test-model".to_string(), - explanation_model: None, - code_fix_model: None, - enrichment_model: None, - temperature: 0.7, - max_tokens: 1024, - timeout_seconds: 30, - enable_streaming: false, - enrichment: Default::default(), - } +fn create_provider(mock_server: &MockServer) -> GoogleAIProvider { + GoogleAIProvider::new("test-api-key", "test-model") + .with_base_url(mock_server.uri()) + .with_timeout(10) } -fn create_test_request() -> LlmRequest { - LlmRequest { - model: "test-model".to_string(), - messages: vec![Message::new("user", "Hello, world!")], - max_tokens: Some(100), - temperature: Some(0.7), - top_p: None, - top_k: None, - frequency_penalty: None, - presence_penalty: None, - stream: Some(false), - } +fn create_test_request() -> CompletionRequest { + CompletionRequest::new() + .with_model("test-model") + .with_message(Message::user("Hello, world!")) + .with_max_tokens(100) + .with_temperature(0.7) } -/// Test successful generation with mocked Gemini API #[tokio::test] -async fn test_gemini_provider_generate_success() { +async fn test_google_ai_provider_complete_success() { let mock_server = MockServer::start().await; let response_body = serde_json::json!({ - "id": "resp-123", - "object": "chat.completion", - "created": 1234567890, - "model": 
"test-model", - "choices": [{ - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! How can I help you?" + "candidates": [{ + "content": { + "parts": [ + { "text": "Hello! How can I help you?" } + ] }, - "finish_reason": "stop" + "finishReason": "STOP" }], - "usage": { - "prompt_tokens": 10, - "completion_tokens": 8, - "total_tokens": 18 + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 8, + "totalTokenCount": 18 } }); Mock::given(method("POST")) - .and(header("x-goog-api-key", "test-api-key")) - .and(header("Content-Type", "application/json")) + .and(path("/models/test-model:generateContent")) + .and(query_param("key", "test-api-key")) .respond_with(ResponseTemplate::new(200).set_body_json(&response_body)) .mount(&mock_server) .await; - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); - - let result = provider.generate(create_test_request()).await; + let provider = create_provider(&mock_server); + let result = provider.complete(create_test_request()).await; assert!(result.is_ok()); let response = result.unwrap(); - assert_eq!(response.id, "resp-123"); - assert_eq!(response.choices.len(), 1); - assert_eq!( - response.choices[0] - .message - .as_ref() - .unwrap() - .content - .as_deref(), - Some("Hello! How can I help you?") - ); + assert_eq!(response.model, "test-model"); + assert_eq!(response.text(), "Hello! 
How can I help you?"); + assert!(!response.is_truncated()); } -/// Test error handling for API errors #[tokio::test] -async fn test_gemini_provider_api_error() { +async fn test_google_ai_provider_api_error_rate_limited() { let mock_server = MockServer::start().await; Mock::given(method("POST")) + .and(path("/models/test-model:generateContent")) + .and(query_param("key", "test-api-key")) .respond_with( ResponseTemplate::new(429).set_body_json(&serde_json::json!({ "error": { @@ -106,161 +76,119 @@ async fn test_gemini_provider_api_error() { .mount(&mock_server) .await; - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); - - let result = provider.generate(create_test_request()).await; + let provider = create_provider(&mock_server); + let result = provider.complete(create_test_request()).await; assert!(result.is_err()); let error = result.unwrap_err(); - assert!(error.to_string().contains("429")); + assert!(error.to_string().contains("Rate limited")); } -/// Test error handling for network timeout #[tokio::test] -async fn test_gemini_provider_timeout() { +async fn test_google_ai_provider_server_error() { let mock_server = MockServer::start().await; Mock::given(method("POST")) - .respond_with(ResponseTemplate::new(200).set_delay(std::time::Duration::from_secs(60))) + .and(path("/models/test-model:generateContent")) + .and(query_param("key", "test-api-key")) + .respond_with( + ResponseTemplate::new(500).set_body_json(&serde_json::json!({ + "error": "Internal server error" + })), + ) .mount(&mock_server) .await; - let mut config = create_test_config(&mock_server.uri()); - config.timeout_seconds = 1; // Very short timeout - let provider = GeminiLlmProvider::new(config); - - let result = provider.generate(create_test_request()).await; - - assert!(result.is_err()); -} - -/// Test error handling when API key is missing -#[tokio::test] -async fn test_gemini_provider_missing_api_key() { - let mock_server = 
MockServer::start().await; - - let mut config = create_test_config(&mock_server.uri()); - config.gemini_api_key = None; - let provider = GeminiLlmProvider::new(config); - - let result = provider.generate(create_test_request()).await; + let provider = create_provider(&mock_server); + let result = provider.complete(create_test_request()).await; assert!(result.is_err()); assert!( result .unwrap_err() .to_string() - .contains("API key not configured") + .contains("Service unavailable") ); } -/// Test streaming generation with mocked API #[tokio::test] -async fn test_gemini_provider_generate_stream() { - let mock_server = MockServer::start().await; - - // Simulate SSE response - let sse_response = "data: {\"id\":\"stream-1\",\"object\":\"chat.completion.chunk\",\"created\":1234567890,\"model\":\"test-model\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"Hello\"},\"finish_reason\":null}]}\n\ndata: {\"id\":\"stream-2\",\"object\":\"chat.completion.chunk\",\"created\":1234567890,\"model\":\"test-model\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\" World\"},\"finish_reason\":null}]}\n\ndata: [DONE]\n"; - - Mock::given(method("POST")) - .and(header("x-goog-api-key", "test-api-key")) - .respond_with( - ResponseTemplate::new(200) - .set_body_string(sse_response) - .insert_header("Content-Type", "text/event-stream"), - ) - .mount(&mock_server) - .await; - - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); - - let mut request = create_test_request(); - request.stream = Some(true); - - let result = provider.generate_stream(request).await; - - assert!(result.is_ok()); - let mut rx = result.unwrap(); - - // Collect streamed responses - let mut responses = Vec::new(); - while let Some(chunk) = rx.recv().await { - if let Ok(response) = chunk { - responses.push(response); - } - } - - assert!( - !responses.is_empty(), - "Should receive at least one response chunk" - ); -} - 
-/// Test handling of malformed API response -#[tokio::test] -async fn test_gemini_provider_malformed_response() { +async fn test_google_ai_provider_malformed_response() { let mock_server = MockServer::start().await; Mock::given(method("POST")) + .and(path("/models/test-model:generateContent")) + .and(query_param("key", "test-api-key")) .respond_with(ResponseTemplate::new(200).set_body_string("not valid json")) .mount(&mock_server) .await; - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); - - let result = provider.generate(create_test_request()).await; + let provider = create_provider(&mock_server); + let result = provider.complete(create_test_request()).await; assert!(result.is_err()); } -/// Test handling of 500 Internal Server Error #[tokio::test] -async fn test_gemini_provider_server_error() { +async fn test_google_ai_provider_stream_success() { let mock_server = MockServer::start().await; + let stream_body = [ + serde_json::json!({ + "candidates": [{ + "content": { "parts": [{ "text": "Hello" }] }, + "finishReason": null + }], + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 3, + "totalTokenCount": 13 + } + }) + .to_string(), + serde_json::json!({ + "candidates": [{ + "content": { "parts": [{ "text": " world" }] }, + "finishReason": "STOP" + }], + "usageMetadata": { + "promptTokenCount": 10, + "candidatesTokenCount": 5, + "totalTokenCount": 15 + } + }) + .to_string(), + ] + .join("\n"); + Mock::given(method("POST")) + .and(path("/models/test-model:streamGenerateContent")) + .and(query_param("key", "test-api-key")) .respond_with( - ResponseTemplate::new(500).set_body_json(&serde_json::json!({ - "error": "Internal server error" - })), + ResponseTemplate::new(200) + .set_body_string(stream_body) + .insert_header("Content-Type", "application/json"), ) .mount(&mock_server) .await; - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); - - 
let result = provider.generate(create_test_request()).await; - - assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("500")); -} - -/// Test handling of 401 Unauthorized -#[tokio::test] -async fn test_gemini_provider_unauthorized() { - let mock_server = MockServer::start().await; + let provider = create_provider(&mock_server); + let mut request = create_test_request(); + request = request.with_stream(true); - Mock::given(method("POST")) - .respond_with( - ResponseTemplate::new(401).set_body_json(&serde_json::json!({ - "error": { - "message": "Invalid API key", - "code": "invalid_api_key" - } - })), - ) - .mount(&mock_server) - .await; + let result = provider.complete_stream(request).await; + assert!(result.is_ok()); - let config = create_test_config(&mock_server.uri()); - let provider = GeminiLlmProvider::new(config); + let mut stream = result.unwrap(); + let mut chunks = Vec::new(); - let result = provider.generate(create_test_request()).await; + while let Some(item) = stream.next().await { + let chunk = item.expect("stream chunk should be Ok"); + if let Some(delta) = chunk.delta { + if let ContentBlock::Text { text } = delta { + chunks.push(text); + } + } + } - assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("401")); + assert_eq!(chunks.join(""), "Hello world"); } diff --git a/vulnera-llm/tests/integration/test_streaming_chunk_parsing.rs b/vulnera-llm/tests/integration/test_streaming_chunk_parsing.rs deleted file mode 100644 index 1e27cd8a..00000000 --- a/vulnera-llm/tests/integration/test_streaming_chunk_parsing.rs +++ /dev/null @@ -1,133 +0,0 @@ -//! Test for streaming chunk parsing from SSE data -//! This test verifies that the Message struct can properly deserialize -//! streaming chunks where the role field is optional. 
- -use vulnera_llm::domain::{Choice, LlmResponse, Message}; - -#[test] -fn test_parse_streaming_chunk_without_role() { - // This simulates a streaming chunk from DeepSeek API where only content is sent - let json_chunk = r#"{ - "id": "chat-35f498af02d04bab805a5618cd618e37", - "object": "chat.completion.chunk", - "created": 1764419768, - "model": "deepseek-v3.1", - "choices": [ - { - "index": 0, - "delta": { - "content": " malicious" - }, - "logprobs": null, - "finish_reason": null - } - ], - "usage": { - "prompt_tokens": 82, - "total_tokens": 434, - "completion_tokens": 352 - } - }"#; - - // This should deserialize successfully now - let response: LlmResponse = serde_json::from_str(json_chunk) - .expect("Failed to parse streaming chunk - the fix didn't work!"); - - // Verify the structure - assert_eq!(response.id, "chat-35f498af02d04bab805a5618cd618e37"); - assert_eq!(response.object, "chat.completion.chunk"); - assert_eq!(response.model, "deepseek-v3.1"); - assert_eq!(response.choices.len(), 1); - - let choice = &response.choices[0]; - assert_eq!(choice.index, 0); - assert!(choice.message.is_none()); - - // The delta should have content but no role (since it's optional now) - let delta = choice.delta.as_ref().expect("Delta should exist"); - assert!( - delta.role.is_none(), - "Role should be None for streaming chunks" - ); - assert_eq!(delta.content, Some(" malicious".to_string())); -} - -#[test] -fn test_parse_streaming_chunk_with_role_in_first_delta() { - // First delta in a stream includes the role - let json_chunk = r#"{ - "id": "chat-35f498af02d04bab805a5618cd618e37", - "object": "chat.completion.chunk", - "created": 1764419768, - "model": "deepseek-v3.1", - "choices": [ - { - "index": 0, - "delta": { - "role": "assistant", - "content": "The response starts here" - }, - "finish_reason": null - } - ] - }"#; - - let response: LlmResponse = - serde_json::from_str(json_chunk).expect("Failed to parse streaming chunk with role"); - - let delta = &response.choices[0] 
- .delta - .as_ref() - .expect("Delta should exist"); - assert_eq!( - delta.role, - Some("assistant".to_string()), - "Role should be present in first delta" - ); - assert_eq!(delta.content, Some("The response starts here".to_string())); -} - -#[test] -fn test_message_new_creates_message_with_role() { - let msg = Message::new("user", "Hello, world!"); - - assert_eq!(msg.role, Some("user".to_string())); - assert_eq!(msg.content, Some("Hello, world!".to_string())); -} - -#[test] -fn test_parse_full_response_still_works() { - // Ensure non-streaming responses still work - let json_response = r#"{ - "id": "test-id", - "object": "chat.completion", - "created": 1764419768, - "model": "test-model", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "This is the complete response" - }, - "finish_reason": "stop" - } - ], - "usage": { - "prompt_tokens": 10, - "completion_tokens": 8, - "total_tokens": 18 - } - }"#; - - let response: LlmResponse = - serde_json::from_str(json_response).expect("Failed to parse non-streaming response"); - - assert!(response.choices[0].message.is_some()); - let msg = response.choices[0].message.as_ref().unwrap(); - assert_eq!(msg.role, Some("assistant".to_string())); - assert_eq!( - msg.content, - Some("This is the complete response".to_string()) - ); -} diff --git a/vulnera-llm/tests/unit/test_explain_vulnerability.rs b/vulnera-llm/tests/unit/test_explain_vulnerability.rs index 87a7f5a0..99d9c32e 100644 --- a/vulnera-llm/tests/unit/test_explain_vulnerability.rs +++ b/vulnera-llm/tests/unit/test_explain_vulnerability.rs @@ -1,18 +1,20 @@ //! 
Unit tests for ExplainVulnerabilityUseCase +use futures::StreamExt; use std::sync::Arc; use vulnera_llm::application::use_cases::ExplainVulnerabilityUseCase; +use vulnera_llm::domain::LlmError; mod common { include!("../common/mod.rs"); } -use common::{create_streaming_response, create_test_config, MockLlmProvider}; +use common::{MockLlmProvider, create_completion_response, create_test_config}; /// Test successful vulnerability explanation streaming #[tokio::test] async fn test_explain_vulnerability_stream_success() { - let response = create_streaming_response( + let response = create_completion_response( "This vulnerability allows attackers to execute arbitrary code...", ); let provider = Arc::new(MockLlmProvider::new().with_response(response)); @@ -29,8 +31,8 @@ async fn test_explain_vulnerability_stream_success() { assert!(result.is_ok()); - let mut rx = result.unwrap(); - let chunk = rx.recv().await; + let mut stream = result.unwrap(); + let chunk = stream.next().await; assert!(chunk.is_some()); assert!(chunk.unwrap().is_ok()); } @@ -38,7 +40,8 @@ async fn test_explain_vulnerability_stream_success() { /// Test error handling when provider fails #[tokio::test] async fn test_explain_vulnerability_stream_provider_error() { - let provider = Arc::new(MockLlmProvider::new().with_error("Network timeout")); + let provider = + Arc::new(MockLlmProvider::new().with_error(LlmError::network("Network timeout"))); let config = create_test_config(); let use_case = ExplainVulnerabilityUseCase::new(provider, config); @@ -53,7 +56,7 @@ async fn test_explain_vulnerability_stream_provider_error() { /// Test that explanation model is used when configured #[tokio::test] async fn test_explain_vulnerability_uses_configured_model() { - let response = create_streaming_response("Explanation text"); + let response = create_completion_response("Explanation text"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = 
ExplainVulnerabilityUseCase::new(provider.clone(), config); @@ -70,7 +73,7 @@ async fn test_explain_vulnerability_uses_configured_model() { /// Test that default model is used when explanation model is not set #[tokio::test] async fn test_explain_vulnerability_uses_default_model() { - let response = create_streaming_response("Explanation text"); + let response = create_completion_response("Explanation text"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let mut config = create_test_config(); config.explanation_model = None; @@ -88,14 +91,12 @@ async fn test_explain_vulnerability_uses_default_model() { /// Test that streaming is enabled in the request #[tokio::test] async fn test_explain_vulnerability_enables_streaming() { - let response = create_streaming_response("Explanation"); + let response = create_completion_response("Explanation"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = ExplainVulnerabilityUseCase::new(provider.clone(), config); - let _ = use_case - .execute_stream("CVE-001", "Medium", "Desc") - .await; + let _ = use_case.execute_stream("CVE-001", "Medium", "Desc").await; let requests = provider.captured_requests.lock().await; assert_eq!(requests.len(), 1); @@ -105,7 +106,7 @@ async fn test_explain_vulnerability_enables_streaming() { /// Test request contains finding details in the prompt #[tokio::test] async fn test_explain_vulnerability_includes_finding_details() { - let response = create_streaming_response("Explanation"); + let response = create_completion_response("Explanation"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = ExplainVulnerabilityUseCase::new(provider.clone(), config); @@ -121,7 +122,7 @@ async fn test_explain_vulnerability_includes_finding_details() { let requests = provider.captured_requests.lock().await; assert_eq!(requests.len(), 1); - let 
user_message = &requests[0].messages[0].content; + let user_message = requests[0].messages[0].text(); assert!(user_message.contains("CVE-2023-12345")); assert!(user_message.contains("Critical")); assert!(user_message.contains("deserialization")); @@ -133,7 +134,7 @@ async fn test_explain_vulnerability_various_severities() { let severities = ["Critical", "High", "Medium", "Low", "Informational"]; for severity in severities { - let response = create_streaming_response(&format!("{} severity explanation", severity)); + let response = create_completion_response(&format!("{} severity explanation", severity)); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = ExplainVulnerabilityUseCase::new(provider.clone(), config); @@ -145,6 +146,6 @@ async fn test_explain_vulnerability_various_severities() { assert!(result.is_ok(), "Failed for severity: {}", severity); let requests = provider.captured_requests.lock().await; - assert!(requests[0].messages[0].content.contains(severity)); + assert!(requests[0].messages[0].text().contains(severity)); } } diff --git a/vulnera-llm/tests/unit/test_generate_code_fix.rs b/vulnera-llm/tests/unit/test_generate_code_fix.rs index ef33e070..8e149613 100644 --- a/vulnera-llm/tests/unit/test_generate_code_fix.rs +++ b/vulnera-llm/tests/unit/test_generate_code_fix.rs @@ -3,12 +3,13 @@ use rstest::rstest; use std::sync::Arc; use vulnera_llm::application::use_cases::GenerateCodeFixUseCase; +use vulnera_llm::domain::LlmError; mod common { include!("../common/mod.rs"); } -use common::{create_test_config, MockLlmProvider}; +use common::{MockLlmProvider, create_test_config}; /// Test successful code fix generation with valid JSON response #[tokio::test] @@ -76,7 +77,9 @@ This ensures safe HTML handling."#; /// Test error handling when provider returns an error #[tokio::test] async fn test_generate_code_fix_provider_error() { - let provider = 
Arc::new(MockLlmProvider::new().with_error("API rate limit exceeded")); + let provider = Arc::new( + MockLlmProvider::new().with_error(LlmError::rate_limited("API rate limit exceeded")), + ); let config = create_test_config(); let use_case = GenerateCodeFixUseCase::new(provider, config); diff --git a/vulnera-llm/tests/unit/test_natural_language_query.rs b/vulnera-llm/tests/unit/test_natural_language_query.rs index f70fed94..1f717a23 100644 --- a/vulnera-llm/tests/unit/test_natural_language_query.rs +++ b/vulnera-llm/tests/unit/test_natural_language_query.rs @@ -2,17 +2,20 @@ use std::sync::Arc; use vulnera_llm::application::use_cases::NaturalLanguageQueryUseCase; +use vulnera_llm::domain::LlmError; mod common { include!("../common/mod.rs"); } -use common::{create_llm_response, create_test_config, MockLlmProvider}; +use common::{MockLlmProvider, create_completion_response, create_test_config}; /// Test successful natural language query #[tokio::test] async fn test_natural_language_query_success() { - let response = create_llm_response("Based on the findings, there are 3 critical SQL injection vulnerabilities in the authentication module."); + let response = create_completion_response( + "Based on the findings, there are 3 critical SQL injection vulnerabilities in the authentication module.", + ); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider, config); @@ -24,7 +27,10 @@ async fn test_natural_language_query_success() { ]"#; let result = use_case - .execute("How many critical SQL injection issues are there?", findings_json) + .execute( + "How many critical SQL injection issues are there?", + findings_json, + ) .await; assert!(result.is_ok()); @@ -36,14 +42,13 @@ async fn test_natural_language_query_success() { /// Test query with empty findings #[tokio::test] async fn test_natural_language_query_empty_findings() { - let response = 
create_llm_response("No vulnerabilities were found in the provided findings."); + let response = + create_completion_response("No vulnerabilities were found in the provided findings."); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider, config); - let result = use_case - .execute("What vulnerabilities exist?", "[]") - .await; + let result = use_case.execute("What vulnerabilities exist?", "[]").await; assert!(result.is_ok()); let answer = result.unwrap(); @@ -53,23 +58,29 @@ async fn test_natural_language_query_empty_findings() { /// Test error handling when provider fails #[tokio::test] async fn test_natural_language_query_provider_error() { - let provider = Arc::new(MockLlmProvider::new().with_error("Service unavailable")); + let provider = Arc::new( + MockLlmProvider::new().with_error(LlmError::ServiceUnavailable( + "Service unavailable".to_string(), + )), + ); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider, config); let result = use_case.execute("Any query", "[]").await; assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("Service unavailable")); + assert!( + result + .unwrap_err() + .to_string() + .contains("Service unavailable") + ); } /// Test that default model is always used #[tokio::test] async fn test_natural_language_query_uses_default_model() { - let response = create_llm_response("Answer"); + let response = create_completion_response("Answer"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider.clone(), config); @@ -84,7 +95,7 @@ async fn test_natural_language_query_uses_default_model() { /// Test that streaming is disabled for queries #[tokio::test] async fn test_natural_language_query_disables_streaming() { - let response = 
create_llm_response("Answer"); + let response = create_completion_response("Answer"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider.clone(), config); @@ -93,13 +104,13 @@ async fn test_natural_language_query_disables_streaming() { let requests = provider.captured_requests.lock().await; assert_eq!(requests.len(), 1); - assert_eq!(requests[0].stream, Some(false)); + assert_eq!(requests[0].stream, None); } /// Test request contains query and findings in prompt #[tokio::test] async fn test_natural_language_query_includes_context() { - let response = create_llm_response("Answer"); + let response = create_completion_response("Answer"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider.clone(), config); @@ -110,7 +121,7 @@ async fn test_natural_language_query_includes_context() { let _ = use_case.execute(query, findings).await; let requests = provider.captured_requests.lock().await; - let user_message = &requests[0].messages[0].content; + let user_message = requests[0].messages[0].text(); assert!(user_message.contains("payment module")); assert!(user_message.contains("PAY-001")); } @@ -118,7 +129,7 @@ async fn test_natural_language_query_includes_context() { /// Test config parameters are applied #[tokio::test] async fn test_natural_language_query_applies_config() { - let response = create_llm_response("Answer"); + let response = create_completion_response("Answer"); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let mut config = create_test_config(); config.max_tokens = 2048; @@ -144,7 +155,7 @@ async fn test_natural_language_query_various_queries() { ]; for query in queries { - let response = create_llm_response(&format!("Answer to: {}", query)); + let response = create_completion_response(&format!("Answer to: {}", 
query)); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); let use_case = NaturalLanguageQueryUseCase::new(provider, config); @@ -157,8 +168,7 @@ async fn test_natural_language_query_various_queries() { /// Test error when LLM returns empty response #[tokio::test] async fn test_natural_language_query_empty_response() { - let mut response = create_llm_response(""); - response.choices[0].message = None; + let response = create_completion_response(""); let provider = Arc::new(MockLlmProvider::new().with_response(response)); let config = create_test_config(); @@ -166,6 +176,6 @@ async fn test_natural_language_query_empty_response() { let result = use_case.execute("Query", "[]").await; - assert!(result.is_err()); - assert!(result.unwrap_err().to_string().contains("No content")); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), ""); }