diff --git a/Cargo.lock b/Cargo.lock index a307d9a..6e49b88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -196,7 +196,6 @@ dependencies = [ "objc2", "objc2-foundation", "objc2-natural-language", - "once_cell", "regex", "reqwest", "serde", @@ -206,7 +205,6 @@ dependencies = [ "tempfile", "thiserror 2.0.17", "tokio", - "tokio-test", ] [[package]] @@ -2034,28 +2032,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-stream" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-test" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545" -dependencies = [ - "futures-core", - "tokio", - "tokio-stream", -] - [[package]] name = "tokio-util" version = "0.7.18" diff --git a/Cargo.toml b/Cargo.toml index e75a2f8..ae9e283 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" repository = "https://github.com/jserv/cjk-token-reducer" keywords = ["claude", "cjk-token", "translation", "cjk", "chinese", "japanese", "korean"] categories = ["command-line-utilities", "text-processing"] -rust-version = "1.70" +rust-version = "1.80" [[bin]] name = "cjk-token-reducer" @@ -36,7 +36,6 @@ reqwest = { version = "0.12", features = ["json", "gzip", "brotli"] } serde = { version = "1", features = ["derive"] } serde_json = "1" regex = "1" -once_cell = "1" dirs = "5" chrono = { version = "0.4", features = ["serde"] } thiserror = "2" @@ -61,7 +60,6 @@ objc2-foundation = { version = "0.2", features = ["NSString", "NSArray", "NSRang objc2-natural-language = { version = "0.2", features = ["NLTagger", "NLTagScheme", "NLTokenizer"], optional = true } [dev-dependencies] -tokio-test = "0.4" tempfile = "3" [profile.release] diff --git a/src/cache.rs 
b/src/cache.rs index 686dc50..4f56fa2 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -467,6 +467,78 @@ mod tests { cache.clear().unwrap(); } + #[cfg(feature = "cache")] + #[test] + fn test_cache_ttl_expiration() { + use crate::config::CacheConfig; + + let temp_dir = TempDir::new().unwrap(); + let cache_path = temp_dir.path().join("test_ttl.db"); + + let config = CacheConfig { + enabled: true, + ttl_days: 1, // 1 day TTL + max_size_mb: 10, + }; + + let cache = TranslationCache::open_at_path(&config, &cache_path).unwrap(); + + // Insert an entry with a timestamp 2 days in the past + let key = TranslationCache::make_key("ko", "en", "안녕"); + let old_entry = CacheEntry { + translated: "Hello".to_string(), + timestamp: chrono::Utc::now().timestamp() - 2 * 24 * 60 * 60, // 2 days ago + source_lang: "ko".to_string(), + target_lang: "en".to_string(), + }; + + cache.put(&key, &old_entry); + + // Should return None (expired) and remove the entry + let retrieved = cache.get(&key); + assert!(retrieved.is_none(), "Expired entry should not be returned"); + } + + #[cfg(feature = "cache")] + #[test] + fn test_cache_miss_nonexistent_key() { + use crate::config::CacheConfig; + + let temp_dir = TempDir::new().unwrap(); + let cache_path = temp_dir.path().join("test_miss.db"); + let config = CacheConfig::default(); + let cache = TranslationCache::open_at_path(&config, &cache_path).unwrap(); + + assert!(cache.get("nonexistent_key").is_none()); + } + + #[cfg(feature = "cache")] + #[test] + fn test_cache_clear_empties_all() { + use crate::config::CacheConfig; + + let temp_dir = TempDir::new().unwrap(); + let cache_path = temp_dir.path().join("test_clear.db"); + let config = CacheConfig::default(); + let cache = TranslationCache::open_at_path(&config, &cache_path).unwrap(); + + // Insert multiple entries + for i in 0..5 { + let key = format!("key_{}", i); + let entry = CacheEntry { + translated: format!("value_{}", i), + timestamp: chrono::Utc::now().timestamp(), + source_lang: 
"ko".into(), + target_lang: "en".into(), + }; + cache.put(&key, &entry); + } + assert_eq!(cache.stats().entries, 5); + + cache.clear().unwrap(); + assert_eq!(cache.stats().entries, 0); + } + #[cfg(not(feature = "cache"))] #[test] fn test_stub_cache_operations() { diff --git a/src/config.rs b/src/config.rs index 6e7d151..36e63ee 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,174 +2,82 @@ use crate::preserver::PreserveConfig; use serde::{Deserialize, Serialize}; use std::path::PathBuf; -fn default_true() -> bool { - true -} - const CONFIG_FILENAME: &str = ".cjk-token.json"; /// Cache configuration with serde defaults #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(default, rename_all = "camelCase")] pub struct CacheConfig { - #[serde(default = "default_cache_enabled")] pub enabled: bool, - - #[serde(default = "default_ttl_days")] pub ttl_days: u32, - - #[serde(default = "default_max_size_mb")] pub max_size_mb: u32, } +impl Default for CacheConfig { + fn default() -> Self { + Self { + enabled: true, + ttl_days: 30, + max_size_mb: 10, + } + } +} + /// Resilience configuration for retry, timeout, and circuit breaker #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(default, rename_all = "camelCase")] pub struct ResilienceConfig { /// Request timeout in seconds (default: 30) - #[serde(default = "default_timeout_secs")] pub timeout_secs: u64, - /// Connection timeout in seconds (default: 5) - #[serde(default = "default_connect_timeout_secs")] pub connect_timeout_secs: u64, - /// Maximum retry attempts for transient failures (default: 3) - #[serde(default = "default_max_retries")] pub max_retries: u32, - /// Base delay for exponential backoff in milliseconds (default: 200) - #[serde(default = "default_retry_base_delay_ms")] pub retry_base_delay_ms: u64, - /// Circuit breaker failure threshold before opening (default: 5) - #[serde(default = "default_circuit_breaker_threshold")] pub 
circuit_breaker_threshold: u32, - /// Circuit breaker reset timeout in seconds (default: 60) - #[serde(default = "default_circuit_breaker_reset_secs")] pub circuit_breaker_reset_secs: u64, - /// Enable graceful fallback to passthrough on failure (default: true) - #[serde(default = "default_true")] pub fallback_to_passthrough: bool, } -// Resilience defaults -const DEFAULT_TIMEOUT_SECS: u64 = 30; -const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 5; -const DEFAULT_MAX_RETRIES: u32 = 3; -const DEFAULT_RETRY_BASE_DELAY_MS: u64 = 200; -const DEFAULT_CIRCUIT_BREAKER_THRESHOLD: u32 = 5; -const DEFAULT_CIRCUIT_BREAKER_RESET_SECS: u64 = 60; - -fn default_timeout_secs() -> u64 { - DEFAULT_TIMEOUT_SECS -} -fn default_connect_timeout_secs() -> u64 { - DEFAULT_CONNECT_TIMEOUT_SECS -} -fn default_max_retries() -> u32 { - DEFAULT_MAX_RETRIES -} -fn default_retry_base_delay_ms() -> u64 { - DEFAULT_RETRY_BASE_DELAY_MS -} -fn default_circuit_breaker_threshold() -> u32 { - DEFAULT_CIRCUIT_BREAKER_THRESHOLD -} -fn default_circuit_breaker_reset_secs() -> u64 { - DEFAULT_CIRCUIT_BREAKER_RESET_SECS -} - impl Default for ResilienceConfig { fn default() -> Self { Self { - timeout_secs: DEFAULT_TIMEOUT_SECS, - connect_timeout_secs: DEFAULT_CONNECT_TIMEOUT_SECS, - max_retries: DEFAULT_MAX_RETRIES, - retry_base_delay_ms: DEFAULT_RETRY_BASE_DELAY_MS, - circuit_breaker_threshold: DEFAULT_CIRCUIT_BREAKER_THRESHOLD, - circuit_breaker_reset_secs: DEFAULT_CIRCUIT_BREAKER_RESET_SECS, + timeout_secs: 30, + connect_timeout_secs: 5, + max_retries: 3, + retry_base_delay_ms: 200, + circuit_breaker_threshold: 5, + circuit_breaker_reset_secs: 60, fallback_to_passthrough: true, } } } -// Cache defaults -const DEFAULT_CACHE_ENABLED: bool = true; -const DEFAULT_TTL_DAYS: u32 = 30; -const DEFAULT_MAX_SIZE_MB: u32 = 10; - -fn default_cache_enabled() -> bool { - DEFAULT_CACHE_ENABLED -} -fn default_ttl_days() -> u32 { - DEFAULT_TTL_DAYS -} -fn default_max_size_mb() -> u32 { - DEFAULT_MAX_SIZE_MB -} - -impl Default for 
CacheConfig { - fn default() -> Self { - Self { - enabled: DEFAULT_CACHE_ENABLED, - ttl_days: DEFAULT_TTL_DAYS, - max_size_mb: DEFAULT_MAX_SIZE_MB, - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(default, rename_all = "camelCase")] pub struct Config { - #[serde(default = "default_output_language")] pub output_language: String, - - #[serde(default = "default_enable_stats")] pub enable_stats: bool, - - #[serde(default = "default_threshold")] pub threshold: f64, - /// Collapse internal whitespace to single spaces for token reduction. /// WARNING: This destroys code indentation. Only enable for non-code prompts. /// Default: false (safe) - #[serde(default)] pub normalize_whitespace: bool, - - #[serde(default)] pub cache: CacheConfig, - - #[serde(default)] pub preserve: PreserveConfig, - - #[serde(default)] pub resilience: ResilienceConfig, } -// Config defaults -const DEFAULT_OUTPUT_LANGUAGE: &str = "en"; -const DEFAULT_ENABLE_STATS: bool = true; -const DEFAULT_THRESHOLD: f64 = 0.1; - -fn default_output_language() -> String { - DEFAULT_OUTPUT_LANGUAGE.into() -} -fn default_enable_stats() -> bool { - DEFAULT_ENABLE_STATS -} -fn default_threshold() -> f64 { - DEFAULT_THRESHOLD -} - impl Default for Config { fn default() -> Self { Self { - output_language: DEFAULT_OUTPUT_LANGUAGE.into(), - enable_stats: DEFAULT_ENABLE_STATS, - threshold: DEFAULT_THRESHOLD, + output_language: "en".into(), + enable_stats: true, + threshold: 0.1, normalize_whitespace: false, cache: CacheConfig::default(), preserve: PreserveConfig::default(), @@ -335,4 +243,57 @@ mod tests { assert_eq!(config.resilience.max_retries, 3); assert!(config.resilience.fallback_to_passthrough); } + + #[test] + fn test_cache_config_deserialization() { + let json = r#"{"enabled": false, "ttlDays": 7, "maxSizeMb": 5}"#; + let config: CacheConfig = serde_json::from_str(json).unwrap(); + assert!(!config.enabled); + assert_eq!(config.ttl_days, 7); + 
assert_eq!(config.max_size_mb, 5); + } + + #[test] + fn test_cache_config_partial_deserialization() { + // Only override ttlDays, rest should be defaults + let json = r#"{"ttlDays": 14}"#; + let config: CacheConfig = serde_json::from_str(json).unwrap(); + assert!(config.enabled); // default + assert_eq!(config.ttl_days, 14); + assert_eq!(config.max_size_mb, 10); // default + } + + #[test] + fn test_config_with_nested_cache() { + let json = r#"{"cache": {"enabled": false, "ttlDays": 1}}"#; + let config: Config = serde_json::from_str(json).unwrap(); + assert!(!config.cache.enabled); + assert_eq!(config.cache.ttl_days, 1); + assert_eq!(config.cache.max_size_mb, 10); // default + } + + #[test] + fn test_config_serialization_roundtrip() { + let config = Config::default(); + let json = serde_json::to_string(&config).unwrap(); + let deserialized: Config = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.output_language, config.output_language); + assert_eq!(deserialized.threshold, config.threshold); + assert_eq!(deserialized.enable_stats, config.enable_stats); + assert_eq!(deserialized.cache.enabled, config.cache.enabled); + assert_eq!(deserialized.cache.ttl_days, config.cache.ttl_days); + assert_eq!( + deserialized.resilience.max_retries, + config.resilience.max_retries + ); + } + + #[test] + fn test_resilience_config_fallback_override() { + let json = r#"{"fallbackToPassthrough": false}"#; + let config: ResilienceConfig = serde_json::from_str(json).unwrap(); + assert!(!config.fallback_to_passthrough); + // Other fields should still be defaults + assert_eq!(config.timeout_secs, 30); + } } diff --git a/src/detector.rs b/src/detector.rs index e2d93f9..45289dd 100644 --- a/src/detector.rs +++ b/src/detector.rs @@ -220,4 +220,55 @@ mod tests { assert!(result.ratio > 0.0); assert!(result.ratio < 1.0); } + + #[test] + fn test_language_code() { + assert_eq!(Language::Chinese.code(), "zh-TW"); + assert_eq!(Language::Japanese.code(), "ja"); + 
assert_eq!(Language::Korean.code(), "ko"); + assert_eq!(Language::English.code(), "en"); + assert_eq!(Language::Unknown.code(), "auto"); + } + + #[test] + fn test_is_cjk_char_extended_ranges() { + // CJK Extension A + assert!(is_cjk_char(&'\u{3400}')); + // CJK Compatibility Ideographs + assert!(is_cjk_char(&'\u{F900}')); + // Katakana Phonetic Extensions + assert!(is_cjk_char(&'\u{31F0}')); + // Hangul Compatibility Jamo + assert!(is_cjk_char(&'\u{3130}')); + // CJK Symbols and Punctuation (ideographic comma) + assert!(is_cjk_char(&'\u{3001}')); + // Bopomofo + assert!(is_cjk_char(&'\u{3100}')); + // Fullwidth Forms (fullwidth exclamation) + assert!(is_cjk_char(&'\u{FF01}')); + // Non-CJK + assert!(!is_cjk_char(&'A')); + assert!(!is_cjk_char(&' ')); + assert!(!is_cjk_char(&'é')); + } + + #[test] + fn test_is_cjk_char_supplementary_planes() { + // CJK Extension B (outside BMP) + assert!(is_cjk_char(&'\u{20000}')); + assert!(is_cjk_char(&'\u{2A6DF}')); + // CJK Extension C + assert!(is_cjk_char(&'\u{2A700}')); + // CJK Extension G + assert!(is_cjk_char(&'\u{30000}')); + } + + #[test] + fn test_detect_hangul_jamo_extended() { + // Hangul Jamo Extended-A (U+A960..U+A97F) and Extended-B (U+D7B0..U+D7FF) + // are in is_cjk_char but NOT counted by detect_language's match arms, + // so they contribute to total but not to any language score + assert!(is_cjk_char(&'\u{A960}')); + assert!(is_cjk_char(&'\u{D7B0}')); + } } diff --git a/src/error.rs b/src/error.rs index 6fd3cfb..c61e9bc 100644 --- a/src/error.rs +++ b/src/error.rs @@ -169,10 +169,6 @@ impl Error { /// Crate-level Result type alias for convenience pub type Result = std::result::Result; -/// Backward compatibility alias -#[deprecated(note = "Use Error instead")] -pub type TokenSaverError = Error; - #[cfg(test)] mod tests { use super::*; @@ -213,10 +209,19 @@ mod tests { } .is_retryable()); assert!(Error::Timeout.is_retryable()); + assert!(Error::ConnectionFailed.is_retryable()); assert!(!Error::Config { message: 
"bad config".into() } .is_retryable()); + assert!(!Error::AuthError { + status: StatusCode::UNAUTHORIZED + } + .is_retryable()); + assert!(!Error::QuotaExceeded { + status: StatusCode::PAYMENT_REQUIRED + } + .is_retryable()); } #[test] @@ -269,4 +274,95 @@ mod tests { let err = Error::Timeout; assert_eq!(err.retry_after_secs(), None); } + + #[test] + fn test_error_category_advice_all_variants() { + // Every category should return non-empty advice + let categories = [ + ErrorCategory::Auth, + ErrorCategory::RateLimit, + ErrorCategory::Quota, + ErrorCategory::Network, + ErrorCategory::Server, + ErrorCategory::Client, + ErrorCategory::Config, + ErrorCategory::Cache, + ErrorCategory::Unknown, + ]; + for cat in categories { + assert!(!cat.advice().is_empty(), "{:?} should have advice", cat); + } + } + + #[test] + fn test_error_display_all_variants() { + // Every error variant should produce a non-empty Display string + let errors: Vec = vec![ + Error::Io(std::io::Error::new(std::io::ErrorKind::Other, "test")), + Error::Json(serde_json::from_str::<()>("bad").unwrap_err()), + Error::RateLimited { + retry_after_secs: Some(10), + }, + Error::RetryableHttp { + status: StatusCode::BAD_GATEWAY, + }, + Error::AuthError { + status: StatusCode::UNAUTHORIZED, + }, + Error::QuotaExceeded { + status: StatusCode::PAYMENT_REQUIRED, + }, + Error::Translation { + message: "test".into(), + }, + Error::Config { + message: "test".into(), + }, + Error::Cache { + message: "test".into(), + }, + Error::CircuitOpen(30), + Error::Timeout, + Error::ConnectionFailed, + ]; + for err in errors { + let msg = err.to_string(); + assert!(!msg.is_empty(), "{:?} should have Display output", err); + } + } + + #[test] + fn test_category_all_error_variants() { + // Test category() for variants not covered by other tests + let io_err = Error::Io(std::io::Error::new(std::io::ErrorKind::Other, "test")); + assert_eq!(io_err.category(), ErrorCategory::Cache); + + let json_err = 
Error::Json(serde_json::from_str::<()>("bad").unwrap_err()); + assert_eq!(json_err.category(), ErrorCategory::Client); + + let translation_err = Error::Translation { + message: "test".into(), + }; + assert_eq!(translation_err.category(), ErrorCategory::Client); + + let circuit_err = Error::CircuitOpen(60); + assert_eq!(circuit_err.category(), ErrorCategory::Server); + + let conn_err = Error::ConnectionFailed; + assert_eq!(conn_err.category(), ErrorCategory::Network); + } + + #[test] + fn test_from_status_client_error() { + // 400 BAD_REQUEST should become Translation variant + let err = Error::from_status(StatusCode::BAD_REQUEST); + assert!(matches!(err, Error::Translation { .. })); + assert_eq!(err.category(), ErrorCategory::Client); + } + + #[test] + fn test_from_status_with_retry_after() { + let err = Error::from_status_with_retry_after(StatusCode::TOO_MANY_REQUESTS, Some(42)); + assert_eq!(err.retry_after_secs(), Some(42)); + } } diff --git a/src/main.rs b/src/main.rs index d5a05f0..a47ba96 100644 --- a/src/main.rs +++ b/src/main.rs @@ -306,64 +306,55 @@ fn handle_show_preserved() { ); println!(); - // Print each category - if !code_blocks.is_empty() { - println!("{} ({})", "Code Blocks".green().bold(), code_blocks.len()); - for seg in &code_blocks { - let preview = if seg.original.len() > 60 { - format!("{}...", &seg.original[..57]) - } else { - seg.original.clone() - }; - println!(" {}", preview.replace('\n', "\\n").dimmed()); - } - println!(); - } - - if !inline_code.is_empty() { - println!("{} ({})", "Inline Code".green().bold(), inline_code.len()); - for seg in &inline_code { - println!(" {}", seg.original.dimmed()); - } - println!(); - } - - if !no_translate.is_empty() { - println!( - "{} ({})", - "No-Translate Markers".yellow().bold(), - no_translate.len() - ); - for seg in &no_translate { - println!(" {} (markers stripped)", seg.original.yellow()); - } - println!(); - } - - if !english_terms.is_empty() { - println!( - "{} ({})", - "English Technical 
Terms".blue().bold(), - english_terms.len() - ); - for seg in &english_terms { - println!(" {}", seg.original.blue()); - } - println!(); - } - - if !urls.is_empty() { - println!("{} ({})", "URLs".cyan().bold(), urls.len()); - for seg in &urls { - println!(" {}", seg.original.dimmed()); + // Print each category with distinct colors + let categories: &[(&str, SegmentType, &[&PreservedSegment])] = &[ + ("Code Blocks", SegmentType::CodeBlock, &code_blocks), + ("Inline Code", SegmentType::InlineCode, &inline_code), + ( + "No-Translate Markers", + SegmentType::NoTranslate, + &no_translate, + ), + ( + "English Technical Terms", + SegmentType::EnglishTerm, + &english_terms, + ), + ("URLs", SegmentType::Url, &urls), + ("File Paths", SegmentType::FilePath, &paths), + ]; + for (name, seg_type, segments) in categories { + if segments.is_empty() { + continue; } - println!(); - } - - if !paths.is_empty() { - println!("{} ({})", "File Paths".cyan().bold(), paths.len()); - for seg in &paths { - println!(" {}", seg.original.dimmed()); + // Color header by category: code=green, markers=yellow, terms=blue, urls/paths=cyan + let header = match seg_type { + SegmentType::CodeBlock | SegmentType::InlineCode => name.green().bold(), + SegmentType::NoTranslate => name.yellow().bold(), + SegmentType::EnglishTerm => name.blue().bold(), + _ => name.cyan().bold(), + }; + println!("{} ({})", header, segments.len()); + for seg in *segments { + match seg_type { + SegmentType::CodeBlock => { + let display = if seg.original.chars().count() > 60 { // measure and cut by chars: byte-slicing CJK text panics mid-character + format!("{}...", seg.original.chars().take(57).collect::<String>()) + } else { + seg.original.clone() + }; + println!(" {}", display.replace('\n', "\\n").dimmed()); + } + SegmentType::NoTranslate => { + println!(" {} (markers stripped)", seg.original.yellow()); + } + SegmentType::EnglishTerm => { + println!(" {}", seg.original.blue()); + } + _ => { + println!(" {}", seg.original.dimmed()); + } + } } println!(); } diff --git a/src/preserver.rs b/src/preserver.rs index a387070..4d3afde 100644 --- 
a/src/preserver.rs +++ b/src/preserver.rs @@ -1,5 +1,5 @@ -use once_cell::sync::Lazy; use regex::Regex; +use std::sync::LazyLock; #[derive(Debug, Clone)] pub struct PreservedSegment { @@ -23,21 +23,21 @@ pub struct PreserveResult { pub segments: Vec, } -// Lazy-compiled regexes (compiled once, reused) -static CODE_BLOCK_RE: Lazy = Lazy::new(|| Regex::new(r"```[\s\S]*?```").unwrap()); -static INLINE_CODE_RE: Lazy = Lazy::new(|| Regex::new(r"`[^`]+`").unwrap()); +// LazyLock-compiled regexes (compiled once, reused) +static CODE_BLOCK_RE: LazyLock = LazyLock::new(|| Regex::new(r"```[\s\S]*?```").unwrap()); +static INLINE_CODE_RE: LazyLock = LazyLock::new(|| Regex::new(r"`[^`]+`").unwrap()); // Exclude trailing punctuation from URLs -static URL_RE: Lazy = Lazy::new(|| Regex::new(r"https?://[^\s]*[^\s.,;)]").unwrap()); -static FILE_PATH_RE: Lazy = - Lazy::new(|| Regex::new(r"(?:\.\.?/)?(?:[\w.\-]+/)+[\w.\-]+(?:\.\w+)?").unwrap()); +static URL_RE: LazyLock = LazyLock::new(|| Regex::new(r"https?://[^\s]*[^\s.,;)]").unwrap()); +static FILE_PATH_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?:\.\.?/)?(?:[\w.\-]+/)+[\w.\-]+(?:\.\w+)?").unwrap()); // No-translate markers: [[text]] and ==text== -static WIKI_MARKER_RE: Lazy = Lazy::new(|| Regex::new(r"\[\[([^\]]+)\]\]").unwrap()); -static HIGHLIGHT_MARKER_RE: Lazy = Lazy::new(|| Regex::new(r"==([^=]+)==").unwrap()); +static WIKI_MARKER_RE: LazyLock = LazyLock::new(|| Regex::new(r"\[\[([^\]]+)\]\]").unwrap()); +static HIGHLIGHT_MARKER_RE: LazyLock = LazyLock::new(|| Regex::new(r"==([^=]+)==").unwrap()); // English technical terms: camelCase, PascalCase, SCREAMING_CASE, snake_case identifiers // Matches: getUserData, API_KEY, MyClass, fetch_results, MAX_SIZE, getURLData, XMLParser -static ENGLISH_TERM_RE: Lazy = Lazy::new(|| { +static ENGLISH_TERM_RE: LazyLock = LazyLock::new(|| { Regex::new(r"(?x) # camelCase with acronyms: getURLData, parseXMLFile, myHTTPClient [a-z]+(?:[A-Z]+[a-z]*)+ | @@ -611,26 +611,18 @@ pub fn 
get_term_detector(use_nlp: bool) -> Box { /// Configuration for preservation behavior #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(default, rename_all = "camelCase")] pub struct PreserveConfig { /// Enable [[...]] wiki-style markers - #[serde(default = "default_true")] pub wiki_markers: bool, /// Enable ==...== highlight-style markers - #[serde(default = "default_true")] pub highlight_markers: bool, /// Enable auto-detection of English technical terms in CJK text - #[serde(default = "default_true")] pub english_terms: bool, /// Use macOS NLP for term detection (macOS only, falls back to regex) - #[serde(default = "default_true")] pub use_nlp: bool, } -fn default_true() -> bool { - true -} - impl Default for PreserveConfig { fn default() -> Self { Self { diff --git a/src/resilience.rs b/src/resilience.rs index 54f8ffd..07761d4 100644 --- a/src/resilience.rs +++ b/src/resilience.rs @@ -634,4 +634,78 @@ mod tests { assert_eq!(rl.rate_limit_hits(), 3); } + + #[test] + fn test_rate_limiter_reset() { + let rl = RateLimiter::new(); + rl.record_rate_limit(Some(10)); + assert!(rl.current_delay_ms() > 0); + + rl.reset(); + assert_eq!(rl.current_delay_ms(), 0); + } + + #[test] + fn test_rate_limiter_default_trait() { + let rl = RateLimiter::default(); + assert_eq!(rl.current_delay_ms(), 0); + assert_eq!(rl.rate_limit_hits(), 0); + } + + #[test] + fn test_rate_limiter_success_no_delay_noop() { + // When delay is 0, record_success should be a noop + let rl = RateLimiter::new(); + rl.record_success(); + assert_eq!(rl.current_delay_ms(), 0); + } + + #[test] + fn test_rate_limiter_exponential_backoff_progression() { + let rl = RateLimiter::new(); + + // Each call should at least double + rl.record_rate_limit(None); // 100 -> 200 + let d1 = rl.current_delay_ms(); + rl.record_rate_limit(None); // 200 -> 400 + let d2 = rl.current_delay_ms(); + rl.record_rate_limit(None); // 400 -> 800 + let d3 = 
rl.current_delay_ms(); + + assert!(d2 > d1, "d2 ({}) should be > d1 ({})", d2, d1); + assert!(d3 > d2, "d3 ({}) should be > d2 ({})", d3, d2); + } + + #[test] + fn test_circuit_breaker_failure_during_open_no_extend() { + let base_time = 4000u64; + let mock_time = MockTimeGuard::new(base_time); + + let cb = CircuitBreaker::with_params(2, 10); + + cb.record_failure(); + cb.record_failure(); + assert_eq!(cb.state(), CircuitState::Open); + + // Record more failures 5s later + mock_time.set(base_time + 5); + cb.record_failure(); + + // opened_at should still be base_time, not base_time+5 + let opened = cb.opened_at.load(Ordering::Acquire); + assert_eq!(opened, base_time); + } + + #[test] + fn test_circuit_breaker_new_from_config() { + let config = ResilienceConfig { + circuit_breaker_threshold: 10, + circuit_breaker_reset_secs: 120, + ..Default::default() + }; + let cb = CircuitBreaker::new(&config); + assert_eq!(cb.threshold, 10); + assert_eq!(cb.reset_timeout_secs, 120); + assert_eq!(cb.state(), CircuitState::Closed); + } } diff --git a/src/security.rs b/src/security.rs index e1ad2be..a82ac14 100644 --- a/src/security.rs +++ b/src/security.rs @@ -7,9 +7,9 @@ //! //! Security principle: Never log API keys or full prompt contents. -use once_cell::sync::Lazy; use regex::Regex; use std::borrow::Cow; +use std::sync::LazyLock; /// Maximum length for prompt content in error messages/logs const MAX_PROMPT_PREVIEW_LEN: usize = 50; @@ -30,7 +30,7 @@ const SECRET_PATTERNS: &[&str] = &[ ]; /// Pre-compiled regex patterns for secret redaction (compiled once at startup) -static REDACTION_PATTERNS: Lazy> = Lazy::new(|| { +static REDACTION_PATTERNS: LazyLock> = LazyLock::new(|| { let mut patterns = Vec::new(); for pattern in SECRET_PATTERNS { // Match key=value or key: value (handles "Bearer " style) @@ -310,4 +310,50 @@ mod tests { assert!(result.len() <= 23); // 20 + "..." 
assert!(result.contains("\\n")); // Newlines should be escaped } + + #[test] + fn test_redact_secrets_no_secrets() { + let input = "This is perfectly normal text without any sensitive data"; + let result = redact_secrets(input); + assert_eq!(result, input); + } + + #[test] + fn test_redact_secrets_preserves_surrounding() { + let input = "before api_key=secret123 after"; + let result = redact_secrets(input); + assert!(result.contains("before")); + assert!(result.contains("after")); + assert!(result.contains("[REDACTED]")); + assert!(!result.contains("secret123")); + } + + #[test] + fn test_sanitize_for_log_carriage_return() { + let text = "line1\r\nline2"; + let result = sanitize_for_log(text, 100); + assert!(result.contains("\\r")); + assert!(result.contains("\\n")); + } + + #[test] + fn test_format_prompt_preview_short() { + let prompt = "Hi"; + let preview = format_prompt_preview(prompt); + assert!(preview.contains("2 chars")); + assert!(!preview.contains("...")); + } + + #[test] + fn test_looks_like_secret_case_insensitive() { + assert!(looks_like_secret("MY_API_KEY=abc")); + assert!(looks_like_secret("Authorization: Bearer xyz")); + assert!(looks_like_secret("CREDENTIAL_FILE")); + } + + #[test] + fn test_sensitive_data_warning_constant() { + assert!(SENSITIVE_DATA_WARNING.contains("WARNING")); + assert!(SENSITIVE_DATA_WARNING.contains("sensitive")); + } } diff --git a/src/stats.rs b/src/stats.rs index 85ce36d..17af674 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -386,4 +386,54 @@ mod tests { let avg_cost = (INPUT_COST_PER_MTOK + OUTPUT_COST_PER_MTOK) / 2.0; assert_eq!(avg_cost, 45.0); } + + #[test] + fn test_record_translation_output_exceeds_input() { + // When output > input (English expansion), saved should be 0 via saturating_sub + let temp_dir = tempfile::tempdir().unwrap(); + let test_path = temp_dir.path().join("test_neg.json"); + + record_translation_to_path(&test_path, 50, 100); + + let stats = load_stats_from_path(&test_path); + 
assert_eq!(stats.estimated_saved_tokens, 0); + assert_eq!(stats.total_input_tokens, 50); + assert_eq!(stats.total_output_tokens, 100); + } + + #[test] + fn test_estimate_cost_savings_zero() { + assert_eq!(estimate_cost_savings(0), 0.0); + } + + #[test] + fn test_format_stats_csv_empty() { + let stats = TokenStats::default(); + let csv = format_stats_csv(&stats); + assert_eq!( + csv, + "date,translations,input_tokens,output_tokens,estimated_saved" + ); + } + + #[test] + fn test_atomic_write_preserves_on_failure() { + // Verify temp file is used (the .json.tmp extension) + let temp_dir = tempfile::tempdir().unwrap(); + let test_path = temp_dir.path().join("test_atomic.json"); + + let stats = TokenStats { + total_translations: 42, + ..Default::default() + }; + save_stats_to_path(&test_path, &stats); + + // File should exist and be valid + let loaded = load_stats_from_path(&test_path); + assert_eq!(loaded.total_translations, 42); + + // Temp file should not linger + let tmp_path = test_path.with_extension("json.tmp"); + assert!(!tmp_path.exists()); + } } diff --git a/src/translator.rs b/src/translator.rs index b366d03..11de056 100644 --- a/src/translator.rs +++ b/src/translator.rs @@ -555,8 +555,6 @@ pub fn reset_resilience_state() { mod tests { use super::*; use crate::config::Config; - use crate::error::{Error, ErrorCategory}; - use reqwest::StatusCode; #[test] fn test_output_language_instruction() { @@ -574,6 +572,13 @@ mod tests { assert_eq!(chunks[0], text); } + #[test] + fn test_chunk_text_empty() { + let chunks = chunk_text(""); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0], ""); + } + #[test] fn test_chunk_text_exactly_max_size() { let text = "a".repeat(MAX_CHUNK_SIZE); @@ -914,243 +919,35 @@ mod tests { } #[test] - fn test_error_category_from_http_status() { - // Test various HTTP status code classifications - assert_eq!( - Error::from_status(StatusCode::UNAUTHORIZED).category(), - ErrorCategory::Auth - ); - assert_eq!( - 
Error::from_status(StatusCode::FORBIDDEN).category(), - ErrorCategory::Auth - ); - assert_eq!( - Error::from_status(StatusCode::TOO_MANY_REQUESTS).category(), - ErrorCategory::RateLimit - ); - assert_eq!( - Error::from_status(StatusCode::PAYMENT_REQUIRED).category(), - ErrorCategory::Quota - ); - assert_eq!( - Error::from_status(StatusCode::BAD_REQUEST).category(), - ErrorCategory::Client - ); - assert_eq!( - Error::from_status(StatusCode::INTERNAL_SERVER_ERROR).category(), - ErrorCategory::Server - ); - assert_eq!( - Error::from_status(StatusCode::BAD_GATEWAY).category(), - ErrorCategory::Server - ); - } - - #[test] - fn test_error_retryable() { - // Test which errors are retryable - assert!(Error::RateLimited { - retry_after_secs: None - } - .is_retryable()); - assert!(Error::RetryableHttp { - status: StatusCode::SERVICE_UNAVAILABLE - } - .is_retryable()); - assert!(Error::Timeout.is_retryable()); - assert!(Error::ConnectionFailed.is_retryable()); - - assert!(!Error::Config { - message: "bad config".into() - } - .is_retryable()); - assert!(!Error::AuthError { - status: StatusCode::UNAUTHORIZED - } - .is_retryable()); - assert!(!Error::QuotaExceeded { - status: StatusCode::PAYMENT_REQUIRED - } - .is_retryable()); - } - - #[test] - fn test_get_http_client() { - // Verify that we can get an HTTP client without error - let _client = get_http_client(); - // The mere fact that we got the client without panic is sufficient - } - - #[test] - fn test_user_agents_pool() { - // Verify that USER_AGENTS contains expected values - assert!(!USER_AGENTS.is_empty()); - for ua in USER_AGENTS { - assert!(!ua.is_empty()); - assert!(ua.contains("Mozilla/5.0")); + fn test_user_agent_rotation_cycles() { + // Verify round-robin cycles through all agents. + // Only check that we see every agent -- don't assert on UA_COUNTER value + // because other tests running in parallel may also call get_user_agent(). 
+ let mut seen = std::collections::HashSet::new(); + // Call enough times to guarantee full coverage even with concurrent increments + for _ in 0..USER_AGENTS.len() * 2 { + seen.insert(get_user_agent()); } + assert_eq!(seen.len(), USER_AGENTS.len()); } #[test] - fn test_ua_counter_initial_value() { - // Test that the counter is accessible - let initial = UA_COUNTER.load(Ordering::Relaxed); - // Verify counter is within valid range for USER_AGENTS rotation - assert!(initial < usize::MAX); - } - - #[test] - fn test_get_user_agent_returns_valid() { - let ua = get_user_agent(); - assert!(USER_AGENTS.contains(&ua)); - } - - #[test] - fn test_max_chunk_size_constant() { - // Verify constant is accessible and non-zero - assert_ne!(MAX_CHUNK_SIZE, 0); - } - - #[test] - fn test_max_concurrent_translations_constant() { - // Verify the constant is set appropriately - assert_eq!(MAX_CONCURRENT_TRANSLATIONS, 5); - } - - #[test] - fn test_google_translate_url_constant() { - // Verify the URL is set correctly - assert_eq!( - GOOGLE_TRANSLATE_URL, - "https://translate.googleapis.com/translate_a/single" - ); - } - - #[test] - fn test_get_resilience_stats() { - // Verify that we can get resilience stats without error + fn test_resilience_stats_accessible() { let stats = get_resilience_stats(); - // Verify struct is accessible (rate_limit_hits is usize, always valid) - let _ = stats.rate_limit_hits; - } - - #[test] - fn test_reset_resilience_state() { - // Verify that we can reset resilience state without error - reset_resilience_state(); - } - - #[test] - fn test_normalize_whitespace_internal_empty() { - assert_eq!(normalize_whitespace_internal(""), ""); - } - - #[test] - fn test_normalize_whitespace_internal_single_word() { - assert_eq!(normalize_whitespace_internal("hello"), "hello"); - } - - #[test] - fn test_normalize_whitespace_internal_multiple_spaces() { + // Verify struct fields are consistent + assert!(stats.rate_limit_delay_ms < u64::MAX); assert_eq!( - 
normalize_whitespace_internal("hello world"), - "hello world" + stats.circuit_breaker.state, + crate::resilience::CircuitState::Closed ); } #[test] - fn test_normalize_whitespace_internal_tabs() { - assert_eq!( - normalize_whitespace_internal("hello\t\tworld"), - "hello world" - ); - } - - #[test] - fn test_normalize_whitespace_internal_mixed_whitespace() { - assert_eq!( - normalize_whitespace_internal("hello \t\n world"), - "hello world" - ); - } - - #[test] - fn test_normalize_whitespace_internal_leading_trailing() { - assert_eq!( - normalize_whitespace_internal(" hello world "), - "hello world" - ); - } - - #[test] - fn test_chunk_text_empty() { - let chunks = chunk_text(""); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0], ""); - } - - #[test] - fn test_chunk_text_shorter_than_max() { - let text = "Short text"; - let chunks = chunk_text(text); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0], text); - } - - #[test] - fn test_chunk_text_exactly_max_size_additional() { - let text = "a".repeat(MAX_CHUNK_SIZE); - let chunks = chunk_text(&text); - assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0], text); - } - - #[test] - fn test_translation_result_debug_format() { - let result = TranslationResult { - original: "Hello".to_string(), - translated: "Bonjour".to_string(), - was_translated: true, - source_language: Language::English, - input_tokens: 10, - output_tokens: 12, - cache_hit: false, - }; - - // Just ensure it doesn't panic when debug formatted - let _debug_str = format!("{:?}", result); - } - - #[test] - fn test_translation_result_equality() { - let result1 = TranslationResult { - original: "Hello".to_string(), - translated: "Bonjour".to_string(), - was_translated: true, - source_language: Language::English, - input_tokens: 10, - output_tokens: 12, - cache_hit: false, - }; - - let result2 = TranslationResult { - original: "Hello".to_string(), - translated: "Bonjour".to_string(), - was_translated: true, - source_language: Language::English, - 
input_tokens: 10, - output_tokens: 12, - cache_hit: false, - }; - - // We can't directly compare TranslationResult as it doesn't implement PartialEq, - // but we can verify the fields are as expected - assert_eq!(result1.original, result2.original); - assert_eq!(result1.translated, result2.translated); - assert_eq!(result1.was_translated, result2.was_translated); - assert_eq!(result1.source_language, result2.source_language); - assert_eq!(result1.input_tokens, result2.input_tokens); - assert_eq!(result1.output_tokens, result2.output_tokens); - assert_eq!(result1.cache_hit, result2.cache_hit); + fn test_reset_resilience_clears_delay() { + let rl = get_rate_limiter(); + rl.record_rate_limit(Some(5)); + assert!(rl.current_delay_ms() > 0); + reset_resilience_state(); + assert_eq!(rl.current_delay_ms(), 0); } }