From b497c09224d9b965b747db0721a95841069e32ee Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Mon, 23 Mar 2026 18:14:19 +0530 Subject: [PATCH 1/4] feat(range): improve range computation for fuzzy search matches and add out-of-bounds error handling --- .../src/tool_services/fs_patch.rs | 185 ++++++++++++++---- 1 file changed, 143 insertions(+), 42 deletions(-) diff --git a/crates/forge_services/src/tool_services/fs_patch.rs b/crates/forge_services/src/tool_services/fs_patch.rs index e073f722ce..63f68799c3 100644 --- a/crates/forge_services/src/tool_services/fs_patch.rs +++ b/crates/forge_services/src/tool_services/fs_patch.rs @@ -58,54 +58,59 @@ impl Range { } } - /// Create a range from a fuzzy search match + /// Create a range from a fuzzy search match. + /// + /// Uses actual byte positions from the source string rather than + /// assuming uniform line ending lengths, which avoids incorrect byte + /// offsets with mixed line endings (e.g. some `\r\n` and some `\n`). fn from_search_match(source: &str, search_match: &SearchMatch) -> Self { - let lines: Vec<&str> = source.lines().collect(); - // Handle empty source - if lines.is_empty() { + if source.is_empty() { return Self::new(0, 0); } - // Detect the line ending style used in the source - let line_ending = Self::detect_line_ending(source); - let line_ending_len = line_ending.len(); - - // SearchMatch uses 0-based inclusive line numbers - // Convert to 0-based array indices - let start_idx = (search_match.start_line as usize).min(lines.len()); - // end_line is 0-based inclusive, convert to 0-based exclusive for slicing - // Add 1 to make it exclusive: line 0 to line 0 means [0..1], one line - let end_idx = ((search_match.end_line as usize) + 1).min(lines.len()); - - // Find the character position of the start line - // Sum the lengths of all lines before start_idx, adding the appropriate line - // ending length - let start_pos = lines[..start_idx] - .iter() - .map(|l| l.len() + line_ending_len) - .sum::(); - - // Calculate the length - let length = if start_idx == end_idx { - // Single line match: just the line content, no trailing newline - if start_idx >= lines.len() { - 0 // Out of bounds match - } else { - lines[start_idx].len() + // Collect (byte_offset, line_content) for each line by scanning + // through the source. This gives us exact byte positions regardless + // of line ending style per line. + let mut line_starts: Vec = Vec::new(); + let mut line_ends: Vec = Vec::new(); + let mut pos = 0; + for line in source.lines() { + line_starts.push(pos); + let line_end = pos + line.len(); + line_ends.push(line_end); + // Advance past the line content + pos = line_end; + // Advance past the line ending (could be \r\n or \n or nothing at EOF) + if pos < source.len() { + if source.as_bytes()[pos] == b'\r' + && pos + 1 < source.len() + && source.as_bytes()[pos + 1] == b'\n' + { + pos += 2; + } else if source.as_bytes()[pos] == b'\n' { + pos += 1; + } } - } else { - // Multi-line match: include newlines between lines but NOT after the last line - // Sum lengths of lines from start_idx to end_idx (exclusive) - // Add appropriate line ending length for each newline between lines - let content_len: usize = if start_idx >= lines.len() || end_idx > lines.len() { - 0 // Out of bounds match - } else { - lines[start_idx..end_idx].iter().map(|l| l.len()).sum() - }; - let newlines_between = end_idx - start_idx - 1; - content_len + (newlines_between * line_ending_len) - }; + } + + let num_lines = line_starts.len(); + if num_lines == 0 { + return Self::new(0, 0); + } + + // Clamp indices to valid range + let start_idx = (search_match.start_line as usize).min(num_lines.saturating_sub(1)); + let end_idx = (search_match.end_line as usize).min(num_lines.saturating_sub(1)); + + // Compute byte range: from start of start_line to end of end_line + let start_pos = line_starts[start_idx]; + let end_pos = line_ends[end_idx]; + + // Safety: clamp to source length + let start_pos = start_pos.min(source.len()); + let end_pos = end_pos.min(source.len()); + let length = end_pos.saturating_sub(start_pos); Self::new(start_pos, length) } @@ -135,6 +140,10 @@ enum Error { "Multiple matches found for search text: '{0}'. Either provide a more specific search pattern or use replace_all to replace all occurrences." )] MultipleMatches(String), + #[error( + "Match range [{0}..{1}) is out of bounds for content of length {2}. File may have changed externally, consider reading the file again." + )] + RangeOutOfBounds(usize, usize, usize), } /// Compute a range from search text, with operation-aware error handling @@ -189,6 +198,15 @@ fn apply_replacement( let normalized_content = Range::normalize_search_line_endings(&haystack, content); // Handle case where range is provided (match found) if let Some(patch) = range { + // Validate the range is within bounds before indexing + if patch.end() > haystack.len() { + return Err(Error::RangeOutOfBounds( + patch.start, + patch.end(), + haystack.len(), + )); + } + // Extract the matched text from haystack let needle = &haystack[patch.start..patch.end()]; @@ -1043,4 +1061,87 @@ mod tests { assert_eq!(actual, expected); } + + // --- Out-of-bounds safety tests --- + + #[test] + fn test_range_from_search_match_out_of_bounds_start_line() { + let source = "line1\nline2\nline3"; + // start_line way past end of file + let search_match = SearchMatch { start_line: 100, end_line: 200 }; + + let range = super::Range::from_search_match(source, &search_match); + // Should not panic; range should be clamped so it doesn't exceed source + assert!(range.end() <= source.len()); + } + + #[test] + fn test_range_from_search_match_end_line_past_eof() { + let source = "line1\nline2\nline3"; + // start_line valid, end_line past end + let search_match = SearchMatch { start_line: 1, end_line: 100 }; + + let range = super::Range::from_search_match(source, &search_match); + assert!(range.end() <= source.len()); + // Should include from line2 to end of source + let actual = &source[range.start..range.end()]; + assert!(actual.contains("line2")); + assert!(actual.contains("line3")); + } + + #[test] + fn test_range_from_search_match_trailing_newline() { + let source = "line1\nline2\nline3\n"; // trailing newline + let search_match = SearchMatch { start_line: 2, end_line: 2 }; + + let range = super::Range::from_search_match(source, &search_match); + assert!(range.end() <= source.len()); + let actual = &source[range.start..range.end()]; + assert_eq!(actual, "line3"); + } + + #[test] + fn test_range_from_search_match_unicode_content() { + let source = "héllo\nwørld\n🌍"; + let search_match = SearchMatch { start_line: 1, end_line: 1 }; + + let range = super::Range::from_search_match(source, &search_match); + assert!(range.end() <= source.len()); + let actual = &source[range.start..range.end()]; + assert_eq!(actual, "wørld"); + } + + #[test] + fn test_range_from_search_match_unicode_multiline() { + let source = "héllo\nwørld\n🌍"; + let search_match = SearchMatch { start_line: 0, end_line: 2 }; + + let range = super::Range::from_search_match(source, &search_match); + assert!(range.end() <= source.len()); + let actual = &source[range.start..range.end()]; + assert_eq!(actual, source); + } + + #[test] + fn test_range_from_search_match_mixed_line_endings() { + let source = "line1\r\nline2\nline3"; + let search_match = SearchMatch { start_line: 1, end_line: 1 }; + + let range = super::Range::from_search_match(source, &search_match); + assert!(range.end() <= source.len()); + let actual = &source[range.start..range.end()]; + assert_eq!(actual, "line2"); + } + + #[test] + fn test_apply_replacement_with_out_of_bounds_range_returns_error() { + let source = "short"; + // Simulate a bad range that exceeds source length + let bad_range = Some(super::Range::new(0, 1000)); + let operation = PatchOperation::Replace; + let content = "replacement"; + + let result = super::apply_replacement(source.to_string(), bad_range, &operation, content); + assert!(result.is_err()); + } } From dea42457ed50ebfc2e2707c5fb49722788f75899 Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Wed, 25 Mar 2026 12:54:48 +0530 Subject: [PATCH 2/4] fix(fs_patch): track exact line starts to support mixed line endings --- .../src/tool_services/fs_patch.rs | 49 +++++++------------ 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/crates/forge_services/src/tool_services/fs_patch.rs b/crates/forge_services/src/tool_services/fs_patch.rs index 63f68799c3..80b50b2591 100644 --- a/crates/forge_services/src/tool_services/fs_patch.rs +++ b/crates/forge_services/src/tool_services/fs_patch.rs @@ -60,56 +60,43 @@ impl Range { /// Create a range from a fuzzy search match. /// - /// Uses actual byte positions from the source string rather than - /// assuming uniform line ending lengths, which avoids incorrect byte - /// offsets with mixed line endings (e.g. some `\r\n` and some `\n`). + /// Tracks actual byte positions per line to handle mixed line endings + /// (e.g. some `\r\n` and some `\n`), avoiding out-of-bounds offsets. fn from_search_match(source: &str, search_match: &SearchMatch) -> Self { + let lines: Vec<&str> = source.lines().collect(); + // Handle empty source - if source.is_empty() { + if lines.is_empty() { return Self::new(0, 0); } - // Collect (byte_offset, line_content) for each line by scanning - // through the source. This gives us exact byte positions regardless - // of line ending style per line. - let mut line_starts: Vec = Vec::new(); - let mut line_ends: Vec = Vec::new(); + // Compute exact byte start positions per line by scanning the source. + // This handles mixed line endings correctly, unlike a uniform line_ending_len. + let mut line_starts = vec![0usize; lines.len()]; let mut pos = 0; - for line in source.lines() { - line_starts.push(pos); - let line_end = pos + line.len(); - line_ends.push(line_end); - // Advance past the line content - pos = line_end; - // Advance past the line ending (could be \r\n or \n or nothing at EOF) + for (i, line) in lines.iter().enumerate() { + line_starts[i] = pos; + pos += line.len(); + // Advance past the line ending (\r\n or \n) if pos < source.len() { if source.as_bytes()[pos] == b'\r' && pos + 1 < source.len() && source.as_bytes()[pos + 1] == b'\n' { pos += 2; - } else if source.as_bytes()[pos] == b'\n' { + } else { pos += 1; } } } - let num_lines = line_starts.len(); - if num_lines == 0 { - return Self::new(0, 0); - } - // Clamp indices to valid range - let start_idx = (search_match.start_line as usize).min(num_lines.saturating_sub(1)); - let end_idx = (search_match.end_line as usize).min(num_lines.saturating_sub(1)); - - // Compute byte range: from start of start_line to end of end_line - let start_pos = line_starts[start_idx]; - let end_pos = line_ends[end_idx]; + let last = lines.len() - 1; + let start_idx = (search_match.start_line as usize).min(last); + let end_idx = (search_match.end_line as usize).min(last); - // Safety: clamp to source length - let start_pos = start_pos.min(source.len()); - let end_pos = end_pos.min(source.len()); + let start_pos = line_starts[start_idx].min(source.len()); + let end_pos = (line_starts[end_idx] + lines[end_idx].len()).min(source.len()); let length = end_pos.saturating_sub(start_pos); Self::new(start_pos, length) From ac04562a1d9bd9cce07d2ecde3e72361eeacf505 Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Wed, 25 Mar 2026 13:03:16 +0530 Subject: [PATCH 3/4] fix(fs_patch): walk actual byte positions per line to handle mixed line endings --- .gitignore | 1 + .../src/tool_services/fs_patch.rs | 43 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 5ca524a1e3..077bfbece7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # will have compiled files and executables debug/ target/ +jobs/** # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html diff --git a/crates/forge_services/src/tool_services/fs_patch.rs b/crates/forge_services/src/tool_services/fs_patch.rs index 80b50b2591..45d4d77c98 100644 --- a/crates/forge_services/src/tool_services/fs_patch.rs +++ b/crates/forge_services/src/tool_services/fs_patch.rs @@ -60,7 +60,7 @@ impl Range { /// Create a range from a fuzzy search match. /// - /// Tracks actual byte positions per line to handle mixed line endings + /// Walks actual byte positions per line to handle mixed line endings /// (e.g. some `\r\n` and some `\n`), avoiding out-of-bounds offsets. fn from_search_match(source: &str, search_match: &SearchMatch) -> Self { let lines: Vec<&str> = source.lines().collect(); @@ -70,36 +70,35 @@ impl Range { return Self::new(0, 0); } - // Compute exact byte start positions per line by scanning the source. - // This handles mixed line endings correctly, unlike a uniform line_ending_len. - let mut line_starts = vec![0usize; lines.len()]; - let mut pos = 0; + // Clamp indices to valid line numbers (0-based inclusive) + let start_idx = (search_match.start_line as usize).min(lines.len() - 1); + let end_idx = (search_match.end_line as usize).min(lines.len() - 1); + + // Walk byte positions to correctly handle mixed line endings (\r\n vs \n). + // Record start_pos when we reach start_idx; stop after end_idx. + let mut pos = 0usize; + let mut start_pos = 0usize; for (i, line) in lines.iter().enumerate() { - line_starts[i] = pos; + if i == start_idx { + start_pos = pos; + } pos += line.len(); + if i == end_idx { + break; + } // Advance past the line ending (\r\n or \n) if pos < source.len() { - if source.as_bytes()[pos] == b'\r' - && pos + 1 < source.len() - && source.as_bytes()[pos + 1] == b'\n' + pos += if source.as_bytes()[pos] == b'\r' + && source.as_bytes().get(pos + 1) == Some(&b'\n') { - pos += 2; + 2 } else { - pos += 1; - } + 1 + }; } } - // Clamp indices to valid range - let last = lines.len() - 1; - let start_idx = (search_match.start_line as usize).min(last); - let end_idx = (search_match.end_line as usize).min(last); - - let start_pos = line_starts[start_idx].min(source.len()); - let end_pos = (line_starts[end_idx] + lines[end_idx].len()).min(source.len()); - let length = end_pos.saturating_sub(start_pos); - - Self::new(start_pos, length) + Self::new(start_pos, pos.min(source.len()).saturating_sub(start_pos)) } // Fuzzy matching removed - we only use exact matching From 7ef1d9eadcc00f86915710366fe2f3575231698a Mon Sep 17 00:00:00 2001 From: Amit Singh Date: Wed, 25 Mar 2026 13:11:21 +0530 Subject: [PATCH 4/4] fix(fs_patch): refine range computation for fuzzy search matches to handle mixed line endings --- .../src/tool_services/fs_patch.rs | 75 +++++++++++-------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/crates/forge_services/src/tool_services/fs_patch.rs b/crates/forge_services/src/tool_services/fs_patch.rs index 45d4d77c98..cdb1883de3 100644 --- a/crates/forge_services/src/tool_services/fs_patch.rs +++ b/crates/forge_services/src/tool_services/fs_patch.rs @@ -58,10 +58,7 @@ impl Range { } } - /// Create a range from a fuzzy search match. - /// - /// Walks actual byte positions per line to handle mixed line endings - /// (e.g. some `\r\n` and some `\n`), avoiding out-of-bounds offsets. + /// Create a range from a fuzzy search match fn from_search_match(source: &str, search_match: &SearchMatch) -> Self { let lines: Vec<&str> = source.lines().collect(); @@ -70,35 +67,51 @@ impl Range { return Self::new(0, 0); } - // Clamp indices to valid line numbers (0-based inclusive) - let start_idx = (search_match.start_line as usize).min(lines.len() - 1); - let end_idx = (search_match.end_line as usize).min(lines.len() - 1); - - // Walk byte positions to correctly handle mixed line endings (\r\n vs \n). - // Record start_pos when we reach start_idx; stop after end_idx. - let mut pos = 0usize; - let mut start_pos = 0usize; - for (i, line) in lines.iter().enumerate() { - if i == start_idx { - start_pos = pos; + // SearchMatch uses 0-based inclusive line numbers + // Convert to 0-based array indices + let start_idx = (search_match.start_line as usize).min(lines.len()); + // end_line is 0-based inclusive, convert to 0-based exclusive for slicing + // Add 1 to make it exclusive: line 0 to line 0 means [0..1], one line + let end_idx = ((search_match.end_line as usize) + 1).min(lines.len()); + + // Find the byte position of the start line. + // Split on '\n' so each segment retains its '\r' (if any), giving the + // correct per-line byte length regardless of mixed line endings. + let start_pos = source + .split('\n') + .take(start_idx) + .map(|l| l.len() + 1) + .sum::() + .min(source.len()); + + // Calculate the length + let length = if start_idx == end_idx { + // Single line match: just the line content, no trailing newline + if start_idx >= lines.len() { + 0 // Out of bounds match + } else { + lines[start_idx].len() } - pos += line.len(); - if i == end_idx { - break; - } - // Advance past the line ending (\r\n or \n) - if pos < source.len() { - pos += if source.as_bytes()[pos] == b'\r' - && source.as_bytes().get(pos + 1) == Some(&b'\n') - { - 2 - } else { - 1 - }; - } - } + } else { + // Multi-line match: include newlines between lines but NOT after the last line + // Sum lengths of lines from start_idx to end_idx (exclusive) + let content_len: usize = if start_idx >= lines.len() || end_idx > lines.len() { + 0 // Out of bounds match + } else { + lines[start_idx..end_idx].iter().map(|l| l.len()).sum() + }; + let newlines_between = end_idx - start_idx - 1; + // Count actual newline bytes (\r\n = 2, \n = 1) to handle mixed endings + let newline_bytes: usize = source + .split('\n') + .skip(start_idx) + .take(newlines_between) + .map(|l| if l.ends_with('\r') { 2 } else { 1 }) + .sum(); + content_len + newline_bytes + }; - Self::new(start_pos, pos.min(source.len()).saturating_sub(start_pos)) + Self::new(start_pos, length) } // Fuzzy matching removed - we only use exact matching