From d23f5ea71e4386240e6012b577b099790dcac2f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 23:25:58 +0000 Subject: [PATCH 1/3] Initial plan From 7ad329d7966d8f8116b3cf82fbb4444898872a2b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 23:35:59 +0000 Subject: [PATCH 2/3] fix(ast-engine): address review comments on TAB indentation support - template.rs:119: use *indent/*is_tab (Copy types) instead of .to_owned() - indent.rs: fix get_indent_at_offset_with_tab to only set is_tab=true for pure-tab indentation; mixed indentation falls back to spaces - indent.rs:331: use get_indent_at_offset_with_tab in test_deindent for accurate is_tab detection instead of source.contains('\t') - indent.rs:104-106: update doc comments to reflect tab/mixed support Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- crates/ast-engine/src/replacer/indent.rs | 34 ++++++++++++++-------- crates/ast-engine/src/replacer/template.rs | 2 +- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/crates/ast-engine/src/replacer/indent.rs b/crates/ast-engine/src/replacer/indent.rs index e53cb01..fce7f96 100644 --- a/crates/ast-engine/src/replacer/indent.rs +++ b/crates/ast-engine/src/replacer/indent.rs @@ -84,7 +84,8 @@ //! //! ## Limitations //! -//! - Only supports space-based indentation (tabs not fully supported) +//! - Handles both space-based and tab-based indentation; mixed indentation +//! (spaces and tabs on the same line) falls back to space-based re-indentation //! - Assumes well-formed input indentation //! - Performance overhead for large code blocks //! - Complex algorithm with edge cases @@ -120,13 +121,13 @@ pub enum DeindentedExtract<'a, C: Content> { /// Multi-line content with original indentation level recorded. /// - /// Contains the content bytes and the number of spaces that were used - /// for indentation in the original context. The first line's indentation - /// is not included in the content. + /// Contains the content bytes and the number of whitespace characters + /// (spaces or tabs) used for indentation in the original context. The first + /// line's indentation is not included in the content. /// /// # Fields /// - Content bytes with relative indentation preserved - /// - Original indentation level (number of spaces) + /// - Original indentation level (number of whitespace characters) MultiLine(&'a [C::Underlying], usize), } @@ -251,32 +252,40 @@ pub fn get_indent_at_offset(src: &[C::Underlying]) -> usize { get_indent_at_offset_with_tab::(src).0 } -/// returns (indent, `is_tab`) +/// Returns `(indent_count, is_tab)` for the current line's leading whitespace. +/// +/// `is_tab` is `true` only when the entire indentation prefix consists of tab +/// characters. For mixed indentation (e.g. `" \t"`) `is_tab` is `false` so that +/// re-indentation falls back to space-based expansion rather than silently +/// replacing the prefix with all tabs. pub fn get_indent_at_offset_with_tab(src: &[C::Underlying]) -> (usize, bool) { let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD; let mut indent = 0; - let mut is_tab = false; + let mut has_tab = false; + let mut has_space = false; let new_line = get_new_line::(); let space = get_space::(); let tab = get_tab::(); for c in src[lookahead..].iter().rev() { if *c == new_line { - return (indent, is_tab); + return (indent, has_tab && !has_space); } if *c == space { indent += 1; + has_space = true; } else if *c == tab { indent += 1; - is_tab = true; + has_tab = true; } else { indent = 0; - is_tab = false; + has_tab = false; + has_space = false; } } // lookahead == 0 means we have indentation at first line. if lookahead == 0 && indent != 0 { - (indent, is_tab) + (indent, has_tab && !has_space) } else { (0, false) } @@ -328,7 +337,8 @@ mod test { .count(); let end = source.chars().count() - trailing_white; let extracted = extract_with_deindent(&source, start..end); - let result_bytes = indent_lines::(0, &extracted, source.contains('\t')); + let (_, is_tab) = get_indent_at_offset_with_tab::(&source.as_bytes()[..start]); + let result_bytes = indent_lines::(0, &extracted, is_tab); let actual = std::str::from_utf8(&result_bytes).unwrap(); assert_eq!(actual, expected); } diff --git a/crates/ast-engine/src/replacer/template.rs b/crates/ast-engine/src/replacer/template.rs index 72423c0..3aeb06d 100644 --- a/crates/ast-engine/src/replacer/template.rs +++ b/crates/ast-engine/src/replacer/template.rs @@ -116,7 +116,7 @@ fn replace_fixer(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl ret.extend_from_slice(&D::Source::decode_str(frag)); } for ((var, indent, is_tab), frag) in vars.zip(frags) { - if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) { + if let Some(bytes) = maybe_get_var(env, var, *indent, *is_tab) { ret.extend_from_slice(&bytes); } ret.extend_from_slice(&D::Source::decode_str(frag)); From 36da33f6d5546c72ca94f13d9a73ade0f3557e65 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 23:47:06 +0000 Subject: [PATCH 3/3] fix(ast-engine): use byte indices in test_deindent helper Replace .chars().count() with str::trim_start/trim_end length arithmetic so start/end are byte offsets throughout, making the helper correct for non-ASCII / multi-byte UTF-8 input. Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- crates/ast-engine/src/replacer/indent.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/crates/ast-engine/src/replacer/indent.rs b/crates/ast-engine/src/replacer/indent.rs index fce7f96..242bb12 100644 --- a/crates/ast-engine/src/replacer/indent.rs +++ b/crates/ast-engine/src/replacer/indent.rs @@ -325,17 +325,12 @@ mod test { fn test_deindent(source: &str, expected: &str, offset: usize) { let source = source.to_string(); let expected = expected.trim(); - let start = source[offset..] - .chars() - .take_while(|n| n.is_whitespace()) - .count() - + offset; - let trailing_white = source - .chars() - .rev() - .take_while(|n| n.is_whitespace()) - .count(); - let end = source.chars().count() - trailing_white; + // Derive byte indices rather than character counts so that the slice + // operations (`extract_with_deindent`, `get_indent_at_offset_with_tab`) + // work correctly for non-ASCII / multi-byte UTF-8 input as well. + let leading_ws_bytes = source[offset..].len() - source[offset..].trim_start().len(); + let start = offset + leading_ws_bytes; + let end = source.trim_end().len(); let extracted = extract_with_deindent(&source, start..end); let (_, is_tab) = get_indent_at_offset_with_tab::(&source.as_bytes()[..start]); let result_bytes = indent_lines::(0, &extracted, is_tab);