Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions _typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ extend-ignore-identifiers-re = [
"RET",
"prev",
"normalises",
"inout",
"goes",
]

Expand Down
8 changes: 6 additions & 2 deletions crates/ast-engine/src/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,13 @@ pub trait Language: Clone + std::fmt::Debug + Send + Sync + 'static {
extract_meta_var(source, self.expando_char())
}
/// Return the file language from path. Implementations return None if the file type is not supported.
/// The default implementation panics with an `unimplemented!` error, so override it when path-based detection is required.
fn from_path<P: AsRef<Path>>(_path: P) -> Option<Self> {
// TODO: throw panic here if not implemented properly?
None
unimplemented!(
"Language::from_path is not implemented for type `{}`. \
Override Language::from_path for this type if path-based detection is required.",
std::any::type_name::<Self>()
)
Comment on lines 70 to +77
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring says this default implementation returns None when the file type is not supported, but the default body now panics via unimplemented!(). Please update the documentation to reflect that the default panics unless overridden (or restore a non-panicking default if the docstring is intended to be accurate).

Copilot uses AI. Check for mistakes.
}

fn kind_to_id(&self, kind: &str) -> u16;
Expand Down
97 changes: 74 additions & 23 deletions crates/ast-engine/src/replacer/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ fn get_new_line<C: Content>() -> C::Underlying {
/// Returns the first element produced by decoding a single space (`" "`)
/// with `C::decode_str`.
fn get_space<C: Content>() -> C::Underlying {
    let decoded = C::decode_str(" ");
    decoded[0].clone()
}
/// Returns the first element produced by decoding a single tab (`"\t"`)
/// with `C::decode_str`.
fn get_tab<C: Content>() -> C::Underlying {
    let decoded = C::decode_str("\t");
    decoded[0].clone()
}

const MAX_LOOK_AHEAD: usize = 512;

Expand Down Expand Up @@ -183,21 +186,16 @@ pub fn formatted_slice<'a, C: Content>(
if !slice.contains(&get_new_line::<C>()) {
return Cow::Borrowed(slice);
}
let (indent, is_tab) = get_indent_at_offset_with_tab::<C>(content.get_range(0..start));
Cow::Owned(
indent_lines::<C>(
0,
&DeindentedExtract::MultiLine(
slice,
get_indent_at_offset::<C>(content.get_range(0..start)),
),
)
.into_owned(),
indent_lines::<C>(0, &DeindentedExtract::MultiLine(slice, indent), is_tab).into_owned(),
)
}

pub fn indent_lines<'a, C: Content>(
indent: usize,
extract: &'a DeindentedExtract<'a, C>,
is_tab: bool,
) -> Cow<'a, [C::Underlying]> {
use DeindentedExtract::{MultiLine, SingleLine};
let (lines, original_indent) = match extract {
Expand All @@ -213,18 +211,27 @@ pub fn indent_lines<'a, C: Content>(
Ordering::Less => Cow::Owned(indent_lines_impl::<C, _>(
indent - original_indent,
lines.split(|b| *b == get_new_line::<C>()),
is_tab,
)),
}
}

fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec<C::Underlying>
fn indent_lines_impl<'a, C, Lines>(
indent: usize,
mut lines: Lines,
is_tab: bool,
) -> Vec<C::Underlying>
where
C: Content + 'a,
Lines: Iterator<Item = &'a [C::Underlying]>,
{
let mut ret = vec![];
let space = get_space::<C>();
let leading: Vec<_> = std::iter::repeat_n(space, indent).collect();
let indent_char = if is_tab {
get_tab::<C>()
} else {
get_space::<C>()
};
let leading: Vec<_> = std::iter::repeat_n(indent_char, indent).collect();
// the first line wasn't indented, so we don't add leading indentation to it
if let Some(line) = lines.next() {
ret.extend(line.iter().cloned());
Expand All @@ -241,43 +248,65 @@ where
/// Indent width found before the offset, i.e. at the end of `src`.
/// Yields 0 when no indent is found there: either no indent truly exists,
/// or the offset is in a long line.
/// Delegates to `get_indent_at_offset_with_tab` and discards its tab flag.
pub fn get_indent_at_offset<C: Content>(src: &[C::Underlying]) -> usize {
    let (width, _is_tab) = get_indent_at_offset_with_tab::<C>(src);
    width
}

/// returns (indent, `is_tab`)
pub fn get_indent_at_offset_with_tab<C: Content>(src: &[C::Underlying]) -> (usize, bool) {
let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD;

let mut indent = 0;
let mut is_tab = false;
let new_line = get_new_line::<C>();
let space = get_space::<C>();
// TODO: support TAB. only whitespace is supported now
let tab = get_tab::<C>();
for c in src[lookahead..].iter().rev() {
if *c == new_line {
return indent;
return (indent, is_tab);
}
if *c == space {
indent += 1;
} else if *c == tab {
indent += 1;
is_tab = true;
} else {
indent = 0;
is_tab = false;
}
}
// lookahead == 0 means we have indentation at first line.
if lookahead == 0 && indent != 0 {
indent
(indent, is_tab)
} else {
0
(0, false)
}
}

// NOTE: we assume input is well indented.
// following lines should have fewer indentations than initial line
fn remove_indent<C: Content>(indent: usize, src: &[C::Underlying]) -> Vec<C::Underlying> {
let indentation: Vec<_> = std::iter::repeat_n(get_space::<C>(), indent).collect();
let new_line = get_new_line::<C>();
let space = get_space::<C>();
let tab = get_tab::<C>();
let lines: Vec<_> = src
.split(|b| *b == new_line)
.map(|line| match line.strip_prefix(&*indentation) {
Some(stripped) => stripped,
None => line,
.map(|line| {
let mut stripped = line;
let mut count = 0;
while count < indent {
if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&space)) {
stripped = rest;
} else if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&tab)) {
stripped = rest;
} else {
break;
}
count += 1;
}
stripped
})
.collect();
lines.join(&new_line).clone()
lines.join(&new_line)
}

#[cfg(test)]
Expand All @@ -299,7 +328,7 @@ mod test {
.count();
let end = source.chars().count() - trailing_white;
let extracted = extract_with_deindent(&source, start..end);
let result_bytes = indent_lines::<String>(0, &extracted);
let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t'));
let actual = std::str::from_utf8(&result_bytes).unwrap();
assert_eq!(actual, expected);
}
Expand Down Expand Up @@ -391,8 +420,8 @@ pass
fn test_replace_with_indent(target: &str, start: usize, inserted: &str) -> String {
let target = target.to_string();
let replace_lines = DeindentedExtract::MultiLine(inserted.as_bytes(), 0);
let indent = get_indent_at_offset::<String>(&target.as_bytes()[..start]);
let ret = indent_lines::<String>(indent, &replace_lines);
let (indent, is_tab) = get_indent_at_offset_with_tab::<String>(&target.as_bytes()[..start]);
let ret = indent_lines::<String>(indent, &replace_lines, is_tab);
String::from_utf8(ret.to_vec()).unwrap()
}

Expand Down Expand Up @@ -445,4 +474,26 @@ pass
let actual = test_replace_with_indent(target, 6, inserted);
assert_eq!(actual, "def abc():\n pass");
}

// De-indenting a snippet whose lines start with tabs: the expected output
// has the two shared leading tabs removed and keeps one tab on the body line.
#[test]
fn test_tab_indent() {
    test_deindent(
        "\n\t\tdef test():\n\t\t\tpass",
        "def test():\n\tpass",
        0,
    );
}

// Re-indenting a replacement when the text before the insertion offset is
// indented with tabs.
#[test]
fn test_tab_replace() {
    let single_line = "def abc(): pass";
    let multi_line = "def abc():\n\tpass";

    // Insertion point right after two leading tabs.
    let two_tabs = "\t\t";
    // A single-line replacement comes back unchanged.
    assert_eq!(
        test_replace_with_indent(two_tabs, 2, single_line),
        "def abc(): pass"
    );
    // Lines after the first gain the two tabs found before the offset.
    assert_eq!(
        test_replace_with_indent(two_tabs, 2, multi_line),
        "def abc():\n\t\t\tpass"
    );

    // Offset 14 is just past the first tab of the second line, so only one
    // tab is prepended to the continuation line.
    let nested = "\t\tdef abc():\n\t\t\t";
    assert_eq!(
        test_replace_with_indent(nested, 14, multi_line),
        "def abc():\n\t\tpass"
    );
}
}
23 changes: 13 additions & 10 deletions crates/ast-engine/src/replacer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//
// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT

use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines};
use super::indent::{DeindentedExtract, extract_with_deindent, indent_lines};
use super::{MetaVarExtract, Replacer, split_first_meta_var};
use crate::NodeMatch;
use crate::language::Language;
Expand Down Expand Up @@ -52,10 +52,10 @@ impl TemplateFix {
impl<D: Doc> Replacer<D> for TemplateFix {
fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying<D> {
let leading = nm.get_doc().get_source().get_range(0..nm.range().start);
let indent = get_indent_at_offset::<D::Source>(leading);
let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<D::Source>(leading);
let bytes = replace_fixer(self, nm.get_env());
let replaced = DeindentedExtract::MultiLine(&bytes, 0);
indent_lines::<D::Source>(indent, &replaced).to_vec()
indent_lines::<D::Source>(indent, &replaced, is_tab).to_vec()
}
}

Expand All @@ -64,7 +64,7 @@ type Indent = usize;
#[derive(Debug, Clone)]
pub struct Template {
fragments: Vec<String>,
vars: Vec<(MetaVarExtract, Indent)>,
vars: Vec<(MetaVarExtract, Indent, bool)>, // the third element is is_tab
}

fn create_template(
Expand All @@ -82,8 +82,10 @@ fn create_template(
{
fragments.push(tmpl[len..len + offset + i].to_string());
// NB we have to count the indent of the full string
let indent = get_indent_at_offset::<String>(&tmpl.as_bytes()[..len + offset + i]);
vars.push((meta_var, indent));
let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<String>(
&tmpl.as_bytes()[..len + offset + i],
);
vars.push((meta_var, indent, is_tab));
len += skipped + offset + i;
offset = 0;
continue;
Expand Down Expand Up @@ -113,8 +115,8 @@ fn replace_fixer<D: Doc>(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl
if let Some(frag) = frags.next() {
ret.extend_from_slice(&D::Source::decode_str(frag));
}
for ((var, indent), frag) in vars.zip(frags) {
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) {
for ((var, indent, is_tab), frag) in vars.zip(frags) {
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) {
ret.extend_from_slice(&bytes);
}
ret.extend_from_slice(&D::Source::decode_str(frag));
Expand All @@ -126,6 +128,7 @@ fn maybe_get_var<'e, 't, C, D>(
env: &'e MetaVarEnv<'t, D>,
var: &MetaVarExtract,
indent: usize,
is_tab: bool,
) -> Option<Cow<'e, [C::Underlying]>>
where
C: Content + 'e,
Expand All @@ -136,7 +139,7 @@ where
// transformed source does not have range, directly return bytes
let source = env.get_transformed(name)?;
let de_intended = DeindentedExtract::MultiLine(source, 0);
let bytes = indent_lines::<D::Source>(indent, &de_intended);
let bytes = indent_lines::<D::Source>(indent, &de_intended, is_tab);
return Some(Cow::Owned(bytes.into()));
}
MetaVarExtract::Single(name) => {
Expand All @@ -160,7 +163,7 @@ where
}
};
let extracted = extract_with_deindent(source, range);
let bytes = indent_lines::<D::Source>(indent, &extracted);
let bytes = indent_lines::<D::Source>(indent, &extracted, is_tab);
Some(Cow::Owned(bytes.into()))
}

Expand Down
22 changes: 22 additions & 0 deletions crates/rule-engine/src/transform/trans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,4 +552,26 @@ if (true) {
}

// TODO: add a symbolic test for Rewrite
// Parses a `rewrite` transform from YAML and checks the parsed result:
// the rewriter list, the `joinBy` separator, and the rewriters/variables
// that the parsed transform reports as used.
#[test]
fn test_rewrite() -> R {
let trans = parse(
r#"
rewrite:
source: "$A"
rewriters: ["re1", "re2"]
joinBy: ", "
"#,
)?;
let parsed = trans.parse(&TypeScript::Tsx).expect("should parse");
// The parsed transform must be the `Rewrite` variant; anything else fails the test.
match &parsed {
Trans::Rewrite(r) => {
assert_eq!(r.rewriters, vec!["re1", "re2"]);
assert_eq!(r.join_by, Some(", ".to_string()));
}
_ => panic!("should be rewrite"),
}
// Both rewriters and the source meta variable `A` are expected to be reported as used.
assert_eq!(parsed.used_rewriters(), &["re1", "re2"]);
assert_eq!(parsed.used_vars(), "A");
Ok(())
}
}
2 changes: 1 addition & 1 deletion crates/services/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
//! async fn analyze_code(document: &ParsedDocument<String>) {
//! // Access underlying ast-grep functionality directly
//! let _root = document.ast_grep_root();
// Note: To use find_all, the document must be typed with actual AST types
//! // Note: To use find_all, the document must be typed with actual AST types
//! // let matches = _root.root().find_all("fn $NAME($$$PARAMS) { $$$BODY }");
//!
//! // Plus codebase-level metadata
Expand Down
8 changes: 4 additions & 4 deletions crates/services/src/traits/analyzer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
document: &ParsedDocument<D>,
pattern: &str,
context: &AnalysisContext,
) -> ServiceResult<Vec<CodeMatch<'_, String>>>;
) -> ServiceResult<Vec<CodeMatch<'_, D>>>;

/// Find matches for multiple patterns efficiently.
///
Expand All @@ -154,7 +154,7 @@ pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
document: &ParsedDocument<D>,
patterns: &[&str],
context: &AnalysisContext,
) -> ServiceResult<Vec<CodeMatch<'_, String>>>;
) -> ServiceResult<Vec<CodeMatch<'_, D>>>;

/// Replace matches for a pattern with replacement content.
///
Expand Down Expand Up @@ -206,7 +206,7 @@ pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
document: &ParsedDocument<D>,
node_kind: &str,
context: &AnalysisContext,
) -> Result<Vec<thread_services::types::CodeMatch<'_, String>>, Box<dyn std::error::Error + Send + Sync>> {
) -> ServiceResult<Vec<CodeMatch<'_, D>>> {
// Default: use pattern matching based on node kind
let pattern = match node_kind {
"function_declaration" => "fn $NAME($$$PARAMS) { $$$BODY }",
Expand Down Expand Up @@ -292,7 +292,7 @@ pub trait CodeAnalyzer<D: Doc + Send + Sync>: Send + Sync {
results.push(doc_results);
}

Ok(vec![])
Ok(results)
}

/// Extract symbols and metadata from documents.
Expand Down
6 changes: 3 additions & 3 deletions crates/services/src/traits/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cfg_if::cfg_if!(
if #[cfg(feature = "ast-grep-backend")] {
use thread_ast_engine::source::Doc;
use thread_ast_engine::Language;
use thread_services::types::SupportLang;
use thread_language::SupportLang;
} else {
use crate::types::{Doc, SupportLang};
}
Expand Down Expand Up @@ -231,10 +231,10 @@ pub trait CodeParser<D: Doc + Send + Sync>: Send + Sync {
&self,
mut document: ParsedDocument<D>,
context: &AnalysisContext,
) -> Result<thread_services::types::ParsedDocument<String>, Box<dyn std::error::Error + Send + Sync>> {
) -> ServiceResult<ParsedDocument<D>> {
// Default: collect basic metadata
self.collect_basic_metadata(&mut document, context).await?;
Ok(todo!())
Ok(document)
}

/// Collect basic metadata for codebase-level analysis.
Expand Down
2 changes: 0 additions & 2 deletions resolve.sh

This file was deleted.

Loading