diff --git a/_typos.toml b/_typos.toml index 45b430f..62962ab 100755 --- a/_typos.toml +++ b/_typos.toml @@ -29,6 +29,7 @@ extend-ignore-identifiers-re = [ "RET", "prev", "normalises", + "inout", "goes", ] diff --git a/crates/ast-engine/src/language.rs b/crates/ast-engine/src/language.rs index 97a1ae4..ec2d489 100644 --- a/crates/ast-engine/src/language.rs +++ b/crates/ast-engine/src/language.rs @@ -68,9 +68,13 @@ pub trait Language: Clone + std::fmt::Debug + Send + Sync + 'static { extract_meta_var(source, self.expando_char()) } /// Return the file language from path. Return None if the file type is not supported. + /// Will panic with an unimplemented error if called and not implemented fn from_path>(_path: P) -> Option { - // TODO: throw panic here if not implemented properly? - None + unimplemented!( + "Language::from_path is not implemented for type `{}`. \ + Override Language::from_path for this type if path-based detection is required.", + std::any::type_name::() + ) } fn kind_to_id(&self, kind: &str) -> u16; diff --git a/crates/ast-engine/src/replacer/indent.rs b/crates/ast-engine/src/replacer/indent.rs index 59262cd..e754a55 100644 --- a/crates/ast-engine/src/replacer/indent.rs +++ b/crates/ast-engine/src/replacer/indent.rs @@ -101,6 +101,9 @@ fn get_new_line() -> C::Underlying { fn get_space() -> C::Underlying { C::decode_str(" ")[0].clone() } +fn get_tab() -> C::Underlying { + C::decode_str("\t")[0].clone() +} const MAX_LOOK_AHEAD: usize = 512; @@ -183,21 +186,16 @@ pub fn formatted_slice<'a, C: Content>( if !slice.contains(&get_new_line::()) { return Cow::Borrowed(slice); } + let (indent, is_tab) = get_indent_at_offset_with_tab::(content.get_range(0..start)); Cow::Owned( - indent_lines::( - 0, - &DeindentedExtract::MultiLine( - slice, - get_indent_at_offset::(content.get_range(0..start)), - ), - ) - .into_owned(), + indent_lines::(0, &DeindentedExtract::MultiLine(slice, indent), is_tab).into_owned(), ) } pub fn indent_lines<'a, C: Content>( indent: 
usize, extract: &'a DeindentedExtract<'a, C>, + is_tab: bool, ) -> Cow<'a, [C::Underlying]> { use DeindentedExtract::{MultiLine, SingleLine}; let (lines, original_indent) = match extract { @@ -213,18 +211,27 @@ pub fn indent_lines<'a, C: Content>( Ordering::Less => Cow::Owned(indent_lines_impl::( indent - original_indent, lines.split(|b| *b == get_new_line::()), + is_tab, )), } } -fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec +fn indent_lines_impl<'a, C, Lines>( + indent: usize, + mut lines: Lines, + is_tab: bool, +) -> Vec where C: Content + 'a, Lines: Iterator, { let mut ret = vec![]; - let space = get_space::(); - let leading: Vec<_> = std::iter::repeat_n(space, indent).collect(); + let indent_char = if is_tab { + get_tab::() + } else { + get_space::() + }; + let leading: Vec<_> = std::iter::repeat_n(indent_char, indent).collect(); // first line wasn't indented, so we don't add leading spaces if let Some(line) = lines.next() { ret.extend(line.iter().cloned()); @@ -241,43 +248,65 @@ where /// returns 0 if no indent is found before the offset /// either truly no indent exists, or the offset is in a long line pub fn get_indent_at_offset(src: &[C::Underlying]) -> usize { + get_indent_at_offset_with_tab::(src).0 +} + +/// returns (indent, `is_tab`) +pub fn get_indent_at_offset_with_tab(src: &[C::Underlying]) -> (usize, bool) { let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD; let mut indent = 0; + let mut is_tab = false; let new_line = get_new_line::(); let space = get_space::(); - // TODO: support TAB. only whitespace is supported now + let tab = get_tab::(); for c in src[lookahead..].iter().rev() { if *c == new_line { - return indent; + return (indent, is_tab); } if *c == space { indent += 1; + } else if *c == tab { + indent += 1; + is_tab = true; } else { indent = 0; + is_tab = false; } } // lookahead == 0 means we have indentation at first line. 
if lookahead == 0 && indent != 0 { - indent + (indent, is_tab) } else { - 0 + (0, false) } } // NOTE: we assume input is well indented. // following lines should have fewer indentations than initial line fn remove_indent(indent: usize, src: &[C::Underlying]) -> Vec { - let indentation: Vec<_> = std::iter::repeat_n(get_space::(), indent).collect(); let new_line = get_new_line::(); + let space = get_space::(); + let tab = get_tab::(); let lines: Vec<_> = src .split(|b| *b == new_line) - .map(|line| match line.strip_prefix(&*indentation) { - Some(stripped) => stripped, - None => line, + .map(|line| { + let mut stripped = line; + let mut count = 0; + while count < indent { + if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&space)) { + stripped = rest; + } else if let Some(rest) = stripped.strip_prefix(std::slice::from_ref(&tab)) { + stripped = rest; + } else { + break; + } + count += 1; + } + stripped }) .collect(); - lines.join(&new_line).clone() + lines.join(&new_line) } #[cfg(test)] @@ -299,7 +328,7 @@ mod test { .count(); let end = source.chars().count() - trailing_white; let extracted = extract_with_deindent(&source, start..end); - let result_bytes = indent_lines::(0, &extracted); + let result_bytes = indent_lines::(0, &extracted, source.contains('\t')); let actual = std::str::from_utf8(&result_bytes).unwrap(); assert_eq!(actual, expected); } @@ -391,8 +420,8 @@ pass fn test_replace_with_indent(target: &str, start: usize, inserted: &str) -> String { let target = target.to_string(); let replace_lines = DeindentedExtract::MultiLine(inserted.as_bytes(), 0); - let indent = get_indent_at_offset::(&target.as_bytes()[..start]); - let ret = indent_lines::(indent, &replace_lines); + let (indent, is_tab) = get_indent_at_offset_with_tab::(&target.as_bytes()[..start]); + let ret = indent_lines::(indent, &replace_lines, is_tab); String::from_utf8(ret.to_vec()).unwrap() } @@ -445,4 +474,26 @@ pass let actual = test_replace_with_indent(target, 6, inserted); 
assert_eq!(actual, "def abc():\n pass"); } + + #[test] + fn test_tab_indent() { + let src = "\n\t\tdef test():\n\t\t\tpass"; + let expected = "def test():\n\tpass"; + test_deindent(src, expected, 0); + } + + #[test] + fn test_tab_replace() { + let target = "\t\t"; + let inserted = "def abc(): pass"; + let actual = test_replace_with_indent(target, 2, inserted); + assert_eq!(actual, "def abc(): pass"); + let inserted = "def abc():\n\tpass"; + let actual = test_replace_with_indent(target, 2, inserted); + assert_eq!(actual, "def abc():\n\t\t\tpass"); + + let target = "\t\tdef abc():\n\t\t\t"; + let actual = test_replace_with_indent(target, 14, inserted); + assert_eq!(actual, "def abc():\n\t\tpass"); + } } diff --git a/crates/ast-engine/src/replacer/template.rs b/crates/ast-engine/src/replacer/template.rs index e95d843..72423c0 100644 --- a/crates/ast-engine/src/replacer/template.rs +++ b/crates/ast-engine/src/replacer/template.rs @@ -4,7 +4,7 @@ // // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT -use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines}; +use super::indent::{DeindentedExtract, extract_with_deindent, indent_lines}; use super::{MetaVarExtract, Replacer, split_first_meta_var}; use crate::NodeMatch; use crate::language::Language; @@ -52,10 +52,10 @@ impl TemplateFix { impl Replacer for TemplateFix { fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying { let leading = nm.get_doc().get_source().get_range(0..nm.range().start); - let indent = get_indent_at_offset::(leading); + let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::(leading); let bytes = replace_fixer(self, nm.get_env()); let replaced = DeindentedExtract::MultiLine(&bytes, 0); - indent_lines::(indent, &replaced).to_vec() + indent_lines::(indent, &replaced, is_tab).to_vec() } } @@ -64,7 +64,7 @@ type Indent = usize; #[derive(Debug, Clone)] pub struct Template { fragments: Vec, - vars: Vec<(MetaVarExtract, Indent)>, + 
vars: Vec<(MetaVarExtract, Indent, bool)>, // the third element is is_tab } fn create_template( @@ -82,8 +82,10 @@ fn create_template( { fragments.push(tmpl[len..len + offset + i].to_string()); // NB we have to count ident of the full string - let indent = get_indent_at_offset::(&tmpl.as_bytes()[..len + offset + i]); - vars.push((meta_var, indent)); + let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::( + &tmpl.as_bytes()[..len + offset + i], + ); + vars.push((meta_var, indent, is_tab)); len += skipped + offset + i; offset = 0; continue; @@ -113,8 +115,8 @@ fn replace_fixer(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl if let Some(frag) = frags.next() { ret.extend_from_slice(&D::Source::decode_str(frag)); } - for ((var, indent), frag) in vars.zip(frags) { - if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) { + for ((var, indent, is_tab), frag) in vars.zip(frags) { + if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) { ret.extend_from_slice(&bytes); } ret.extend_from_slice(&D::Source::decode_str(frag)); @@ -126,6 +128,7 @@ fn maybe_get_var<'e, 't, C, D>( env: &'e MetaVarEnv<'t, D>, var: &MetaVarExtract, indent: usize, + is_tab: bool, ) -> Option> where C: Content + 'e, @@ -136,7 +139,7 @@ where // transformed source does not have range, directly return bytes let source = env.get_transformed(name)?; let de_intended = DeindentedExtract::MultiLine(source, 0); - let bytes = indent_lines::(indent, &de_intended); + let bytes = indent_lines::(indent, &de_intended, is_tab); return Some(Cow::Owned(bytes.into())); } MetaVarExtract::Single(name) => { @@ -160,7 +163,7 @@ where } }; let extracted = extract_with_deindent(source, range); - let bytes = indent_lines::(indent, &extracted); + let bytes = indent_lines::(indent, &extracted, is_tab); Some(Cow::Owned(bytes.into())) } diff --git a/crates/rule-engine/src/transform/trans.rs b/crates/rule-engine/src/transform/trans.rs index 0a80a89..dbc592a 100644 --- 
a/crates/rule-engine/src/transform/trans.rs +++ b/crates/rule-engine/src/transform/trans.rs @@ -552,4 +552,26 @@ if (true) { } // TODO: add a symbolic test for Rewrite + #[test] + fn test_rewrite() -> R { + let trans = parse( + r#" + rewrite: + source: "$A" + rewriters: ["re1", "re2"] + joinBy: ", " + "#, + )?; + let parsed = trans.parse(&TypeScript::Tsx).expect("should parse"); + match &parsed { + Trans::Rewrite(r) => { + assert_eq!(r.rewriters, vec!["re1", "re2"]); + assert_eq!(r.join_by, Some(", ".to_string())); + } + _ => panic!("should be rewrite"), + } + assert_eq!(parsed.used_rewriters(), &["re1", "re2"]); + assert_eq!(parsed.used_vars(), "A"); + Ok(()) + } } diff --git a/crates/services/src/lib.rs b/crates/services/src/lib.rs index 63e5481..1d2556d 100644 --- a/crates/services/src/lib.rs +++ b/crates/services/src/lib.rs @@ -39,7 +39,7 @@ //! async fn analyze_code(document: &ParsedDocument) { //! // Access underlying ast-grep functionality directly //! let _root = document.ast_grep_root(); -// Note: To use find_all, the document must be typed with actual AST types +//! // Note: To use find_all, the document must be typed with actual AST types //! // let matches = _root.root().find_all("fn $NAME($$$PARAMS) { $$$BODY }"); //! //! // Plus codebase-level metadata diff --git a/crates/services/src/traits/analyzer.rs b/crates/services/src/traits/analyzer.rs index d6c157a..926f146 100644 --- a/crates/services/src/traits/analyzer.rs +++ b/crates/services/src/traits/analyzer.rs @@ -135,7 +135,7 @@ pub trait CodeAnalyzer: Send + Sync { document: &ParsedDocument, pattern: &str, context: &AnalysisContext, - ) -> ServiceResult>>; + ) -> ServiceResult>>; /// Find matches for multiple patterns efficiently. /// @@ -154,7 +154,7 @@ pub trait CodeAnalyzer: Send + Sync { document: &ParsedDocument, patterns: &[&str], context: &AnalysisContext, - ) -> ServiceResult>>; + ) -> ServiceResult>>; /// Replace matches for a pattern with replacement content. 
/// @@ -206,7 +206,7 @@ pub trait CodeAnalyzer: Send + Sync { document: &ParsedDocument, node_kind: &str, context: &AnalysisContext, - ) -> Result>, Box> { + ) -> ServiceResult>> { // Default: use pattern matching based on node kind let pattern = match node_kind { "function_declaration" => "fn $NAME($$$PARAMS) { $$$BODY }", @@ -292,7 +292,7 @@ pub trait CodeAnalyzer: Send + Sync { results.push(doc_results); } - Ok(vec![]) + Ok(results) } /// Extract symbols and metadata from documents. diff --git a/crates/services/src/traits/parser.rs b/crates/services/src/traits/parser.rs index 78805a0..5b6a92f 100644 --- a/crates/services/src/traits/parser.rs +++ b/crates/services/src/traits/parser.rs @@ -18,7 +18,7 @@ cfg_if::cfg_if!( if #[cfg(feature = "ast-grep-backend")] { use thread_ast_engine::source::Doc; use thread_ast_engine::Language; - use thread_services::types::SupportLang; + use thread_language::SupportLang; } else { use crate::types::{Doc, SupportLang}; } @@ -231,10 +231,10 @@ pub trait CodeParser: Send + Sync { &self, mut document: ParsedDocument, context: &AnalysisContext, - ) -> Result, Box> { + ) -> ServiceResult> { // Default: collect basic metadata self.collect_basic_metadata(&mut document, context).await?; - Ok(todo!()) + Ok(document) } /// Collect basic metadata for codebase-level analysis. diff --git a/resolve.sh b/resolve.sh deleted file mode 100644 index 3497f07..0000000 --- a/resolve.sh +++ /dev/null @@ -1,2 +0,0 @@ -sed -i '4,8d' crates/services/src/lib.rs -sed -i '3a #![allow(unexpected_cfgs)]' crates/services/src/lib.rs