diff --git a/crates/ironhtml-macro/src/lib.rs b/crates/ironhtml-macro/src/lib.rs index eb3adfc..29b1b27 100644 --- a/crates/ironhtml-macro/src/lib.rs +++ b/crates/ironhtml-macro/src/lib.rs @@ -313,7 +313,7 @@ struct ForLoop { impl Parse for ForLoop { fn parse(input: ParseStream) -> Result { - input.parse::()?; + let for_token: Token![for] = input.parse()?; let pat = syn::Pat::parse_single(input)?; input.parse::()?; input.parse::()?; @@ -329,6 +329,13 @@ impl Parse for ForLoop { children.push(content.parse()?); } + if children.len() != 1 || !matches!(children.first(), Some(Node::Element(_))) { + return Err(syn::Error::new( + for_token.span, + "for loop body must contain exactly one element", + )); + } + Ok(Self { pat, expr, @@ -464,8 +471,6 @@ fn to_pascal_case(s: &str) -> String { "Em" => "Em".to_string(), "Rp" => "Rp".to_string(), "Rt" => "Rt".to_string(), - "Rb" => "Rb".to_string(), - "Rtc" => "Rtc".to_string(), "Wbr" => "Wbr".to_string(), "Kbd" => "Kbd".to_string(), "Pre" => "Pre".to_string(), diff --git a/crates/ironhtml-parser/src/tree_builder.rs b/crates/ironhtml-parser/src/tree_builder.rs index 8f9d4b5..56b3bdd 100644 --- a/crates/ironhtml-parser/src/tree_builder.rs +++ b/crates/ironhtml-parser/src/tree_builder.rs @@ -18,12 +18,12 @@ use crate::tokenizer::Token; pub struct TreeBuilder { /// The document being built. document: Document, - /// Stack of open elements. + /// Stack of open element indices (used by `navigate_to_element`). open_elements: Vec, + /// Parallel stack of open element tag names (for `pop_until`). + open_element_names: Vec, /// Whether we're in fragment mode. fragment_mode: bool, - /// Fragment nodes (when in fragment mode). - fragment_nodes: Vec, /// Current insertion mode. insertion_mode: InsertionMode, /// Pending text to be inserted. @@ -43,6 +43,27 @@ enum InsertionMode { AfterAfterBody, } +/// Navigate from the document root to the element described by `path`. +/// +/// `path[0]` is the root sentinel (skipped); subsequent entries are +/// child indices at each nesting level. +fn navigate_to_element<'a>(root: &'a mut Element, path: &[usize]) -> &'a mut Element { + let mut current = root; + + for &idx in path.iter().skip(1) { + if idx < current.children.len() && matches!(current.children[idx], Node::Element(_)) { + current = match &mut current.children[idx] { + Node::Element(elem) => elem, + _ => unreachable!(), + }; + } else { + break; + } + } + + current +} + impl TreeBuilder { /// Create a new tree builder. #[must_use] @@ -50,18 +71,25 @@ impl TreeBuilder { Self { document: Document::new(), open_elements: Vec::new(), + open_element_names: Vec::new(), fragment_mode: false, - fragment_nodes: Vec::new(), insertion_mode: InsertionMode::Initial, pending_text: String::new(), } } /// Set fragment mode (for parsing HTML fragments). + /// + /// In fragment mode the tree builder skips implicit ``, + /// ``, and `` creation and inserts directly into + /// `document.root` which acts as a virtual container. pub fn set_fragment_mode(&mut self, fragment: bool) { self.fragment_mode = fragment; if fragment { self.insertion_mode = InsertionMode::InBody; + // Push a sentinel so navigate_to_element starts from root. + self.open_elements.push(0); + self.open_element_names.push(String::new()); } } @@ -231,38 +259,27 @@ impl TreeBuilder { } InsertionMode::InBody => { - if self.fragment_mode && self.open_elements.is_empty() { - // In fragment mode, just add to fragment nodes - let mut element = Element::new(&name_lower); - for (key, value) in attributes { - element.attributes.push(Attribute::new(key, value)); - } - self.fragment_nodes.push(Node::Element(element)); - self.open_elements.push(self.fragment_nodes.len() - 1); - } else { - // Check for void elements - let is_void = matches!( - name_lower.as_str(), - "area" - | "base" - | "br" - | "col" - | "embed" - | "hr" - | "img" - | "input" - | "link" - | "meta" - | "source" - | "track" - | "wbr" - ); - - self.insert_element(&name_lower, attributes); - - if is_void { - self.pop_element(); - } + let is_void = matches!( + name_lower.as_str(), + "area" + | "base" + | "br" + | "col" + | "embed" + | "hr" + | "img" + | "input" + | "link" + | "meta" + | "source" + | "track" + | "wbr" + ); + + self.insert_element(&name_lower, attributes); + + if is_void { + self.pop_element(); } } @@ -317,18 +334,7 @@ impl TreeBuilder { fn process_comment(&mut self, data: String) { let comment = Node::Comment(Comment::new(data)); - - if self.fragment_mode && self.open_elements.is_empty() { - self.fragment_nodes.push(comment); - } else if let Some(&idx) = self.open_elements.last() { - if self.fragment_mode { - if let Some(Node::Element(elem)) = self.fragment_nodes.get_mut(idx) { - elem.children.push(comment); - } - } else { - self.insert_into_current(comment); - } - } + self.insert_into_current(comment); } fn create_html_element(&mut self, attributes: Vec<(String, String)>) { @@ -340,6 +346,7 @@ impl TreeBuilder { .push(Attribute::new(key, value)); } self.open_elements.push(0); // html is always index 0 + self.open_element_names.push(String::from("html")); } fn insert_element(&mut self, tag_name: &str, attributes: Vec<(String, String)>) { @@ -348,84 +355,40 @@ impl TreeBuilder { element.attributes.push(Attribute::new(key, value)); } - if self.fragment_mode { - if self.open_elements.is_empty() { - let idx = self.fragment_nodes.len(); - self.fragment_nodes.push(Node::Element(element)); - self.open_elements.push(idx); - } else { - let parent_idx = *self.open_elements.last().unwrap(); - if let Some(Node::Element(parent)) = self.fragment_nodes.get_mut(parent_idx) { - let child_idx = parent.children.len(); - parent.children.push(Node::Element(element)); - // Track using a simple index scheme - self.open_elements.push(parent_idx * 1000 + child_idx); - } - } - } else { - let node = Node::Element(element); - let idx = self.insert_into_current(node); - self.open_elements.push(idx); - } + let node = Node::Element(element); + let idx = self.insert_into_current(node); + self.open_elements.push(idx); + self.open_element_names.push(String::from(tag_name)); } fn insert_into_current(&mut self, node: Node) -> usize { - // Get the indices for navigation - let path: Vec = self.open_elements.clone(); - - // Navigate to the correct parent element using indices - let parent = self.navigate_to_element(&path); + let parent = navigate_to_element(&mut self.document.root, &self.open_elements); let idx = parent.children.len(); parent.children.push(node); idx } - fn navigate_to_element(&mut self, path: &[usize]) -> &mut Element { - let indices: Vec = path.iter().skip(1).copied().collect(); - let mut current = &mut self.document.root; - - for idx in indices { - if idx < current.children.len() && matches!(current.children[idx], Node::Element(_)) { - current = match &mut current.children[idx] { - Node::Element(elem) => elem, - _ => unreachable!(), - }; - } else { - break; - } - } - - current - } - fn insert_text(&mut self, text: String) { let text_node = Node::Text(Text::new(text)); - - if self.fragment_mode { - if self.open_elements.is_empty() { - self.fragment_nodes.push(text_node); - } else { - let parent_idx = *self.open_elements.last().unwrap(); - if parent_idx < self.fragment_nodes.len() { - if let Some(Node::Element(parent)) = self.fragment_nodes.get_mut(parent_idx) { - parent.children.push(text_node); - } - } - } - } else { - self.insert_into_current(text_node); - } + self.insert_into_current(text_node); } fn pop_element(&mut self) { self.open_elements.pop(); + self.open_element_names.pop(); } - fn pop_until(&mut self, _tag_name: &str) { - // In a full implementation, we'd pop until we find an element matching tag_name. - // For simplicity, we just pop the current element once. - if self.open_elements.last().is_some() { + /// Pop elements from the stack until one matching `tag_name` is found + /// and popped. Never pops the root sentinel. + fn pop_until(&mut self, tag_name: &str) { + while self.open_element_names.len() > 1 { + if self.open_element_names.last().map(String::as_str) == Some(tag_name) { + self.open_elements.pop(); + self.open_element_names.pop(); + return; + } self.open_elements.pop(); + self.open_element_names.pop(); } } @@ -440,7 +403,7 @@ impl TreeBuilder { #[must_use] pub fn finish_fragment(mut self) -> Vec { self.flush_pending_text(); - self.fragment_nodes + self.document.root.children } } @@ -477,7 +440,8 @@ mod tests { #[test] fn test_simple_document() { let doc = parse( - "Test

Hello

", + "Test\ +

Hello

", ); assert!(doc.doctype.is_some()); assert_eq!(doc.doctype.as_ref().unwrap().name, "html"); @@ -518,4 +482,424 @@ mod tests { assert_eq!(div.get_attribute("id"), Some("main")); } } + + #[test] + fn test_pop_until_nested() { + let nodes = parse_fragment("
Hello World
"); + assert_eq!(nodes.len(), 1); + if let Some(Node::Element(div)) = nodes.first() { + assert_eq!(div.tag_name, "div"); + assert_eq!(div.children.len(), 2); + if let Some(Node::Element(span)) = div.children.first() { + assert_eq!(span.tag_name, "span"); + assert_eq!(span.text_content(), Some("Hello".into())); + } + } + } + + #[test] + fn test_deeply_nested_fragment() { + let nodes = parse_fragment("
  • Deep
"); + assert_eq!(nodes.len(), 1); + if let Some(Node::Element(div)) = nodes.first() { + let ul = div.find_element("ul").unwrap(); + let li = ul.find_element("li").unwrap(); + let span = li.find_element("span").unwrap(); + assert_eq!(span.text_content(), Some("Deep".into())); + } + } + + #[test] + fn test_fragment_void_elements() { + let nodes = parse_fragment("

After
"); + assert_eq!(nodes.len(), 1); + if let Some(Node::Element(div)) = nodes.first() { + // br is void, should not nest span inside it + assert_eq!(div.children.len(), 2); + if let Some(Node::Element(br)) = div.children.first() { + assert_eq!(br.tag_name, "br"); + assert!(br.children.is_empty()); + } + } + } + + #[test] + fn test_fragment_multiple_top_level() { + let nodes = parse_fragment("

One

Two

Three

"); + assert_eq!(nodes.len(), 3); + } + + #[test] + fn test_many_children_fragment() { + use core::fmt::Write; + let mut html = String::from("
"); + for i in 0..1100 { + let _ = write!(html, "{i}"); + } + html.push_str("
"); + let nodes = parse_fragment(&html); + assert_eq!(nodes.len(), 1); + if let Some(Node::Element(div)) = nodes.first() { + assert_eq!(div.children.len(), 1100); + } + } + + #[test] + fn test_unmatched_end_tag() { + // Unmatched should not crash or empty the stack + let nodes = parse_fragment("
Hello
"); + assert_eq!(nodes.len(), 1); + if let Some(Node::Element(div)) = nodes.first() { + assert_eq!(div.tag_name, "div"); + } + } + + // ── pop_until tests ────────────────────────────────────────────── + + #[test] + fn test_pop_until_skips_intermediate() { + // should pop both and , closing at
+ let nodes = parse_fragment("
Text
"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + assert_eq!(div.tag_name, "div"); + // span was opened, em was opened inside span, then
+ // pops em, span, div + let span = div.find_element("span").unwrap(); + let em = span.find_element("em").unwrap(); + assert_eq!(em.text_content(), Some("Text".into())); + } + + #[test] + fn test_pop_until_no_match_preserves_root() { + // pops elements but stops at root sentinel + let nodes = parse_fragment("

Hello

"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + assert_eq!(div.tag_name, "div"); + } + + #[test] + fn test_pop_until_closes_correct_level() { + // Nested
s: inner
should only close the inner one + let nodes = parse_fragment("
Inner
Outer
"); + assert_eq!(nodes.len(), 1); + let outer = nodes[0].as_element().unwrap(); + assert_eq!(outer.tag_name, "div"); + assert_eq!(outer.children.len(), 2); + // First child: inner div + let inner = outer.children[0].as_element().unwrap(); + assert_eq!(inner.tag_name, "div"); + assert_eq!( + inner.find_element("span").unwrap().text_content(), + Some("Inner".into()) + ); + // Second child: outer span (after inner div was closed) + let outer_span = outer.children[1].as_element().unwrap(); + assert_eq!(outer_span.tag_name, "span"); + assert_eq!(outer_span.text_content(), Some("Outer".into())); + } + + #[test] + fn test_pop_until_multiple_same_tag() { + // Three nested s, one closes only the innermost + let nodes = parse_fragment("
DeepMidTop
"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + let s1 = div.find_element("span").unwrap(); + let s2 = s1.find_element("span").unwrap(); + let s3 = s2.find_element("span").unwrap(); + assert_eq!(s3.text_content(), Some("Deep".into())); + // "Mid" is text after inner span closes, inside middle span + assert!(s2.children.len() >= 2); + // "Top" is text after middle span closes, inside outer span + assert!(s1.children.len() >= 2); + } + + // ── fragment nesting tests ─────────────────────────────────────── + + #[test] + fn test_fragment_five_levels_deep() { + let nodes = parse_fragment( + "

Title

\ +
", + ); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + let section = div.find_element("section").unwrap(); + let article = section.find_element("article").unwrap(); + let header = article.find_element("header").unwrap(); + let h1 = header.find_element("h1").unwrap(); + assert_eq!(h1.text_content(), Some("Title".into())); + } + + #[test] + fn test_fragment_text_at_every_level() { + let nodes = parse_fragment("
ABCDE
"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + // div has: text("A"), span, text("E") + assert_eq!(div.children.len(), 3); + assert_eq!(div.children[0].as_text().unwrap().data, "A"); + let span = div.children[1].as_element().unwrap(); + // span has: text("B"), em, text("D") + assert_eq!(span.children.len(), 3); + assert_eq!(span.children[0].as_text().unwrap().data, "B"); + let em = span.children[1].as_element().unwrap(); + assert_eq!(em.text_content(), Some("C".into())); + assert_eq!(span.children[2].as_text().unwrap().data, "D"); + assert_eq!(div.children[2].as_text().unwrap().data, "E"); + } + + #[test] + fn test_fragment_siblings_with_children() { + let nodes = parse_fragment("
  • One!
  • Two
  • Three
"); + assert_eq!(nodes.len(), 1); + let ul = nodes[0].as_element().unwrap(); + assert_eq!(ul.children.len(), 3); + // First li has text + em + let li1 = ul.children[0].as_element().unwrap(); + assert_eq!(li1.children.len(), 2); + assert_eq!(li1.children[0].as_text().unwrap().data, "One"); + assert_eq!( + li1.children[1].as_element().unwrap().text_content(), + Some("!".into()) + ); + // Second and third are simple + let li2 = ul.children[1].as_element().unwrap(); + assert_eq!(li2.text_content(), Some("Two".into())); + let li3 = ul.children[2].as_element().unwrap(); + assert_eq!(li3.text_content(), Some("Three".into())); + } + + // ── fragment void element tests ────────────────────────────────── + + #[test] + fn test_fragment_multiple_void_elements() { + let nodes = parse_fragment("


"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + assert_eq!(div.children.len(), 4); + assert_eq!(div.children[0].as_element().unwrap().tag_name, "br"); + assert_eq!(div.children[1].as_element().unwrap().tag_name, "hr"); + assert_eq!(div.children[2].as_element().unwrap().tag_name, "img"); + assert_eq!(div.children[3].as_element().unwrap().tag_name, "input"); + // None should have children + for child in &div.children { + assert!(child.as_element().unwrap().children.is_empty()); + } + } + + #[test] + fn test_fragment_void_between_text() { + let nodes = parse_fragment("

Before
After

"); + assert_eq!(nodes.len(), 1); + let p = nodes[0].as_element().unwrap(); + assert_eq!(p.children.len(), 3); + assert_eq!(p.children[0].as_text().unwrap().data, "Before"); + assert_eq!(p.children[1].as_element().unwrap().tag_name, "br"); + assert_eq!(p.children[2].as_text().unwrap().data, "After"); + } + + #[test] + fn test_fragment_void_with_attributes() { + let nodes = parse_fragment(r#"
test
"#); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + let img = div.children[0].as_element().unwrap(); + assert_eq!(img.get_attribute("src"), Some("a.png")); + assert_eq!(img.get_attribute("alt"), Some("test")); + assert!(img.children.is_empty()); + } + + #[test] + fn test_fragment_void_nested_inside() { + // Void element inside a nested structure + let nodes = parse_fragment("
"); + assert_eq!(nodes.len(), 1); + let table = nodes[0].as_element().unwrap(); + let tr = table.find_element("tr").unwrap(); + let td = tr.find_element("td").unwrap(); + let input = td.find_element("input").unwrap(); + assert!(input.children.is_empty()); + } + + // ── fragment comment tests ─────────────────────────────────────── + + #[test] + fn test_fragment_comment_top_level() { + let nodes = parse_fragment("
Hi
"); + assert_eq!(nodes.len(), 2); + assert!(matches!(nodes[0], Node::Comment(_))); + if let Node::Comment(c) = &nodes[0] { + assert_eq!(c.data, " top "); + } + assert_eq!(nodes[1].as_element().unwrap().tag_name, "div"); + } + + #[test] + fn test_fragment_comment_inside_element() { + let nodes = parse_fragment("
"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + assert_eq!(div.children.len(), 1); + assert!(matches!(div.children[0], Node::Comment(_))); + } + + #[test] + fn test_fragment_comment_between_elements() { + let nodes = parse_fragment("
  • A
  • B
"); + assert_eq!(nodes.len(), 1); + let ul = nodes[0].as_element().unwrap(); + assert_eq!(ul.children.len(), 3); + assert_eq!(ul.children[0].as_element().unwrap().tag_name, "li"); + assert!(matches!(ul.children[1], Node::Comment(_))); + assert_eq!(ul.children[2].as_element().unwrap().tag_name, "li"); + } + + // ── fragment top-level variety tests ───────────────────────────── + + #[test] + fn test_fragment_text_only() { + let nodes = parse_fragment("Just text"); + assert_eq!(nodes.len(), 1); + assert_eq!(nodes[0].as_text().unwrap().data, "Just text"); + } + + #[test] + fn test_fragment_mixed_top_level() { + let nodes = parse_fragment("Hello world and more!"); + // text, em, text, strong, text + assert_eq!(nodes.len(), 5); + assert_eq!(nodes[0].as_text().unwrap().data, "Hello "); + assert_eq!(nodes[1].as_element().unwrap().tag_name, "em"); + assert_eq!(nodes[2].as_text().unwrap().data, " and "); + assert_eq!(nodes[3].as_element().unwrap().tag_name, "strong"); + assert_eq!(nodes[4].as_text().unwrap().data, "!"); + } + + #[test] + fn test_fragment_empty() { + let nodes = parse_fragment(""); + assert!(nodes.is_empty()); + } + + #[test] + fn test_fragment_whitespace_only() { + // Whitespace in InBody mode is NOT skipped + let nodes = parse_fragment(" "); + assert_eq!(nodes.len(), 1); + assert_eq!(nodes[0].as_text().unwrap().data, " "); + } + + // ── malformed input tests ──────────────────────────────────────── + + #[test] + fn test_malformed_only_end_tags() { + let nodes = parse_fragment("

"); + // No start tags to match, nothing produced + assert!(nodes.is_empty()); + } + + #[test] + fn test_malformed_extra_end_tags() { + let nodes = parse_fragment("
Hello
"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + assert_eq!(div.text_content(), Some("Hello".into())); + } + + #[test] + fn test_malformed_unclosed_tags() { + // Tags that are never closed + let nodes = parse_fragment("
Text"); + assert_eq!(nodes.len(), 1); + let div = nodes[0].as_element().unwrap(); + let span = div.find_element("span").unwrap(); + let em = span.find_element("em").unwrap(); + assert_eq!(em.text_content(), Some("Text".into())); + } + + #[test] + fn test_malformed_interleaved_tags() { + // - interleaved close order + let nodes = parse_fragment("TextAfter"); + // After pops both i and b (pop_until finds b). + // "After" goes to root since both are closed. + // is unmatched, ignored. + assert!(!nodes.is_empty()); + let b = nodes[0].as_element().unwrap(); + assert_eq!(b.tag_name, "b"); + } + + #[test] + fn test_malformed_deeply_mismatched() { + let nodes = parse_fragment("Text"); + // pops e, d, c, b, a + assert_eq!(nodes.len(), 1); + let a = nodes[0].as_element().unwrap(); + assert_eq!(a.tag_name, "a"); + assert!(a.find_element("e").is_some()); + } + + // ── full document tests ────────────────────────────────────────── + + #[test] + fn test_document_head_elements() { + let doc = parse( + r#" + Test + + + "#, + ); + let head = doc.head().unwrap(); + assert!(head.find_element("title").is_some()); + assert!(head.find_element("meta").is_some()); + assert!(head.find_element("link").is_some()); + } + + #[test] + fn test_document_implicit_body() { + // No explicit tag, elements go into implicit body + let doc = parse("
Content
"); + let body = doc.body().unwrap(); + let div = body.find_element("div").unwrap(); + assert_eq!(div.text_content(), Some("Content".into())); + } + + #[test] + fn test_document_implicit_head_and_body() { + // No head or body, just content + let doc = parse("
Content
"); + assert_eq!(doc.root.tag_name, "html"); + assert!(doc.head().is_some()); + assert!(doc.body().is_some()); + let body = doc.body().unwrap(); + let div = body.find_element("div").unwrap(); + assert_eq!(div.text_content(), Some("Content".into())); + } + + #[test] + fn test_document_title() { + let doc = parse( + "Hello World\ + ", + ); + assert_eq!(doc.title(), Some(String::from("Hello World"))); + } + + #[test] + fn test_document_round_trip() { + let html = "Test\ +

Hello

"; + let doc = parse(html); + let output = doc.to_html(); + // Re-parse the output and verify structure + let doc2 = parse(&output); + assert_eq!(doc2.title(), Some(String::from("Test"))); + let body = doc2.body().unwrap(); + let p = body.find_element("p").unwrap(); + assert_eq!(p.text_content(), Some("Hello".into())); + } }