diff --git a/src/codeweaver/engine/chunker/delimiters/custom.py b/src/codeweaver/engine/chunker/delimiters/custom.py index 4884511e..423bd2d3 100644 --- a/src/codeweaver/engine/chunker/delimiters/custom.py +++ b/src/codeweaver/engine/chunker/delimiters/custom.py @@ -357,12 +357,14 @@ def generate_rst_character_ranges(character: str) -> list[str]: nestable=False, ) +HTML_BLOCK_TAGS = frozenset({"html", "body", "main", "section", "article"}) + HTML_TAGS_PATTERNS = [ DelimiterPattern( starts=[f"<{tag}"], ends=[f""], kind=DelimiterKind.BLOCK - if tag in ["html", "body", "main", "section", "article"] + if tag in HTML_BLOCK_TAGS else DelimiterKind.PARAGRAPH, inclusive=True, take_whole_lines=True,