diff --git a/crawl4ai/content_filter_strategy.py b/crawl4ai/content_filter_strategy.py index 0909be33d..5e954c31b 100644 --- a/crawl4ai/content_filter_strategy.py +++ b/crawl4ai/content_filter_strategy.py @@ -739,7 +739,7 @@ def _compute_composite_score(self, metrics, text_len, tag_len, link_text_len): if self.min_word_threshold: # Get raw text from metrics node - avoid extra processing text = metrics["node"].get_text(strip=True) - word_count = text.count(" ") + 1 + word_count = len(text.split()) if word_count < self.min_word_threshold: return -1.0 # Guaranteed removal score = 0.0