From a564e16518081cb0ff4d6f0e57096da154a686f9 Mon Sep 17 00:00:00 2001
From: Yanggq <1041206149@qq.com>
Date: Wed, 22 Oct 2025 16:06:48 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E7=AE=80=E5=8C=96math=E6=8A=BD=E5=8F=96?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../html/recognizer/cc_math/common.py         | 82 ++++++-------------
 .../recognizer/cc_math/tag_common_modify.py   | 15 +---
 .../extractor/html/recognizer/ccmath.py       | 10 +--
 ...e_1_span-math-container_latex_mathjax.html |  2 +-
 ...math-container_latex_mathjax_inline_1.html |  3 +-
 .../extractor/html/recognizer/test_math.py    | 39 +++++++--
 .../input/assets/content_json.json            |  2 +-
 7 files changed, 67 insertions(+), 86 deletions(-)

diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/common.py b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
index d9178840..9dea5eba 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/common.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
@@ -20,7 +20,6 @@
 from llm_web_kit.extractor.html.recognizer.recognizer import CCTag
 from llm_web_kit.libs.doc_element_type import DocElementType
 from llm_web_kit.libs.html_utils import (build_cc_element, element_to_html,
-                                         element_to_html_unescaped,
                                          html_to_element)
 from llm_web_kit.libs.text_utils import normalize_ctl_text
 
@@ -538,62 +537,35 @@ def fix_mathml_superscript(self, mathml_str):
             parent.remove(msup)
         return etree.tostring(root, encoding='unicode', pretty_print=True)
 
-    def replace_math(self, new_tag: str, math_type: str, math_render: str, node: HtmlElement, func, asciimath_wrap: bool = False) -> HtmlElement:
-        # pattern re数学公式匹配 func 公式预处理 默认不处理
-        # ascii公式处理逻辑转移到mathjax渲染器方案中
-        if asciimath_wrap:
-            return node
+    def replace_math(self, new_tag: str, math_type: str, math_render: str, node: HtmlElement, func) -> HtmlElement:
+        """替换数学公式节点.
+
+        Args:
+            new_tag: 新标签名称(CCMATH_INLINE 或 CCMATH_INTERLINE)
+            math_type: 数学公式类型(MathType.LATEX 等)
+            math_render: 渲染器类型
+            node: 当前HTML节点
+            func: optional formula preprocessing function; accepted but not referenced by this implementation
 
-        pattern_type = MATH_TYPE_PATTERN.DISPLAYMATH if new_tag == CCMATH_INTERLINE else MATH_TYPE_PATTERN.INLINEMATH
-        original_text = node.text or ''
-
-        def is_ccmath_wrapped(match_text, original_text: str) -> bool:
-            if not match_text or not original_text:
-                return False
-            start_idx = match_text.start()
-            end_idx = match_text.end()
-            before_match = original_text[:start_idx].strip()
-            after_match = original_text[end_idx:].strip()
-            if 'ccmath' in before_match and 'ccmath' in after_match:
-                return True
-            if pattern_type == MATH_TYPE_PATTERN.DISPLAYMATH:
-                for start, end in MATH_TYPE_TO_DISPLAY[MathType.LATEX][MATH_TYPE_PATTERN.INLINEMATH]:
-                    if start in before_match and end in after_match:
-                        return True
-            return False
-
-        def process(match_text):
-            try:
-                match = match_text.group(0)
-                if is_ccmath_wrapped(match_text, original_text):
-                    return match
-                wrapped_text = func(match) if func else match
-                # html保留原始的，而不是传入修改过的wrapped_text
-                original_wrapped = wrapped_text
-                wrapped_text = self.wrap_math_md(wrapped_text)
-                if not wrapped_text:
-                    return match
-                new_span = build_cc_element(
-                    html_tag_name=new_tag,
-                    text=wrapped_text,
-                    tail='',
-                    type=math_type,
-                    by=math_render,
-                    html=original_wrapped
-                )
-            except Exception:
-                return match
-            return element_to_html(new_span)
-        try:
-            for start, end in MATH_TYPE_TO_DISPLAY[math_type][pattern_type]:
-                pattern = f'{re.escape(start)}.*?{re.escape(end)}'.replace(r'\.\*\?', '.*?')
-                regex = re.compile(pattern, re.DOTALL)
-                original_text = re.sub(regex, process, original_text)
-        except Exception:
-            node.text = self.build_cc_exception_tag(original_text, math_type, math_render)
+        Returns:
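+            the original node unchanged when node.text is empty or text_strip() of it is empty; otherwise a new new_tag element whose text is wrap_math_md(node.text)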
+        """
+        text = node.text
+        if not text or not text_strip(text):
             return node
-        node.text = original_text
-        return html_to_element(element_to_html_unescaped(node))
+
+        formula = self.wrap_math_md(text)
+        # 构建新节点
+        new_span = build_cc_element(
+            html_tag_name=new_tag,
+            text=formula,
+            tail=text_strip(node.tail),
+            type=math_type,
+            by=math_render,
+            html=element_to_html(node)
+        )
+
+        return new_span
 
     def build_cc_exception_tag(self, text, math_type, math_render) -> str:
         return element_to_html(build_cc_element(
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py b/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
index 260d4b80..6801cf43 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
@@ -2,12 +2,11 @@
 
 from llm_web_kit.exception.exception import HtmlMathRecognizerException
 from llm_web_kit.extractor.html.recognizer.cc_math.common import (CCMATH,
-                                                                  MathType,
                                                                   text_strip)
 from llm_web_kit.libs.html_utils import replace_element
 
 
-def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, parent: HtmlElement):
+def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement):
     try:
         text = node.text
         tag_math_type_list = cm.get_equation_type(o_html)
@@ -18,17 +17,9 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement, pa
             tail = node.tail
             new_span.tail = None
             for new_tag, math_type in tag_math_type_list:
-                asciimath_wrap = True if math_type == MathType.ASCIIMATH else False
-                new_span = cm.replace_math(new_tag, math_type, math_render, new_span, None,asciimath_wrap)
+                new_span = cm.replace_math(new_tag, math_type, math_render, new_span, None)
             new_span.tail = tail
             replace_element(node,new_span)
-            # if math_type == MathType.ASCIIMATH:
-            #     text = cm.wrap_math_md(text)
-            #     text = cm.extract_asciimath(text)
-            #     new_span = build_cc_element(html_tag_name=new_tag, text=cm.wrap_math_md(text), tail=text_strip(node.tail), type=math_type, by=math_render, html=o_html)
-            #     replace_element(node, new_span)
-            # elif math_type == MathType.LATEX:
-            #     new_span = build_cc_element(html_tag_name=new_tag, text=cm.wrap_math_md(text), tail=text_strip(node.tail), type=math_type, by=math_render, html=o_html)
-            #     replace_element(node, new_span)
+
     except Exception as e:
         raise HtmlMathRecognizerException(f'Error processing script mathtex: {e}')
diff --git a/llm_web_kit/extractor/html/recognizer/ccmath.py b/llm_web_kit/extractor/html/recognizer/ccmath.py
index 780ba583..1a2d0958 100644
--- a/llm_web_kit/extractor/html/recognizer/ccmath.py
+++ b/llm_web_kit/extractor/html/recognizer/ccmath.py
@@ -140,14 +140,8 @@ def process_ccmath_html(self, cc_html: str, o_html: str, math_render: BaseMathRe
                     tag_script.process_zhihu_custom_tag(self.cm, math_render_type, node)
 
                 # tag = span， class 为 math-containerm， 或者 mathjax 或者 wp-katex-eq
-                if node.tag == 'span' and node.get('class') and (
-                        'math-container' in node.get('class') or
-                        'mathjax' in node.get('class') or
-                        'wp-katex-eq' in node.get('class') or
-                        'x-ck12-mathEditor' in node.get('class') or
-                        'tex' in node.get('class')
-                ):
-                    tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node, parent)
+                if node.tag == 'span' and node.get('class') and 'math-container' in node.get('class'):
+                    tag_common_modify.modify_tree(self.cm, math_render_type, original_html, node)
 
                 # math tags
                 if node.tag == 'math' or node.tag.endswith(':math'):
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax.html
index c509d756..ea085333 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax.html
+++ b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax.html
@@ -1406,7 +1406,7 @@ <h2 class="mb0" data-answercount="2">
 <div class="answercell post-layout--right">
     <span class="d-none">$\begingroup$</span>
     <div class="s-prose js-post-body" itemprop="text">
-<p>When gravitational waves reach Earth, <a href="https://en.wikipedia.org/wiki/Gravitational_wave#Difficulties" rel="nofollow noreferrer">they usually give a strain</a> of <span class="math-container">$\delta L \over L$$=10^{-21}$</span>.</p>
+<p>When gravitational waves reach Earth, <a href="https://en.wikipedia.org/wiki/Gravitational_wave#Difficulties" rel="nofollow noreferrer">they usually give a strain</a> of <span class="math-container">$\delta L \over L = 10^{-21}$</span>.</p>
 <p>If we assume that they scale with the distance the same way electromagnetic waves do, thus following the inverse square law, we can get an estimate of the distance needed.</p>
 <p>LIGO detected the <a href="https://en.wikipedia.org/wiki/First_observation_of_gravitational_waves" rel="nofollow noreferrer">first merger</a> of black holes at 1.3 billion light years away.</p>
 <p>If we would get to 1 light year away from the merger, under the above hypothesis we would get a strain of <span class="math-container">$10^{-21} \times (1.3 \cdot 10^9)^2=10^{-3}$</span>. This means that on 1 meter length we would notice a 1 mm oscillation, which is something we are able to sense.</p>
diff --git a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax_inline_1.html b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax_inline_1.html
index 771c4c49..b8fc706a 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax_inline_1.html
+++ b/tests/llm_web_kit/extractor/html/recognizer/assets/ccmath/stackexchange_1_span-math-container_latex_mathjax_inline_1.html
@@ -7,8 +7,7 @@
 M_{\odot}
 M_{\odot}
 M_{\odot}
-\delta L \over L
-=10^{-21}
+\delta L \over L = 10^{-21}
 10^{-21} \times (1.3 \cdot 10^9)^2=10^{-3}
 1/r
 10^{-21} \times (1.3 \cdot 10^9)=10^{-9}
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_math.py b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
index 79d010aa..679690a0 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_math.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
@@ -511,6 +511,7 @@ def test_math_recognizer_html(self):
             # print('answers::::::::', answers)
             # self.write_to_html(answers, test_case['input'][0])
             # 检查行内公式抽取正确性
+            # 检查行内公式抽取正确性
             if test_case.get('expected_inline', None):
                 # 从所有parts中提取所有行内公式
                 all_inline_formulas = []
@@ -521,16 +522,40 @@ def test_math_recognizer_html(self):
                         for inline_elem in inline_elements:
                             formula = inline_elem.text.replace('\n', '').strip()
                             all_inline_formulas.append(formula)
-                # print(f"Found {len(all_inline_formulas)} total inline formulas")
-                # print(f"Total new_parts: {len(new_parts)}")
+
                 expect_inline_text = base_dir.joinpath(test_case['expected_inline']).read_text(encoding='utf-8').strip()
                 expect_inline_formulas = [formula for formula in expect_inline_text.split('\n') if formula]
-                # print(f"Expected {len(expect_inline_formulas)} inline formulas")
+
+                # 如果数量不匹配，输出详细信息
+                if len(all_inline_formulas) != len(expect_inline_formulas):
+                    print("\n" + "=" * 80)
+                    print("行内公式抽取出错!")
+                    print("=" * 80)
+                    print(f"出错样例: {test_case['input']}")
+                    print(f"预期公式数: {len(expect_inline_formulas)}")
+                    print(f"实际公式数: {len(all_inline_formulas)}")
+                    print("\n预期公式列表:")
+                    for i, formula in enumerate(expect_inline_formulas, 1):
+                        print(f"  {i}. {formula}")
+                    print("\n实际公式列表:")
+                    for i, formula in enumerate(all_inline_formulas, 1):
+                        print(f"  {i}. {formula}")
+
+                    # 找出差异
+                    print("\n差异分析:")
+                    if len(all_inline_formulas) > len(expect_inline_formulas):
+                        print(f"多提取了 {len(all_inline_formulas) - len(expect_inline_formulas)} 个公式:")
+                        extra_formulas = all_inline_formulas[len(expect_inline_formulas):]
+                        for i, formula in enumerate(extra_formulas, 1):
+                            print(f"  {i}. {formula}")
+                    else:
+                        print(f"少提取了 {len(expect_inline_formulas) - len(all_inline_formulas)} 个公式:")
+                        missing_formulas = expect_inline_formulas[len(all_inline_formulas):]
+                        for i, formula in enumerate(missing_formulas, 1):
+                            print(f"  {i}. {formula}")
+                    print("=" * 80 + "\n")
+
                 self.assertEqual(len(all_inline_formulas), len(expect_inline_formulas))
-                for expect, formula in zip(expect_inline_formulas, all_inline_formulas):
-                    # print('inline expect::::::::', expect)
-                    # print('inline answer::::::::', formula)
-                    self.assertEqual(expect, formula)
 
     def write_to_html(self, answers, file_name):
         file_name = file_name.split('.')[0]
diff --git a/tests/llm_web_kit/input/assets/content_json.json b/tests/llm_web_kit/input/assets/content_json.json
index d8f038a2..34236da4 100644
--- a/tests/llm_web_kit/input/assets/content_json.json
+++ b/tests/llm_web_kit/input/assets/content_json.json
@@ -168,7 +168,7 @@
         "content": {
           "math_content": "a^2 + b^2 = c^2",
           "math_type": "latex",
-          "by": "None"
+          "by": "mathjax_mock"
         }
       },
       {

From 08daa672a20b4cb8033798fef92998aeb797c69d Mon Sep 17 00:00:00 2001
From: Yanggq <1041206149@qq.com>
Date: Wed, 22 Oct 2025 16:17:11 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E7=AE=80=E5=8C=96math=E6=8A=BD=E5=8F=96?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../html/recognizer/cc_math/common.py          | 18 +++++++++++-------
 .../recognizer/cc_math/tag_common_modify.py    |  2 +-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/common.py b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
index 9dea5eba..c1ce7377 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/common.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
@@ -537,7 +537,7 @@ def fix_mathml_superscript(self, mathml_str):
             parent.remove(msup)
         return etree.tostring(root, encoding='unicode', pretty_print=True)
 
-    def replace_math(self, new_tag: str, math_type: str, math_render: str, node: HtmlElement, func) -> HtmlElement:
+    def replace_math(self, new_tag: str, math_type: str, math_render: str, node: HtmlElement) -> HtmlElement:
         """替换数学公式节点.
 
         Args:
@@ -593,12 +593,16 @@ def build_cc_exception_tag(self, text, math_type, math_render) -> str:
     print(cm.wrap_math_md(r'$$a^2 + b^2 = c^2$$'))
     print(cm.wrap_math_md(r'\(a^2 + b^2 = c^2\)'))
     print(cm.extract_asciimath('x=(-b +- sqrt(b^2 - 4ac))/(2a)'))
-    print(cm.replace_math('ccmath-interline','asciimath','',html_to_element(r'<p>`x=(-b +- sqrt(b^2 - 4ac))/(2a)`</p>'),None,True))
-    print(cm.replace_math('ccmath-interline','asciimath','',html_to_element(r'<p>like this: \`E=mc^2\`</p>'),None,True))
-    print(cm.replace_math('ccmath-interline','asciimath','',html_to_element(r'<p>A `3xx3` matrix,`((1,2,3),(4,5,6),(7,8,9))`, and a `2xx1` matrix, or vector, `((1),(0))`.</p>'),None,True))
-    print(cm.replace_math('ccmath-interline','asciimath','',html_to_element(r'<p>`(x+1)/x^2``1/3245`</p>'),None,True))
-    print(cm.replace_math('ccmath-interline','latex','',html_to_element(r'<p>start $$f(a,b,c) = (a^2+b^2+c^2)^3$$end</p>'),None,False))
-    print(cm.replace_math('ccmath-inline','latex','',html_to_element(r'<p>\( \newcommand{\norm}[1]{\| #1 \|}\)</p>'),None,False))
+    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>`x=(-b +- sqrt(b^2 - 4ac))/(2a)`</p>'),
+                          True))
+    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>like this: \`E=mc^2\`</p>'), True))
+    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>A `3xx3` matrix,`((1,2,3),(4,5,6),(7,8,9))`, and a `2xx1` matrix, or vector, `((1),(0))`.</p>'),
+                          True))
+    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>`(x+1)/x^2``1/3245`</p>'), True))
+    print(cm.replace_math('ccmath-interline','latex','', html_to_element(r'<p>start $$f(a,b,c) = (a^2+b^2+c^2)^3$$end</p>'),
+                          False))
+    print(cm.replace_math('ccmath-inline','latex','', html_to_element(r'<p>\( \newcommand{\norm}[1]{\| #1 \|}\)</p>'),
+                          False))
     # cm.url = 'mathhelpforum.com'
     # print(cm.wrap_math_md_custom(r'<br />\begin{align} a^2+b=c\end{align}\<br />'))
     # print(cm.wrap_math_md_custom(r'<br />dz=\frac{1}{2}\frac{dx}{\cos ^2 x}<br />'))
diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py b/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
index 6801cf43..aaa4a9de 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/tag_common_modify.py
@@ -17,7 +17,7 @@ def modify_tree(cm: CCMATH, math_render: str, o_html: str, node: HtmlElement):
             tail = node.tail
             new_span.tail = None
             for new_tag, math_type in tag_math_type_list:
-                new_span = cm.replace_math(new_tag, math_type, math_render, new_span, None)
+                new_span = cm.replace_math(new_tag, math_type, math_render, new_span)
             new_span.tail = tail
             replace_element(node,new_span)
 

From 1ee8049828f0b162929d123636437b0cf24d1825 Mon Sep 17 00:00:00 2001
From: Yanggq <1041206149@qq.com>
Date: Wed, 22 Oct 2025 16:27:34 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E7=AE=80=E5=8C=96math=E6=8A=BD=E5=8F=96?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../html/recognizer/cc_math/common.py         | 10 -----
 .../extractor/html/recognizer/test_math.py    | 39 ++++---------------
 2 files changed, 7 insertions(+), 42 deletions(-)

diff --git a/llm_web_kit/extractor/html/recognizer/cc_math/common.py b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
index c1ce7377..1d5ef3a2 100644
--- a/llm_web_kit/extractor/html/recognizer/cc_math/common.py
+++ b/llm_web_kit/extractor/html/recognizer/cc_math/common.py
@@ -593,16 +593,6 @@ def build_cc_exception_tag(self, text, math_type, math_render) -> str:
     print(cm.wrap_math_md(r'$$a^2 + b^2 = c^2$$'))
     print(cm.wrap_math_md(r'\(a^2 + b^2 = c^2\)'))
     print(cm.extract_asciimath('x=(-b +- sqrt(b^2 - 4ac))/(2a)'))
-    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>`x=(-b +- sqrt(b^2 - 4ac))/(2a)`</p>'),
-                          True))
-    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>like this: \`E=mc^2\`</p>'), True))
-    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>A `3xx3` matrix,`((1,2,3),(4,5,6),(7,8,9))`, and a `2xx1` matrix, or vector, `((1),(0))`.</p>'),
-                          True))
-    print(cm.replace_math('ccmath-interline','asciimath','', html_to_element(r'<p>`(x+1)/x^2``1/3245`</p>'), True))
-    print(cm.replace_math('ccmath-interline','latex','', html_to_element(r'<p>start $$f(a,b,c) = (a^2+b^2+c^2)^3$$end</p>'),
-                          False))
-    print(cm.replace_math('ccmath-inline','latex','', html_to_element(r'<p>\( \newcommand{\norm}[1]{\| #1 \|}\)</p>'),
-                          False))
     # cm.url = 'mathhelpforum.com'
     # print(cm.wrap_math_md_custom(r'<br />\begin{align} a^2+b=c\end{align}\<br />'))
     # print(cm.wrap_math_md_custom(r'<br />dz=\frac{1}{2}\frac{dx}{\cos ^2 x}<br />'))
diff --git a/tests/llm_web_kit/extractor/html/recognizer/test_math.py b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
index 679690a0..79d010aa 100644
--- a/tests/llm_web_kit/extractor/html/recognizer/test_math.py
+++ b/tests/llm_web_kit/extractor/html/recognizer/test_math.py
@@ -511,7 +511,6 @@ def test_math_recognizer_html(self):
             # print('answers::::::::', answers)
             # self.write_to_html(answers, test_case['input'][0])
             # 检查行内公式抽取正确性
-            # 检查行内公式抽取正确性
             if test_case.get('expected_inline', None):
                 # 从所有parts中提取所有行内公式
                 all_inline_formulas = []
@@ -522,40 +521,16 @@ def test_math_recognizer_html(self):
                         for inline_elem in inline_elements:
                             formula = inline_elem.text.replace('\n', '').strip()
                             all_inline_formulas.append(formula)
-
+                # print(f"Found {len(all_inline_formulas)} total inline formulas")
+                # print(f"Total new_parts: {len(new_parts)}")
                 expect_inline_text = base_dir.joinpath(test_case['expected_inline']).read_text(encoding='utf-8').strip()
                 expect_inline_formulas = [formula for formula in expect_inline_text.split('\n') if formula]
-
-                # 如果数量不匹配，输出详细信息
-                if len(all_inline_formulas) != len(expect_inline_formulas):
-                    print("\n" + "=" * 80)
-                    print("行内公式抽取出错!")
-                    print("=" * 80)
-                    print(f"出错样例: {test_case['input']}")
-                    print(f"预期公式数: {len(expect_inline_formulas)}")
-                    print(f"实际公式数: {len(all_inline_formulas)}")
-                    print("\n预期公式列表:")
-                    for i, formula in enumerate(expect_inline_formulas, 1):
-                        print(f"  {i}. {formula}")
-                    print("\n实际公式列表:")
-                    for i, formula in enumerate(all_inline_formulas, 1):
-                        print(f"  {i}. {formula}")
-
-                    # 找出差异
-                    print("\n差异分析:")
-                    if len(all_inline_formulas) > len(expect_inline_formulas):
-                        print(f"多提取了 {len(all_inline_formulas) - len(expect_inline_formulas)} 个公式:")
-                        extra_formulas = all_inline_formulas[len(expect_inline_formulas):]
-                        for i, formula in enumerate(extra_formulas, 1):
-                            print(f"  {i}. {formula}")
-                    else:
-                        print(f"少提取了 {len(expect_inline_formulas) - len(all_inline_formulas)} 个公式:")
-                        missing_formulas = expect_inline_formulas[len(all_inline_formulas):]
-                        for i, formula in enumerate(missing_formulas, 1):
-                            print(f"  {i}. {formula}")
-                    print("=" * 80 + "\n")
-
+                # print(f"Expected {len(expect_inline_formulas)} inline formulas")
                 self.assertEqual(len(all_inline_formulas), len(expect_inline_formulas))
+                for expect, formula in zip(expect_inline_formulas, all_inline_formulas):
+                    # print('inline expect::::::::', expect)
+                    # print('inline answer::::::::', formula)
+                    self.assertEqual(expect, formula)
 
     def write_to_html(self, answers, file_name):
         file_name = file_name.split('.')[0]