From ebbc54c00d1b018a7832e1d55fb23b43fa8e8ccb Mon Sep 17 00:00:00 2001 From: jujn <2087687391@qq.com> Date: Wed, 25 Mar 2026 22:29:39 +0800 Subject: [PATCH 1/2] fix(skill): ignore complex YAML frontmatter to prevent parsing errors --- .../core/skill/util/MarkdownSkillParser.java | 37 ++++++---- .../skill/util/MarkdownSkillParserTest.java | 70 ++++++++++++++----- 2 files changed, 78 insertions(+), 29 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java b/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java index 5bce28ced..d7d5201d2 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java @@ -20,6 +20,8 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Utility for parsing and generating Markdown files with YAML frontmatter. @@ -54,6 +56,8 @@ */ public class MarkdownSkillParser { + private static final Logger logger = LoggerFactory.getLogger(MarkdownSkillParser.class); + /** * Private constructor to prevent instantiation. */ @@ -81,7 +85,6 @@ private MarkdownSkillParser() {} * * @param markdown Markdown content (may or may not have frontmatter) * @return ParsedMarkdown containing metadata and content - * @throws IllegalArgumentException if YAML syntax is invalid */ public static ParsedMarkdown parse(String markdown) { if (markdown == null || markdown.isEmpty()) { @@ -102,14 +105,8 @@ public static ParsedMarkdown parse(String markdown) { return new ParsedMarkdown(Map.of(), markdownContent); } - try { - Map metadata = SimpleYamlParser.parse(yamlContent); - return new ParsedMarkdown(metadata, markdownContent); - } catch (IllegalArgumentException e) { - throw e; - } catch (RuntimeException e) { - throw new IllegalArgumentException("Invalid YAML frontmatter syntax", e); - } + Map metadata = SimpleYamlParser.parse(yamlContent); + return new ParsedMarkdown(metadata, markdownContent); } /** @@ -158,9 +155,14 @@ private static class SimpleYamlParser { /** * Parse YAML string into a map of key-value pairs. * + *

This is a simplified parser designed for flat string-to-string mappings. + * Block-style complex YAML structures (such as multi-line lists or indented + * nested objects) are not supported and will be gracefully skipped. + * However, flow-style inline structures (e.g., single-line JSON strings) + * are treated as standard scalar values and will be parsed as raw strings. + * * @param yaml YAML content to parse * @return Map of key-value pairs - * @throws IllegalArgumentException if YAML syntax is invalid */ static Map parse(String yaml) { Map result = new LinkedHashMap<>(); @@ -184,14 +186,23 @@ static Map parse(String yaml) { Matcher matcher = KEY_VALUE_PATTERN.matcher(line.trim()); if (!matcher.matches()) { - throw new IllegalArgumentException( - "Invalid YAML line (expected 'key: value' format): " + line); + logger.debug( + "Skipping unsupported YAML line (expected 'key: value' format): {}", + line); + continue; } String key = matcher.group(1); String value = parseValue(matcher.group(2)); - result.put(key, value); + if (!value.isEmpty()) { + result.put(key, value); + } else { + logger.debug( + "Skipping key '{}': empty values or block-style complex structures are" + + " unsupported", + key); + } } return result; diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java index 3d4b642b4..96f7d31e3 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java @@ -20,7 +20,6 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import io.agentscope.core.skill.util.MarkdownSkillParser.ParsedMarkdown; @@ -283,28 +282,29 @@ void testParseUnicodeCharacters() { class ErrorHandlingTests { @Test - @DisplayName("Should throw exception for invalid YAML") + @DisplayName("Should gracefully ignore invalid YAML lines instead of throwing exception") void testInvalidYaml() { String markdown = "---\nname: test\nthis is not a valid line\n---\nContent"; - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> MarkdownSkillParser.parse(markdown)); - assertTrue(exception.getMessage().contains("Invalid YAML line")); - assertTrue(exception.getMessage().contains("expected 'key: value' format")); + MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); + Map metadata = parsed.getMetadata(); + + assertEquals("test", metadata.get("name")); + assertFalse(metadata.containsKey("this is not a valid line")); + assertEquals("Content", parsed.getContent()); } @Test - @DisplayName("Should throw exception for list format") + @DisplayName("Should gracefully ignore list format instead of throwing exception") void testListFormat() { - String markdown = "---\n- item1\n- item2\n---\nContent"; + String markdown = "---\nname: test_skill\n- item1\n- item2\n---\nContent"; + + MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); + Map metadata = parsed.getMetadata(); - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> MarkdownSkillParser.parse(markdown)); - assertTrue(exception.getMessage().contains("Invalid YAML line")); + assertEquals("test_skill", metadata.get("name")); + assertFalse(metadata.containsKey("- item1")); + assertFalse(metadata.containsKey("- item2")); } } @@ -429,7 +429,7 @@ void testGenerateEmptyValue() { String generated = MarkdownSkillParser.generate(metadata, "Content"); ParsedMarkdown parsed = MarkdownSkillParser.parse(generated); - assertEquals("", parsed.getMetadata().get("empty")); + assertNull(parsed.getMetadata().get("empty")); } } @@ -534,5 +534,43 @@ void testToString() { assertTrue(toString.contains("metadata")); assertTrue(toString.contains("content")); } + + @Test + @DisplayName( + "Should parse basic scalars and gracefully ignore complex YAML structures like" + + " lists or JSON") + void testParseAndIgnoreComplexMetadata() { + String markdown = + """ + --- + name: Agent Browser + description: A fast Rust-based headless browser automation CLI + read_when: + - Automating web interactions + - Extracting structured data from pages + metadata: {"clawdbot":{"emoji":"🌐"}} + allowed-tools: Bash(agent-browser:*) + --- + + # Content + This is the content.\ + """; + + MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); + Map metadata = parsed.getMetadata(); + + assertEquals("Agent Browser", metadata.get("name")); + assertEquals( + "A fast Rust-based headless browser automation CLI", + metadata.get("description")); + assertEquals("Bash(agent-browser:*)", metadata.get("allowed-tools")); + + assertEquals("{\"clawdbot\":{\"emoji\":\"🌐\"}}", metadata.get("metadata")); + + assertNull(metadata.get("read_when")); + assertNull(metadata.get("- Automating web interactions")); + + assertTrue(parsed.getContent().contains("# Content")); + } } } From ad9790c7c390958169263473f7c86eab7df47dac Mon Sep 17 00:00:00 2001 From: jujn <2087687391@qq.com> Date: Thu, 26 Mar 2026 17:56:45 +0800 Subject: [PATCH 2/2] fix: copilot review --- .../core/skill/util/MarkdownSkillParser.java | 30 +++-- .../skill/util/MarkdownSkillParserTest.java | 114 ++++++++++++------ 2 files changed, 98 insertions(+), 46 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java b/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java index d7d5201d2..939b1b814 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/util/MarkdownSkillParser.java @@ -193,21 +193,37 @@ static Map parse(String yaml) { } String key = matcher.group(1); - String value = parseValue(matcher.group(2)); + String rawValue = matcher.group(2); - if (!value.isEmpty()) { - result.put(key, value); - } else { + if (isBlockScalarModifier(rawValue)) { logger.debug( - "Skipping key '{}': empty values or block-style complex structures are" - + " unsupported", - key); + "Skipping key '{}': block-style values ('{}') are unsupported", + key, + rawValue.trim()); + continue; } + + result.put(key, parseValue(rawValue)); } return result; } + /** + * Check if the raw value is a YAML block scalar modifier ('|' or '>'). + * + * @param rawValue The raw string captured after the colon + * @return true if it is a block scalar modifier + */ + private static boolean isBlockScalarModifier(String rawValue) { + if (rawValue == null) { + return false; + } + + String trimmed = rawValue.trim(); + return "|".equals(trimmed) || ">".equals(trimmed); + } + /** * Parse a YAML value, handling quoted strings. * diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java index 96f7d31e3..52597615f 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/util/MarkdownSkillParserTest.java @@ -306,6 +306,80 @@ void testListFormat() { assertFalse(metadata.containsKey("- item1")); assertFalse(metadata.containsKey("- item2")); } + + @Test + @DisplayName( + "Should parse basic scalars and gracefully ignore complex YAML structures like" + + " lists or JSON") + void testParseAndIgnoreComplexMetadata() { + String markdown = + """ + --- + name: Agent Browser + description: A fast Rust-based headless browser automation CLI + read_when: + - Automating web interactions + - Extracting structured data from pages + metadata: {"clawdbot":{"emoji":"🌐"}} + allowed-tools: Bash(agent-browser:*) + --- + + # Content + This is the content.\ + """; + + MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); + Map metadata = parsed.getMetadata(); + + assertEquals("Agent Browser", metadata.get("name")); + assertEquals( + "A fast Rust-based headless browser automation CLI", + metadata.get("description")); + assertEquals("Bash(agent-browser:*)", metadata.get("allowed-tools")); + + assertEquals("{\"clawdbot\":{\"emoji\":\"🌐\"}}", metadata.get("metadata")); + + assertEquals("", metadata.get("read_when")); + assertNull(metadata.get("- Automating web interactions")); + + assertTrue(parsed.getContent().contains("# Content")); + } + + @Test + @DisplayName( + "Should gracefully skip keys with block-style modifiers (| or >) instead of" + + " recording them as literal values") + void testSkipBlockStyleModifiers() { + String markdown = + """ + --- + name: test_skill + description: | + This is a multi-line description. + It should be ignored by the simple parser. + summary: > + This is a folded multi-line summary. + It should also be ignored. + version: "1.0" + --- + Content\ + """; + + MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); + Map metadata = parsed.getMetadata(); + + assertEquals("test_skill", metadata.get("name")); + assertEquals("1.0", metadata.get("version")); + + assertNull( + metadata.get("description"), + "Block scalar modifier '|' should not be parsed as a literal value"); + assertNull( + metadata.get("summary"), + "Block scalar modifier '>' should not be parsed as a literal value"); + + assertFalse(metadata.containsKey(" This is a multi-line description.")); + } } @Nested @@ -429,7 +503,7 @@ void testGenerateEmptyValue() { String generated = MarkdownSkillParser.generate(metadata, "Content"); ParsedMarkdown parsed = MarkdownSkillParser.parse(generated); - assertNull(parsed.getMetadata().get("empty")); + assertEquals("", parsed.getMetadata().get("empty")); } } @@ -534,43 +608,5 @@ void testToString() { assertTrue(toString.contains("metadata")); assertTrue(toString.contains("content")); } - - @Test - @DisplayName( - "Should parse basic scalars and gracefully ignore complex YAML structures like" - + " lists or JSON") - void testParseAndIgnoreComplexMetadata() { - String markdown = - """ - --- - name: Agent Browser - description: A fast Rust-based headless browser automation CLI - read_when: - - Automating web interactions - - Extracting structured data from pages - metadata: {"clawdbot":{"emoji":"🌐"}} - allowed-tools: Bash(agent-browser:*) - --- - - # Content - This is the content.\ - """; - - MarkdownSkillParser.ParsedMarkdown parsed = MarkdownSkillParser.parse(markdown); - Map metadata = parsed.getMetadata(); - - assertEquals("Agent Browser", metadata.get("name")); - assertEquals( - "A fast Rust-based headless browser automation CLI", - metadata.get("description")); - assertEquals("Bash(agent-browser:*)", metadata.get("allowed-tools")); - - assertEquals("{\"clawdbot\":{\"emoji\":\"🌐\"}}", metadata.get("metadata")); - - assertNull(metadata.get("read_when")); - assertNull(metadata.get("- Automating web interactions")); - - assertTrue(parsed.getContent().contains("# Content")); - } } }