Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
637 changes: 544 additions & 93 deletions pkg/toon/encoder.go

Large diffs are not rendered by default.

57 changes: 52 additions & 5 deletions pkg/toon/marshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,10 @@ func TestMarshalNestedStruct(t *testing.T) {
}

// Should contain nested structure
// Note: TOON v2 requires quoting numeric-like strings, so "123" becomes \"123\"
dataStr := string(data)
if !contains(dataStr, "id: 123") {
t.Errorf("Missing 'id: 123' in output:\n%s", dataStr)
if !contains(dataStr, "id: \"123\"") {
t.Errorf("Missing 'id: \"123\"' in output:\n%s", dataStr)
}
if !contains(dataStr, "person:") {
t.Errorf("Missing 'person:' in output:\n%s", dataStr)
Expand Down Expand Up @@ -226,14 +227,31 @@ func TestMarshalArray(t *testing.T) {
t.Fatalf("Marshal failed: %v", err)
}

// Should produce a list format
// Should produce inline format with TOON v2 header [5]:
dataStr := string(data)
if !contains(dataStr, "[5|]") {
t.Errorf("Missing array header '[5|]' in output:\n%s", dataStr)
if !contains(dataStr, "[5]:") {
t.Errorf("Missing array header '[5]:' in output:\n%s", dataStr)
}
}

// TestUnmarshalArray checks that a root-level TOON v2 inline array header
// followed by comma-separated values ("[N]: v1,v2,...") unmarshals into a
// Go []int holding those values in order.
func TestUnmarshalArray(t *testing.T) {
	const payload = `[3]: 10,20,30`
	want := []int{10, 20, 30}

	var got []int
	if err := Unmarshal([]byte(payload), &got, DecodeOptions{IndentSize: 2}); err != nil {
		t.Fatalf("Unmarshal failed: %v", err)
	}

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf("Expected %v, got %v", want, got)
}

func TestUnmarshalArrayLegacyList(t *testing.T) {
// Test legacy list format for backward compatibility
toonData := []byte(`[3|]
- 10
- 20
Expand Down Expand Up @@ -329,6 +347,35 @@ func TestUnmarshalPrimitiveTypes(t *testing.T) {
}
}

// TestEscapeSequences feeds quoted TOON string tokens containing backslash
// escapes through Unmarshal and checks the decoded Go string for each one.
// Inputs are raw string literals, so every backslash below is a literal
// backslash in the TOON text being decoded.
func TestEscapeSequences(t *testing.T) {
	type escapeCase struct {
		name     string
		input    string
		expected string
	}

	cases := []escapeCase{
		{name: "simple newline", input: `"hello\nworld"`, expected: "hello\nworld"},
		{name: "simple tab", input: `"hello\tworld"`, expected: "hello\tworld"},
		{name: "simple quote", input: `"hello\"world"`, expected: "hello\"world"},
		{name: "simple backslash", input: `"hello\\world"`, expected: "hello\\world"},
		// An escaped backslash followed by 'n' must stay two characters,
		// not collapse into a newline.
		{name: "literal backslash-n", input: `"hello\\nworld"`, expected: "hello\\nworld"},
		{name: "carriage return", input: `"hello\rworld"`, expected: "hello\rworld"},
		{name: "multiple escapes", input: `"a\\b\"c\nd"`, expected: "a\\b\"c\nd"},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			var got string
			if err := Unmarshal([]byte(tc.input), &got, DecodeOptions{IndentSize: 2}); err != nil {
				t.Fatalf("Unmarshal failed: %v", err)
			}
			if got == tc.expected {
				return
			}
			t.Errorf("Expected %q, got %q", tc.expected, got)
		})
	}
}

// Helper function to check if a string contains a substring
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || len(s) > len(substr) && findSubstring(s, substr))
Expand Down
122 changes: 92 additions & 30 deletions pkg/toon/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,45 @@ func decodeValueFromLines(cursor *LineCursor, options DecodeOptions) (JsonValue,
return nil, fmt.Errorf("no content to decode")
}

// Check for root array
// TODO: Implement root array detection logic if needed, for now assume object or primitive
// The TS implementation checks `isArrayHeaderAfterHyphen` but that seems specific to list items?
// Actually it checks `isArrayHeaderAfterHyphen` on the first line content.

// Check for root array
if IsArrayHeaderAfterHyphen(first.Content) {
headerInfo := ParseArrayHeaderLine(first.Content, DelimiterComma)
// Check for root array with TOON v2 format: [N]: values
if strings.HasPrefix(strings.TrimSpace(first.Content), "[") {
// Try TOON v2 inline array format
headerInfo := ParseArrayHeaderLineTOONv2(first.Content)
if headerInfo != nil {
cursor.Advance()
return decodeArrayFromHeader(headerInfo, "", cursor, 0, options)
} else {
// Try inline array
if arr, err := ParseInlineArray(first.Content); err == nil {
return arr, nil

// Get inline values after colon
colonIdx := strings.Index(first.Content, ":")
if colonIdx != -1 {
inlineValues := strings.TrimSpace(first.Content[colonIdx+1:])
if inlineValues != "" {
values := ParseDelimitedValues(inlineValues, headerInfo.Delimiter)
arr := make(JsonArray, len(values))
for i, v := range values {
arr[i] = ParsePrimitiveToken(v)
}
return arr, nil
}
}

// No inline values - check for list format
if len(headerInfo.Fields) > 0 {
return decodeTabularArray(headerInfo, cursor, 0, options)
}
return decodeListArray(headerInfo, cursor, 0, options)
}

// Try old format with [N|]
headerInfo = ParseArrayHeaderLine(first.Content, DelimiterComma)
if headerInfo != nil {
cursor.Advance()
return decodeArrayFromHeader(headerInfo, "", cursor, 0, options)
}

// Try inline array like [item1, item2]
if arr, err := ParseInlineArray(first.Content); err == nil {
cursor.Advance()
return arr, nil
}
}

Expand Down Expand Up @@ -86,14 +109,53 @@ func decodeObject(cursor *LineCursor, baseDepth int, options DecodeOptions) (Jso
}

func decodeKeyValue(content string, cursor *LineCursor, baseDepth int, options DecodeOptions) (string, JsonValue, error) {
// Check if content contains an array header pattern like "key[N]:" or "key[N]{fields}:"
// TOON v2 format: key[N]: values or key[N]{fields}: for tabular

// Look for bracket pattern to extract key and array info
bracketStart := strings.Index(content, "[")
colonIdx := strings.Index(content, ":")

if bracketStart != -1 && colonIdx != -1 && bracketStart < colonIdx {
// This might be an array header like "key[N]:" or "key[N]{fields}:"
key := strings.TrimSpace(content[:bracketStart])
afterKey := content[bracketStart:]

// Parse the array header
headerInfo := ParseArrayHeaderLineTOONv2(afterKey)
if headerInfo != nil {
headerInfo.Key = key

// Get inline values after the colon if any
colonInAfterKey := strings.Index(afterKey, ":")
if colonInAfterKey != -1 {
inlineValues := strings.TrimSpace(afterKey[colonInAfterKey+1:])
if inlineValues != "" {
// Parse inline values using delimiter
values := ParseDelimitedValues(inlineValues, headerInfo.Delimiter)
arr := make(JsonArray, len(values))
for i, v := range values {
arr[i] = ParsePrimitiveToken(v)
}
return key, arr, nil
}
}

// No inline values - check for list or tabular format
if len(headerInfo.Fields) > 0 {
arr, err := decodeTabularArray(headerInfo, cursor, baseDepth, options)
return key, arr, err
}
arr, err := decodeListArray(headerInfo, cursor, baseDepth, options)
return key, arr, err
}
}

// Simple key parsing (split by first colon)
parts := strings.SplitN(content, ":", 2)
key := strings.TrimSpace(parts[0])

if len(parts) < 2 {
// Should not happen if called correctly, or maybe it's a key without value (empty object?)
// If no colon, it might be an error or specific syntax.
// For now assume key: value
return key, nil, fmt.Errorf("invalid key-value pair: %s", content)
}

Expand All @@ -109,22 +171,14 @@ func decodeKeyValue(content string, cursor *LineCursor, baseDepth int, options D
return key, make(JsonObject), nil
}

// Check for array header first (before parsing key)
// Actually, decodeKeyValue is called with the line content.
// If the line IS an array header, it's not a key-value pair.
// But decodeKeyValue is called by decodeObject which expects key-value.
// If we are here, we split by colon.

// Check for array header
// e.g. "key: 3 |" -> rest is "3 |"
// Check for array header with old format (for backward compatibility)
if IsArrayHeaderAfterHyphen(rest) {
headerInfo := ParseArrayHeaderLine(rest, DelimiterComma)
if headerInfo != nil {
// It is an array header!
val, err := decodeArrayFromHeader(headerInfo, "", cursor, baseDepth, options)
return key, val, err
} else {
// Try inline array
// Try inline array with brackets like [item1, item2]
if arr, err := ParseInlineArray(rest); err == nil {
return key, arr, nil
}
Expand Down Expand Up @@ -217,17 +271,25 @@ func decodeObjectFromListItem(firstLine *ParsedLine, cursor *LineCursor, baseDep

obj := JsonObject{key: value}

// Read subsequent fields at the same depth
// Sibling fields are at depth baseDepth + 1 (one level deeper than the list item line)
// because they align with the content after "- "
siblingDepth := baseDepth + 1

for !cursor.AtEnd() {
line := cursor.Peek()
if line == nil || line.Depth < baseDepth {
break
}

// Must be same depth and NOT a list item (which would be next item in array)
if line.Depth == baseDepth && !strings.HasPrefix(line.Content, ListItemPrefix) && line.Content != "-" {
// If we see a line at list item depth that is a list item, we're done with this object
if line.Depth == baseDepth && (strings.HasPrefix(line.Content, ListItemPrefix) || line.Content == "-") {
break
}

// Sibling fields should be at siblingDepth
if line.Depth == siblingDepth && !strings.HasPrefix(line.Content, ListItemPrefix) && line.Content != "-" {
cursor.Advance()
k, v, err := decodeKeyValue(line.Content, cursor, baseDepth, options)
k, v, err := decodeKeyValue(line.Content, cursor, siblingDepth, options)
if err != nil {
return nil, err
}
Expand Down
112 changes: 108 additions & 4 deletions pkg/toon/parser_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,119 @@ func ParsePrimitiveToken(token string) JsonValue {
}

// Return as string (unquoted)
// Note: In a full implementation, we might want to handle quoted strings explicitly
// to support escape sequences, but for now we'll assume simple strings.
if strings.HasPrefix(token, "\"") && strings.HasSuffix(token, "\"") {
return token[1 : len(token)-1]
// Handle quoted strings with escape sequences
if strings.HasPrefix(token, "\"") && strings.HasSuffix(token, "\"") && len(token) >= 2 {
inner := token[1 : len(token)-1]
// Unescape: process escape sequences character by character
var result strings.Builder
i := 0
for i < len(inner) {
if inner[i] == '\\' && i+1 < len(inner) {
switch inner[i+1] {
case '\\':
result.WriteByte('\\')
i += 2
case '"':
result.WriteByte('"')
i += 2
case 'n':
result.WriteByte('\n')
i += 2
case 'r':
result.WriteByte('\r')
i += 2
case 't':
result.WriteByte('\t')
i += 2
default:
// Unknown escape, keep as is
result.WriteByte(inner[i])
i++
}
} else {
result.WriteByte(inner[i])
i++
}
}
return result.String()
}
Comment on lines +36 to 71
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Byte-level iteration may corrupt multi-byte UTF-8 characters.

The escape sequence parser iterates over inner using byte indices (inner[i]), but non-ASCII characters in the string occupy multiple bytes. If a backslash precedes a multi-byte character or appears mid-sequence, the logic may misinterpret bytes.

Consider using []rune for character-by-character iteration:

-		inner := token[1 : len(token)-1]
-		// Unescape: process escape sequences character by character
-		var result strings.Builder
-		i := 0
-		for i < len(inner) {
-			if inner[i] == '\\' && i+1 < len(inner) {
-				switch inner[i+1] {
+		inner := []rune(token[1 : len(token)-1])
+		var result strings.Builder
+		i := 0
+		for i < len(inner) {
+			if inner[i] == '\\' && i+1 < len(inner) {
+				switch inner[i+1] {
 				case '\\':
-					result.WriteByte('\\')
+					result.WriteRune('\\')
 					i += 2
 				case '"':
-					result.WriteByte('"')
+					result.WriteRune('"')
 					i += 2
 				// ... similar changes for other cases
 				default:
-					result.WriteByte(inner[i])
+					result.WriteRune(inner[i])
 					i++
 				}
 			} else {
-				result.WriteByte(inner[i])
+				result.WriteRune(inner[i])
 				i++
 			}
 		}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// Handle quoted strings with escape sequences
if strings.HasPrefix(token, "\"") && strings.HasSuffix(token, "\"") && len(token) >= 2 {
inner := token[1 : len(token)-1]
// Unescape: process escape sequences character by character
var result strings.Builder
i := 0
for i < len(inner) {
if inner[i] == '\\' && i+1 < len(inner) {
switch inner[i+1] {
case '\\':
result.WriteByte('\\')
i += 2
case '"':
result.WriteByte('"')
i += 2
case 'n':
result.WriteByte('\n')
i += 2
case 'r':
result.WriteByte('\r')
i += 2
case 't':
result.WriteByte('\t')
i += 2
default:
// Unknown escape, keep as is
result.WriteByte(inner[i])
i++
}
} else {
result.WriteByte(inner[i])
i++
}
}
return result.String()
}
// Handle quoted strings with escape sequences
if strings.HasPrefix(token, "\"") && strings.HasSuffix(token, "\"") && len(token) >= 2 {
inner := []rune(token[1 : len(token)-1])
var result strings.Builder
i := 0
for i < len(inner) {
if inner[i] == '\\' && i+1 < len(inner) {
switch inner[i+1] {
case '\\':
result.WriteRune('\\')
i += 2
case '"':
result.WriteRune('"')
i += 2
case 'n':
result.WriteRune('\n')
i += 2
case 'r':
result.WriteRune('\r')
i += 2
case 't':
result.WriteRune('\t')
i += 2
default:
// Unknown escape, keep as is
result.WriteRune(inner[i])
i++
}
} else {
result.WriteRune(inner[i])
i++
}
}
return result.String()
}
🤖 Prompt for AI Agents
In pkg/toon/parser_utils.go around lines 36 to 71, the escape-sequence
unescaping iterates bytes which can corrupt multi-byte UTF-8 runes; convert
inner to a []rune and iterate rune-by-rune so you treat Unicode characters as
single code points, perform escape lookahead on the next rune (not next byte),
write runes to the strings.Builder (use WriteRune) and when encountering unknown
escapes emit the backslash and the following rune unchanged; ensure you advance
the rune index correctly for two-rune escapes and handle end-of-input safely.


return token
}

// ParseArrayHeaderLineTOONv2 parses a TOON v2 array header.
//
// Accepted forms (text after the colon is not inspected here):
//
//	[N]:          comma-delimited, the default
//	[N|]:         pipe-delimited
//	[N<TAB>]:     tab-delimited
//	[N]{a,b}:     header with a field list; the brace contents are split by
//	              ParseDelimitedValues using the delimiter chosen above
//
// Examples: "[3]: 1,2,3", "[2]{id,name}:", "[3|]: a|b|c".
//
// Parsing starts at the first '[' in line; anything before it is ignored, so
// callers may pass either the bare header or a line with a leading key.
//
// It returns nil when line is not a well-formed header: no bracket pair, no
// leading decimal length, a length that does not fit in an int, anything
// other than a single delimiter marker after the length, an unterminated
// field list, or a missing colon. The returned Key is always empty and is
// expected to be filled in by the caller.
func ParseArrayHeaderLineTOONv2(line string) *ArrayHeaderInfo {
	startBracket := strings.Index(line, "[")
	if startBracket == -1 {
		return nil
	}
	// Look for the closing bracket only after the opening one, so a stray
	// ']' earlier in the line cannot mask a valid header.
	closeOffset := strings.Index(line[startBracket:], "]")
	if closeOffset == -1 {
		return nil
	}
	endBracket := startBracket + closeOffset

	// Bracket content is "<digits>" optionally followed by a delimiter marker.
	bracketContent := line[startBracket+1 : endBracket]

	digits := 0
	for digits < len(bracketContent) && bracketContent[digits] >= '0' && bracketContent[digits] <= '9' {
		digits++
	}
	if digits == 0 {
		// No length at the start of the bracket.
		return nil
	}

	length, err := strconv.Atoi(bracketContent[:digits])
	if err != nil {
		// Only reachable on overflow, since the input is all digits.
		return nil
	}

	// Whatever follows the length must be exactly one delimiter marker (or
	// nothing). Rejecting anything else keeps keys such as "name[3abc]" from
	// being mistaken for array headers.
	delimiter := ","
	switch marker := bracketContent[digits:]; {
	case marker == "|":
		delimiter = "|"
	case marker == "\t":
		delimiter = "\t"
	case strings.Trim(marker, " ") == "":
		// Empty or spaces only: keep the default comma delimiter.
	default:
		return nil
	}

	// Optional field list directly after the bracket: {field1,field2}
	afterBracket := strings.TrimSpace(line[endBracket+1:])
	var fields []string
	if strings.HasPrefix(afterBracket, "{") {
		closeBrace := strings.Index(afterBracket, "}")
		if closeBrace == -1 {
			// Unterminated field list.
			return nil
		}
		fields = ParseDelimitedValues(afterBracket[1:closeBrace], delimiter)
		afterBracket = strings.TrimSpace(afterBracket[closeBrace+1:])
	}

	// A header is only valid when terminated by a colon.
	if !strings.HasPrefix(afterBracket, ":") {
		return nil
	}

	return &ArrayHeaderInfo{
		Key:       "", // Key will be set by caller
		Length:    length,
		Delimiter: delimiter,
		Fields:    fields,
	}
}

// ParseArrayHeaderLine parses a line to check if it's an array header
// Format: [key]: [length<delimiter>] [fields...]
// Example: "items: [3|] name age"
Expand Down
Loading