From 20f88df2d6b33a6ec42fed5beb8186b824671310 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:34:24 +0000 Subject: [PATCH 1/3] Initial plan From 567a47e385dc59ed4700b4dd3b60683863f8dbf3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:55:59 +0000 Subject: [PATCH 2/3] Add ZON Java implementation with encoder, decoder, CLI, tests and documentation Co-authored-by: ronibhakta1 <77425964+ronibhakta1@users.noreply.github.com> --- .gitignore | 27 + LICENSE | 2 +- README.md | 186 ++- SPEC.md | 1112 +++++++++++++++++ benchmarks/benchmark_output.md | 214 ++++ docs/api-reference.md | 342 +++++ docs/llm-best-practices.md | 269 ++++ docs/syntax-cheatsheet.md | 291 +++++ pom.xml | 118 ++ src/main/java/com/zonformat/zon/Cli.java | 97 ++ .../java/com/zonformat/zon/Constants.java | 39 + src/main/java/com/zonformat/zon/Zon.java | 116 ++ .../com/zonformat/zon/ZonDecodeError.java | 106 ++ .../java/com/zonformat/zon/ZonDecoder.java | 806 ++++++++++++ .../java/com/zonformat/zon/ZonEncoder.java | 654 ++++++++++ .../zonformat/zon/CanonicalNumbersTest.java | 255 ++++ .../zonformat/zon/ConformanceDecoderTest.java | 196 +++ .../zonformat/zon/ConformanceEncoderTest.java | 235 ++++ .../com/zonformat/zon/SecurityLimitsTest.java | 175 +++ .../java/com/zonformat/zon/SecurityTest.java | 103 ++ .../com/zonformat/zon/StrictModeTest.java | 218 ++++ .../java/com/zonformat/zon/ZonCodecTest.java | 709 +++++++++++ 22 files changed, 6267 insertions(+), 3 deletions(-) create mode 100644 SPEC.md create mode 100644 benchmarks/benchmark_output.md create mode 100644 docs/api-reference.md create mode 100644 docs/llm-best-practices.md create mode 100644 docs/syntax-cheatsheet.md create mode 100644 pom.xml create mode 100644 src/main/java/com/zonformat/zon/Cli.java create mode 100644 src/main/java/com/zonformat/zon/Constants.java create mode 100644 
src/main/java/com/zonformat/zon/Zon.java create mode 100644 src/main/java/com/zonformat/zon/ZonDecodeError.java create mode 100644 src/main/java/com/zonformat/zon/ZonDecoder.java create mode 100644 src/main/java/com/zonformat/zon/ZonEncoder.java create mode 100644 src/test/java/com/zonformat/zon/CanonicalNumbersTest.java create mode 100644 src/test/java/com/zonformat/zon/ConformanceDecoderTest.java create mode 100644 src/test/java/com/zonformat/zon/ConformanceEncoderTest.java create mode 100644 src/test/java/com/zonformat/zon/SecurityLimitsTest.java create mode 100644 src/test/java/com/zonformat/zon/SecurityTest.java create mode 100644 src/test/java/com/zonformat/zon/StrictModeTest.java create mode 100644 src/test/java/com/zonformat/zon/ZonCodecTest.java diff --git a/.gitignore b/.gitignore index 524f096..ef3778f 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,30 @@ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* replay_pid* + +# Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +.mvn/wrapper/maven-wrapper.jar + +# IDE +.idea/ +*.iml +*.ipr +*.iws +.project +.classpath +.settings/ +.factorypath +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/LICENSE b/LICENSE index 39444f9..7edc976 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2025 ZON +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 1edaf81..d65ba78 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,184 @@ -# ZOD-JAVA -ZON-FORMAT FOR JAVA +# ZON Format for Java + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Java 
11+](https://img.shields.io/badge/Java-11%2B-blue)](https://www.java.com/) + +**Zero Overhead Notation** - A compact, human-readable data serialization format optimized for LLM token efficiency. + +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +## Overview + +ZON achieves **23.8% token reduction** compared to JSON while maintaining 100% data fidelity and LLM retrieval accuracy. It uses: + +- **Single-character primitives**: `T`/`F` for booleans +- **Table format**: Uniform arrays encoded as column headers + data rows +- **Minimal quoting**: Unquoted safe strings + +## Installation + +### Maven + +```xml +<dependency> + <groupId>com.zonformat</groupId> + <artifactId>zon-java</artifactId> + <version>1.0.5</version> +</dependency> +``` + +### Gradle + +```groovy +implementation 'com.zonformat:zon-java:1.0.5' +``` + +## Quick Start + +```java +import com.zonformat.zon.Zon; +import java.util.*; + +public class Example { + public static void main(String[] args) { + // Create data + Map<String, Object> data = new LinkedHashMap<>(); + data.put("name", "Alice"); + data.put("age", 30); + data.put("active", true); + + // Encode to ZON + String zon = Zon.encode(data); + System.out.println(zon); + // Output: + // active:T + // age:30 + // name:Alice + + // Decode back to Java + Object decoded = Zon.decode(zon); + } +} +``` + +## Table Format + +Arrays of uniform objects are encoded efficiently: + +```java +List<Map<String, Object>> users = new ArrayList<>(); + +Map<String, Object> user1 = new LinkedHashMap<>(); +user1.put("id", 1); +user1.put("name", "Alice"); +user1.put("active", true); +users.add(user1); + +Map<String, Object> user2 = new LinkedHashMap<>(); +user2.put("id", 2); +user2.put("name", "Bob"); +user2.put("active", false); +users.add(user2); + +Map<String, Object> data = new LinkedHashMap<>(); +data.put("users", users); + +String zon = Zon.encode(data); +// Output: +// users:@(2):active,id,name +// T,1,Alice +// F,2,Bob +``` + +## API Reference + +### Encoding + +```java +// Basic encoding +String zon = Zon.encode(data); + +// Using encoder instance +ZonEncoder encoder = new ZonEncoder(); +String zon = encoder.encode(data); +``` + 
+### Decoding + +```java +// Strict mode (default) - validates row/field counts +Object data = Zon.decode(zonString); + +// Non-strict mode - tolerates mismatches +Object data = Zon.decode(zonString, false); + +// Using decoder instance +ZonDecoder decoder = new ZonDecoder(false); +Object data = decoder.decode(zonString); +``` + +### Error Handling + +```java +try { + Object data = Zon.decode(invalidZon); +} catch (ZonDecodeError e) { + System.out.println("Error code: " + e.getCode()); + System.out.println("Line: " + e.getLine()); + System.out.println("Context: " + e.getContext()); +} +``` + +## CLI Usage + +Build the CLI jar: + +```bash +mvn package +``` + +Use the CLI: + +```bash +# Encode JSON to ZON +java -jar target/zon-java-1.0.5-cli.jar encode data.json > data.zonf + +# Decode ZON to JSON +java -jar target/zon-java-1.0.5-cli.jar decode data.zonf > data.json +``` + +## Token Efficiency + +| Format | Tokens | Savings | +|--------|--------|---------| +| JSON (formatted) | 1,300 | - | +| JSON (compact) | 802 | 38% | +| ZON | 692 | 47% | + +## Use Cases + +✅ **Use ZON for:** +- LLM prompt contexts (RAG, few-shot examples) +- Configuration files +- Log storage and analysis +- Tabular data interchange + +❌ **Don't use ZON for:** +- Public REST APIs (use JSON for compatibility) +- Real-time streaming (not yet supported) +- Files requiring comments (use YAML/JSONC) + +## Format Specification + +See [SPEC.md](SPEC.md) for the complete ZON format specification. + +## License + +MIT License - Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +See [LICENSE](LICENSE) for details. 
+ +## Related Projects + +- [zon-ts](https://github.com/ZON-Format/zon-TS) - TypeScript implementation +- [ZON Specification](SPEC.md) - Format specification diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..0e599b9 --- /dev/null +++ b/SPEC.md @@ -0,0 +1,1112 @@ +# ZON Specification + +## Zero Overhead Notation - Formal Specification + +**Version:** 1.0.5 + +**Date:** 2025-11-28 + +**Status:** Stable Release + +**Authors:** ZON Format Contributors + +**License:** MIT + +--- + +## Abstract + +Zero Overhead Notation (ZON) is a compact, line-oriented text format that encodes the JSON data model with minimal redundancy, optimized for large language model token efficiency. ZON achieves up to 23.8% token reduction compared to JSON through single-character primitives (`T`, `F`), null as `null`, explicit table markers (`@`), colon-less nested structures, and intelligent quoting rules. Arrays of uniform objects use tabular encoding with column headers declared once; metadata uses flat key-value pairs. This specification defines ZON's concrete syntax, canonical value formatting, encoding/decoding behavior, conformance requirements, and strict validation rules. ZON provides deterministic, lossless representation achieving 100% LLM retrieval accuracy in benchmarks. + +## Status of This Document + +This document is a **Stable Release v1.0.5** and defines normative behavior for ZON encoders, decoders, and validators. Implementation feedback should be reported at https://github.com/ZON-Format/zon-TS. + +Backward compatibility is maintained across v1.0.x releases. Major versions (v2.x) may introduce breaking changes. + +## Normative References + +**[RFC2119]** Bradner, S., "Key words for use in RFCs to Indicate Requirement Levels", BCP 14, RFC 2119, March 1997. +https://www.rfc-editor.org/rfc/rfc2119 + +**[RFC8174]** Leiba, B., "Ambiguity of Uppercase vs Lowercase in RFC 2119 Key Words", BCP 14, RFC 8174, May 2017. 
+https://www.rfc-editor.org/rfc/rfc8174 + +**[RFC8259]** Bray, T., "The JavaScript Object Notation (JSON) Data Interchange Format", STD 90, RFC 8259, December 2017. +https://www.rfc-editor.org/rfc/rfc8259 + +## Informative References + +**[RFC4180]** Shafranovich, Y., "Common Format and MIME Type for Comma-Separated Values (CSV) Files", RFC 4180, October 2005. +https://www.rfc-editor.org/rfc/rfc4180 + +**[ISO8601]** ISO 8601:2019, "Date and time — Representations for information interchange". + +**[UNICODE]** The Unicode Consortium, "The Unicode Standard", Version 15.1, September 2023. + +--- + +## Table of Contents + +1. [Introduction](#introduction) +2. [Terminology and Conventions](#1-terminology-and-conventions) +3. [Data Model](#2-data-model) +4. [Encoding Normalization](#3-encoding-normalization) +5. [Decoding Interpretation](#4-decoding-interpretation) +6. [Concrete Syntax](#5-concrete-syntax) +7. [Primitives](#6-primitives) +8. [Strings and Keys](#7-strings-and-keys) +9. [Objects](#8-objects) +10. [Arrays](#9-arrays) +11. [Table Format](#10-table-format) +12. [Quoting and Escaping](#11-quoting-and-escaping) +13. [Whitespace](#12-whitespace-and-line-endings) +14. [Conformance](#13-conformance-and-options) +15. [Strict Mode Errors](#14-strict-mode-errors) +16. [Security](#15-security-considerations) +17. [Internationalization](#16-internationalization) +18. [Interoperability](#17-interoperability) +19. [Media Type](#18-media-type) +20. [Error Handling](#19-error-handling) +21. [Appendices](#appendices) + +--- + +## Introduction (Informative) + +### Purpose + +ZON addresses token bloat in JSON while maintaining structural fidelity. By declaring column headers once, using single-character tokens, and eliminating redundant punctuation, ZON achieves optimal compression for LLM contexts. + +### Design Goals + +1. **Minimize tokens** - Every character counts in LLM context windows +2. **Preserve structure** - 100% lossless round-trip conversion +3. 
**Human readable** - Debuggable, understandable format +4. **LLM friendly** - Explicit markers aid comprehension +5. **Deterministic** - Same input → same output +6. **Deep Nesting** - Efficiently handles complex, recursive structures + +### Use Cases + +✅ **Use ZON for:** +- LLM prompt contexts (RAG, few-shot examples) +- Log storage and analysis +- Configuration files +- Browser storage (localStorage) +- Tabular data interchange +- **Complex nested data structures** (ZON excels here) + +❌ **Don't use ZON for:** +- Public REST APIs (use JSON for compatibility) +- Real-time streaming protocols (not yet supported) +- Files requiring comments (use YAML/JSONC) + +### Example + +**JSON (86 chars):** +```json +{"users":[{"id":1,"name":"Alice","active":true},{"id":2,"name":"Bob","active":false}]} +``` + +**ZON (43 chars, 50% reduction):** +```zon +users:@(2):active,id,name +T,1,Alice +F,2,Bob +``` + +--- + +## 1. Terminology and Conventions + +### 1.1 RFC2119 Keywords + +The keywords **MUST**, **MUST NOT**, **REQUIRED**, **SHALL**, **SHALL NOT**, **SHOULD**, **SHOULD NOT**, **RECOMMENDED**, **MAY**, and **OPTIONAL** are interpreted per [RFC2119] and [RFC8174]. + +### 1.2 Definitions + +**ZON document** - UTF-8 text conforming to this specification + +**Line** - Character sequence terminated by LF (`\n`) + +**Key-value pair** - Line pattern: `key:value` + +**Table** - Array of uniform objects with header + data rows + +**Table header** - Pattern: `key:@(N):columns` or `@(N):columns` + +**Meta separator** - Colon (`:`) separating keys/values + +**Table marker** - At-sign (`@`) indicating table structure + +**Primitive** - Boolean, null, number, or string (not object/array) + +**Uniform array** - All elements are objects with identical keys + +**Strict mode** - Validation enforcing row/column counts + +--- + +## 2. 
Data Model + +### 2.1 JSON Compatibility + +ZON encodes the JSON data model: +- **Primitives**: `string | number | boolean | null` +- **Objects**: `{ [string]: JsonValue }` +- **Arrays**: `JsonValue[]` + +### 2.2 Ordering + +- **Arrays**: Order MUST be preserved exactly +- **Objects**: Key order MUST be preserved + - Encoders SHOULD sort keys alphabetically + - Decoders MUST preserve document order + +### 2.3 Canonical Numbers + +**Requirements for ENCODER:** + +1. **No leading zeros:** `007` → invalid +2. **No trailing zeros:** `3.14000` → `3.14` +3. **No unnecessary decimals:** Integer `5` stays `5`, not `5.0` +4. **No scientific notation:** `1e6` → `1000000`, `1e-3` → `0.001` +5. **Special values map to null:** + - `NaN` → `null` + - `Infinity` → `null` + - `-Infinity` → `null` + +**Implementation:** +- Integers: Use standard string representation +- Floats: Ensure decimal point present, convert exponents to fixed-point +- Special values: Normalized to `null` before encoding + +**Examples:** +``` +1000000 ✓ (not 1e6 or 1e+6) +0.001 ✓ (not 1e-3) +3.14 ✓ (not 3.140000) +42 ✓ (integer, no decimal) +null ✓ (was NaN or Infinity) +``` + +**Scientific notation:** +``` +1e6 ⚠️ Decoders MUST accept, encoders SHOULD avoid (prefer 1000000) +2.5E-3 ⚠️ Decoders MUST accept, encoders SHOULD avoid (prefer 0.0025) +``` + +**Requirements:** +- Encoders MUST ensure `decode(encode(x)) === x` (round-trip fidelity) +- No trailing zeros in fractional part (except `.0` for float clarity) +- No leading zeros (except standalone `0`) +- `-0` normalizes to `0` + +### 2.4 Special Values + +- `NaN` → `null` +- `Infinity` → `null` +- `-Infinity` → `null` + +--- + +## 3. 
Encoding Normalization + +### 3.1 Host Type Mapping + +Encoders MUST normalize non-JSON types before encoding: + +**JavaScript/TypeScript:** +| Input | ZON Output | Notes | +|-------|------------|-------| +| `undefined` | `null` | Null | +| `Symbol()` | `null` | Not serializable | +| `function() {}` | `null` | Not serializable | +| `new Date()` | `"2025-11-28T10:00:00Z"` | ISO 8601 string | +| `new Set([1,2])` | `"[1,2]"` | Convert to array | +| `new Map([[k,v]])` | `"{k:v}"` | Convert to object | +| `BigInt(999)` | `"999"` | String if outside safe range | + +**Python:** +| Input | ZON Output | Notes | +|-------|------------|-------| +| `None` | `null` | Null | +| `datetime.now()` | `"2025-11-28T10:00:00Z"` | ISO 8601 | +| `set([1,2])` | `"[1,2]"` | Convert to list | +| `Decimal('3.14')` | `3.14` or `"3.14"` | Number if no precision loss | +| `bytes(b'\x00')` | `""` | Base64 encode | + +Implementations MUST document their normalization policy. + +--- + +## 4. Decoding Interpretation + +### 4.1 Type Inference + +**Unquoted tokens:** +``` +T → true (boolean) +F → false (boolean) +null → null +42 → 42 (integer) +3.14 → 3.14 (float) +1e6 → 1000000 (number) +05 → "05" (string, leading zero) +hello → "hello" (string) +``` + +**Quoted tokens:** +``` +"T" → "T" (string, not boolean) +"123" → "123" (string, not number) +"hello" → "hello" (string) +"" → "" (empty string) +``` + +### 4.2 Escape Sequences + +Only these escapes are valid: +- `\\` → `\` +- `\"` → `"` +- `\n` → newline +- `\r` → carriage return +- `\t` → tab + +**Invalid escapes MUST error:** +``` +"\x41" ❌ Invalid +"\u0041" ❌ Invalid (use literal UTF-8) +"\b" ❌ Invalid +``` + +### 4.3 Leading Zeros + +Numbers with leading zeros are strings: +``` +05 → "05" (string) +007 → "007" (string) +0 → 0 (number) +``` + +--- + +## 5. 
Concrete Syntax + +### 5.1 Line Structure + +ZON documents are line-oriented: +- Lines end with LF (`\n`) +- Empty lines are whitespace-only +- Blank lines separate metadata from tables + +### 5.2 Root Form + +Determined by first non-empty line: + +**Root table:** +```zon +@(2):id,name +1,Alice +2,Bob +``` + +**Root object:** +```zon +name:Alice +age:30 +``` + +**Root primitive:** +```zon +42 +``` + +### 5.3 ABNF Grammar + +```abnf +document = object-form / table-form / primitive-form +object-form = *(key-value / table-section) +table-form = table-header 1*data-row +primitive-form = value + +key-value = key ":" value LF +table-header = [key ":"] "@" "(" count ")" ":" column-list LF +table-section = table-header 1*data-row +data-row = value *("," value) LF + +key = unquoted-string / quoted-string +value = primitive / quoted-compound +primitive = "T" / "F" / "null" / number / unquoted-string +quoted-compound = quoted-string ; Contains JSON-like notation + +column-list = column *("," column) +column = key +count = 1*DIGIT +number = ["-"] 1*DIGIT ["." 1*DIGIT] [("e"/"E") ["+"/"-"] 1*DIGIT] +``` + +--- + +## 6. Primitives + +### 6.1 Booleans + +**Encoding:** +- `true` → `T` +- `false` → `F` + +**Decoding:** +- `T` (case-sensitive) → `true` +- `F` (case-sensitive) → `false` + +**Rationale:** 75% character reduction + +### 6.2 Null + +**Encoding:** +- `null` → `null` (4-character literal) + +**Decoding:** +- `null` → `null` +- Also accepts (case-insensitive): `none`, `nil` + +**Rationale:** Clarity and readability over minimal compression + +### 6.3 Numbers + +**Examples:** +```zon +age:30 +price:19.99 +score:-42 +temp:98.6 +large:1000000 +``` + +**Rules:** +- Integers without decimal: `42` +- Floats with decimal: `3.14` +- Negatives with `-` prefix: `-17` +- No thousands separators +- Decimal separator is `.` (period) + +--- + +## 7. 
Strings and Keys + +### 7.1 Safe Strings (Unquoted) + +Pattern: `^[a-zA-Z0-9_\-\.]+$` + +**Examples:** +```zon +name:Alice +user_id:u123 +version:v1.0.4 +api-key:sk_test_key +``` + +### 7.2 Required Quoting + +Quote strings if they: + +1. **Contain structural chars:** `,`, `:`, `[`, `]`, `{`, `}`, `"` +2. **Match literal keywords:** `T`, `F`, `true`, `false`, `null`, `none`, `nil` +3. **Look like PURE numbers:** `123`, `3.14`, `1e6` (Complex patterns like `192.168.1.1` or `v1.0.5` do NOT need quoting) +4. **Have whitespace:** Leading/trailing spaces, internal spaces (MUST quote to preserve) +5. **Are empty:** `""` (MUST quote to distinguish from `null`) +6. **Contain escapes:** Newlines, tabs, quotes (MUST quote to prevent structure breakage) + +**Examples:** +```zon +message:"Hello, world" +path:"C:\Users\file" +empty:"" +quoted:"true" +number:"123" +spaces:" padded " +``` + +### 7.3 ISO Date Optimization + +ISO 8601 dates MAY be unquoted: +```zon +created:2025-11-28 +timestamp:2025-11-28T10:00:00Z +time:10:30:00 +``` + +Decoders interpret these as strings (not parsed as Date objects unless application logic does so). + +--- + +## 8. Objects + +### 8.1 Flat Objects + +```zon +active:T +age:30 +name:Alice +``` + +Decodes to: +```json +{"active": true, "age": 30, "name": "Alice"} +``` + +### 8.2 Nested Objects + +Quoted compound notation: + +```zon +config:"{database:{host:localhost,port:5432},cache:{ttl:3600}}" +``` + +Alternatively using JSON string: +```zon +config:"{"database":{"host":"localhost","port":5432}}" +``` + +### 8.3 Empty Objects + +```zon +metadata:"{}" +``` + +--- + +## 9. Arrays + +### 9.1 Format Selection + +**Decision algorithm:** + +1. All elements are objects with same keys? → **Table format** +2. 
Otherwise → **Inline quoted format** + +### 9.2 Inline Arrays + +**Primitive arrays:** +```zon +tags:"[nodejs,typescript,llm]" +numbers:"[1,2,3,4,5]" +flags:"[T,F,T]" +mixed:"[hello,123,T,null]" +``` + +**Empty:** +```zon +items:"[]" +``` + +### 9.3 Irregularity Threshold + +**Uniform detection:** + +Calculate irregularity score: +``` +For each pair of objects (i, j): + similarity = shared_keys / (keys_i + keys_j - shared_keys) # Jaccard +Avg_similarity = mean(all_similarities) +Irregularity = 1 - avg_similarity +``` + +**Threshold:** +- If irregularity > 0.6 → Use inline format +- If irregularity ≤ 0.6 → Use table format + +--- + +## 10. Table Format + +### 10.1 Header Syntax + +**With key:** +``` +users:@(2):active,id,name +``` + +**Root array:** +``` +@(2):active,id,name +``` + +**Components:** +- `users` - Array key (optional for root) +- `@` - Table marker (REQUIRED) +- `(2)` - Row count (REQUIRED for strict mode) +- `:` - Separator (REQUIRED) +- `active,id,name` - Columns, comma-separated (REQUIRED) + +### 10.2 Column Order + +Columns SHOULD be sorted alphabetically: + +```zon +users:@(2):active,id,name,role +T,1,Alice,admin +F,2,Bob,user +``` + +### 10.3 Data Rows + +Each row is comma-separated values: + +```zon +T,1,Alice,admin +``` + +**Rules:** +- One row per line +- Values encoded as primitives (§6-7) +- Field count MUST equal column count (strict mode) +- Missing values encode as `null` + +### 10.4 Sparse Tables (v2.0) + +Optional fields append as `key:value`: + +```zon +users:@(3):id,name +1,Alice +2,Bob,role:admin,score:98 +3,Carol +``` + +**Row 2 decodes to:** +```json +{"id": 2, "name": "Bob", "role": "admin", "score": 98} +``` + +--- + +## 11. 
Quoting and Escaping + +### 11.1 CSV Quoting (RFC 4180) + +For table values containing commas: + +```zon +messages:@(1):id,text +1,"He said ""hello"" to me" +``` + +**Rules:** +- Wrap in double quotes: `"value"` +- Escape internal quotes by doubling: `"` → `""` + +### 11.2 Escape Sequences + +```zon +multiline:"Line 1\nLine 2" +tab:"Col1\tCol2" +quote:"She said \"Hi\"" +backslash:"C:\\path\\file" +``` + +**Valid escapes:** +- `\\` → `\` +- `\"` → `"` +- `\n` → newline +- `\r` → CR +- `\t` → tab + +### 11.3 Unicode + +Use literal UTF-8 (no `\uXXXX` escapes): + +```zon +chinese:王小明 +emoji:✅ +arabic:مرحبا +``` + +--- + +## 12. Whitespace and Line Endings + +### 12.1 Encoding Rules + +Encoders MUST: +- Use LF (`\n`) line endings +- NOT emit trailing whitespace on lines +- NOT emit trailing newline at EOF (RECOMMENDED) +- MAY emit one blank line between metadata and table + +### 12.2 Decoding Rules + +Decoders SHOULD: +- Accept LF or CRLF (normalize to LF) +- Ignore trailing whitespace per line +- Treat multiple blank lines as single separator + +--- + +## 13. 
Conformance and Options + +### 13.1 Encoder Checklist + +✅ **A conforming encoder MUST:** + +- [ ] Emit UTF-8 with LF line endings +- [ ] Encode booleans as `T`/`F` +- [ ] Encode null as `null` +- [ ] Emit canonical numbers (§2.3) +- [ ] Normalize NaN/Infinity to `null` +- [ ] Detect uniform arrays → table format +- [ ] Emit table headers: `key:@(N):columns` +- [ ] Sort columns alphabetically +- [ ] Sort object keys alphabetically +- [ ] Quote strings per §7.2-7.3 +- [ ] Use only valid escapes (§11.2) +- [ ] Preserve array order +- [ ] Preserve key order +- [ ] Ensure round-trip: `decode(encode(x)) === x` + +### 13.2 Decoder Checklist + +✅ **A conforming decoder MUST:** + +- [ ] Accept UTF-8 (LF or CRLF) +- [ ] Decode `T` → true, `F` → false, `null` → null +- [ ] Parse decimal and exponent numbers +- [ ] Treat leading-zero numbers as strings +- [ ] Unescape quoted strings +- [ ] Error on invalid escapes +- [ ] Parse table headers: `key:@(N):columns` +- [ ] Split rows by comma (CSV-aware) +- [ ] Preserve array order +- [ ] Preserve key order +- [ ] **Error Codes:** + - `E001`: Row count mismatch (strict mode) + - `E002`: Field count mismatch (strict mode) + - `E301`: Document size > 100MB + - `E302`: Line length > 1MB + - `E303`: Array length > 1M items + - `E304`: Object key count > 100K +- [ ] Enforce row count (strict mode) +- [ ] Enforce field count (strict mode) + +### 13.3 Strict Mode + +**Enabled by default** in reference implementation. + +Enforces: +- Table row count = declared `(N)` +- Each row field count = column count +- No malformed headers +- No invalid escapes +- No unterminated strings + +**Non-strict mode** MAY tolerate count mismatches. + +--- + +## 14. Schema Validation (LLM Evals) + +ZON includes a runtime schema validation library designed for LLM guardrails. It allows defining expected structures and validating LLM outputs against them. 
+ +### 14.1 Schema Definition + +```typescript +import { zon } from 'zon-format'; + +const UserSchema = zon.object({ + name: zon.string().describe("Full name"), + age: zon.number(), + role: zon.enum(['admin', 'user']), + tags: zon.array(zon.string()).optional() +}); +``` + +### 14.2 Prompt Generation + +Schemas can generate system prompts to guide LLMs: + +```typescript +const prompt = UserSchema.toPrompt(); +// Output: +// object: +// - name: string - Full name +// - age: number +// - role: enum(admin, user) +// - tags: array of [string] (optional) +``` + +### 14.3 Validation + +```typescript +import { validate } from 'zon-format'; + +const result = validate(llmOutputString, UserSchema); + +if (result.success) { + console.log(result.data); // Typed data +} else { + console.error(result.error); // "Expected number at age, got string" +} +``` + +--- + +## 15. Strict Mode Errors + +### 15.1 Table Errors + +| Code | Error | Example | +|------|-------|---------| +| **E001** | Row count mismatch | `@(2)` but 3 rows | +| **E002** | Field count mismatch | 3 columns, row has 2 values | +| **E003** | Malformed header | Missing `@`, `(N)`, or `:` | +| **E004** | Invalid column name | Unescaped special chars | + +### 15.2 Syntax Errors + +| Code | Error | Example | +|------|-------|---------| +| **E101** | Invalid escape | `"\x41"` instead of `"A"` | +| **E102** | Unterminated string | `"hello` (no closing quote) | +| **E103** | Missing colon | `name Alice` → `name:Alice` | +| **E104** | Empty key | `:value` | + +### 15.3 Format Errors + +| Code | Error | Example | +|------|-------|---------| +| **E201** | Trailing whitespace | Line ends with spaces | +| **E202** | CRLF line ending | `\r\n` instead of `\n` | +| **E203** | Multiple blank lines | More than one consecutive | +| **E204** | Trailing newline | Document ends with `\n` | + +--- + +## 17. 
Security Considerations + +### 15.1 Resource Limits + +Implementations SHOULD limit: +- Document size: 100 MB +- Line length: 1 MB +- Nesting depth: 100 levels +- Array length: 1,000,000 +- Object keys: 100,000 + +Prevents denial-of-service attacks. + +### 15.2 Validation + +- Validate UTF-8 strictly +- Error on invalid escapes +- Reject malformed numbers +- Limit recursion depth + +### 15.3 Injection Prevention + +ZON does not execute code. Applications MUST sanitize before: +- SQL queries +- Shell commands +- HTML rendering + +--- + +## 18. Internationalization + +### 16.1 Character Encoding + +**REQUIRED:** UTF-8 without BOM + +Decoders MUST: +- Reject invalid UTF-8 +- Reject BOM (U+FEFF) at start + +### 16.2 Unicode + +Full Unicode support: +- Emoji: `✅`, `🚀` +- CJK: `王小明`, `日本語` +- RTL: `مرحبا`, `שלום` + +### 16.3 Locale Independence + +- Decimal separator: `.` (period) +- No thousands separators +- ISO 8601 dates for internationalization + +--- + +## 19. Interoperability + +### 17.1 JSON + +**ZON → JSON:** Lossless +**JSON → ZON:** Lossless, with 35-50% compression for tabular data + +**Example:** +```json +{"users": [{"id": 1, "name": "Alice"}]} +``` +↓ ZON (42% smaller) +```zon +users:@(1):id,name +1,Alice +``` + +### 17.2 CSV + +**CSV → ZON:** Add type awareness +**ZON → CSV:** Table rows export cleanly + +**Advantages over CSV:** +- Type preservation +- Metadata support +- Nesting capability + +### 17.3 TOON + +**Comparison:** +- ZON: Flat, `@(N)`, `T/F/null` → Better compression +- TOON: Indented, `[N]{fields}:`, `true/false` → Better readability +Both are LLM-optimized; choose based on data shape. + +--- + +## 20. Media Type & File Extension + +### 18.1 File Extension + +**Extension:** `.zonf` + +ZON files use the `.zonf` extension (ZON Format) for all file operations. 
+ +**Examples:** +``` +data.zonf +users.zonf +config.zonf +``` + +### 18.2 Media Type + +**Media type:** `text/zon` + +**Status:** Provisional (not yet registered with IANA) + +**Charset:** UTF-8 (always) + +ZON documents are **always UTF-8 encoded**. The `charset=utf-8` parameter may be specified but defaults to UTF-8 when omitted. + +**HTTP Content-Type header:** +```http +Content-Type: text/zon +Content-Type: text/zon; charset=utf-8 # Explicit (optional) +``` + +### 18.3 MIME Type Usage + +**Web servers:** +```nginx +# nginx +location ~ \.zonf$ { + default_type text/zon; + charset utf-8; +} +``` + +```apache +# Apache +AddType text/zon .zonf +AddDefaultCharset utf-8 +``` + +**HTTP responses:** +```http +HTTP/1.1 200 OK +Content-Type: text/zon; charset=utf-8 +Content-Length: 1234 + +users:@(2):id,name +1,Alice +2,Bob +``` + +### 18.4 Character Encoding + +**Normative requirement:** ZON files MUST be UTF-8 encoded. + +**Rationale:** +- Universal support across programming languages +- Compatible with JSON (RFC 8259) +- No byte-order mark (BOM) required +- Supports full Unicode character set + +**Encoding declaration:** Not required (always UTF-8) + +### 18.5 IANA Registration + +**Current status:** Not registered + +**Future work:** Formal registration with IANA is planned for v2.0. 
+ +**Template for registration:** +``` +Type name: text +Subtype name: zon +Required parameters: None +Optional parameters: charset (default: utf-8) +Encoding considerations: Always UTF-8 +Security considerations: See §15 +Interoperability considerations: None known +Published specification: This document +Applications that use this media type: Data serialization for LLMs +Fragment identifier considerations: N/A +Additional information: + File extension: .zonf + Macintosh file type code: TEXT + Uniform Type Identifier: public.zon +Person & email address: See repository +Intended usage: COMMON +Restrictions on usage: None +``` + +### 18.2 IANA Status + +Provisional (not yet IANA-registered). May pursue formal registration at v2.0. + +--- + +## Appendices + +### Appendix A: Examples + +**A.1 Simple Object** +```zon +active:T +age:30 +name:Alice +``` + +**A.2 Table** +```zon +users:@(2):active,id,name +T,1,Alice +F,2,Bob +``` + +**A.3 Mixed** +```zon +tags:"[api,auth]" +version:1.0 +users:@(1):id,name +1,Alice +``` + +**A.4 Root Array** +```zon +@(2):id,name +1,Alice +2,Bob +``` + +### Appendix B: Test Suite + +**Coverage:** +- ✅ 28/28 unit tests +- ✅ 27/27 roundtrip tests +- ✅ 100% data integrity + +**Test categories:** +- Primitives (T, F, null, numbers, strings) +- Tables (uniform arrays) +- Quoting, escaping +- Round-trip fidelity +- Edge cases, errors + +### Appendix C: Changelog + +**v1.0.4 (2025-11-29)** +- Disabled sequential column omission +- 100% LLM accuracy achieved +- All columns explicit + +**v1.0.2 (2025-11-27)** +- Irregularity threshold tuning +- ISO date detection +- Sparse table encoding + +**v1.0.1 (2025-11-26)** +- License: MIT +- Documentation updates + +**v1.0.0 (2025-11-26)** +- Initial stable release +- Single-character primitives +- Table format +- Lossless round-trip + +### Appendix D: Parsing Algorithm + +**Decoder flow:** +``` +1. Split by lines (LF/CRLF) +2. Detect root form (table / object / primitive) +3. If table: + a. 
Parse header: @(N):columns + b. Read N rows + c. Split by comma (CSV-aware) + d. Map to objects +4. If object: + a. Parse key:value pairs + b. Build object +5. Return decoded value +``` + +**CSV-aware row splitting:** +```javascript +function parseRow(line, columns) { + const values = []; + let current = ''; + let inQuotes = false; + + for (let i = 0; i < line.length; i++) { + const char = line[i]; + + if (char === '"' && !inQuotes) { + inQuotes = true; + } else if (char === '"' && inQuotes) { + if (line[i+1] === '"') { // Escaped quote + current += '"'; + i++; + } else { + inQuotes = false; + } + } else if (char === ',' && !inQuotes) { + values.push(parseValue(current.trim())); + current = ''; + } else { + current += char; + } + } + + values.push(parseValue(current.trim())); + return values; +} +``` + +### Appendix E: License + +MIT License + +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +--- + +**End of Specification** diff --git a/benchmarks/benchmark_output.md b/benchmarks/benchmark_output.md new file mode 100644 index 0000000..fe9c904 --- /dev/null +++ b/benchmarks/benchmark_output.md @@ -0,0 +1,214 @@ +╔════════════════════════════════════════════════════════════════════════════╗ +║ ZON vs TOON vs CSV vs JSON BENCHMARK ║ +║ Token Efficiency Comparison ║ +║ Using GPT-5 o200k_base,Claude 3.5 (Anthropic), ║ +║ Llama 3 (Meta) tokenizer ║ +╚════════════════════════════════════════════════════════════════════════════╝ + +════════════════════════════════════════════════════════════════════════════════ +📊 Unified Dataset + Combined dataset with tabular, nested, and time-series data +──────────────────────────────────────────────────────────────────────────────── +📦 BYTE SIZES: + ZON: 1,399 bytes + TOON: 1,665 bytes + CSV: 1,384 bytes + YAML: 2,033 bytes + XML: 3,235 bytes + JSON (formatted): 2,842 bytes + JSON (compact): 1,854 bytes + +🔹 Tokenizer: GPT-4o (o200k) + ZON █████████░░░░░░░░░░░ 513 tokens 👑 + ├─ vs JSON formatted: -45.4% + ├─ vs JSON compact: -12.9% + ├─ vs TOON: -16.4% + ├─ vs CSV: -3.9% + ├─ vs YAML: -29.5% + └─ vs XML: -53.1% + + TOON ███████████░░░░░░░░░ 614 tokens + vs ZON: +19.7% + + CSV ██████████░░░░░░░░░░ 534 tokens + vs ZON: +4.1% + + YAML █████████████░░░░░░░ 728 tokens + vs ZON: +41.9% + + XML ████████████████████ 1,093 tokens + vs ZON: +113.1% + + JSON (cmp) ███████████░░░░░░░░░ 589 tokens + + +🔹 Tokenizer: Claude 3.5 (Anthropic) + ZON ██████████░░░░░░░░░░ 548 tokens + ├─ vs JSON formatted: -40.0% + ├─ vs JSON compact: -8.1% + ├─ vs TOON: -3.9% + ├─ vs CSV: +0.7% + ├─ vs YAML: -14.5% + └─ vs XML: -50.4% + + TOON ██████████░░░░░░░░░░ 570 tokens + vs ZON: +4.0% + + CSV ██████████░░░░░░░░░░ 544 tokens 👑 + vs ZON: -0.7% + + YAML ████████████░░░░░░░░ 641 tokens + vs ZON: +17.0% + + XML ████████████████████ 1,104 tokens + vs ZON: +101.5% + + JSON (cmp) ███████████░░░░░░░░░ 596 tokens + + +🔹 Tokenizer: Llama 3 (Meta) + ZON 
██████████░░░░░░░░░░ 696 tokens 👑 + ├─ vs JSON formatted: -43.1% + ├─ vs JSON compact: -8.4% + ├─ vs TOON: -11.2% + ├─ vs CSV: -4.4% + ├─ vs YAML: -22.1% + └─ vs XML: -50.0% + + TOON ███████████░░░░░░░░░ 784 tokens + vs ZON: +12.6% + + CSV ██████████░░░░░░░░░░ 728 tokens + vs ZON: +4.6% + + YAML █████████████░░░░░░░ 894 tokens + vs ZON: +28.4% + + XML ████████████████████ 1,392 tokens + vs ZON: +100.0% + + JSON (cmp) ███████████░░░░░░░░░ 760 tokens + + +════════════════════════════════════════════════════════════════════════════════ +📊 Large Complex Nested Dataset + Deeply nested, non-uniform structure with mixed types +──────────────────────────────────────────────────────────────────────────────── +📦 BYTE SIZES: + ZON: 335,611 bytes + TOON: 607,194 bytes + CSV: 369,682 bytes + YAML: 607,189 bytes + XML: 1,016,540 bytes + JSON (formatted): 834,132 bytes + JSON (compact): 551,854 bytes + +🔹 Tokenizer: GPT-4o (o200k) + ZON █████████░░░░░░░░░░░ 143,661 tokens 👑 + ├─ vs JSON formatted: -49.5% + ├─ vs JSON compact: -23.8% + ├─ vs TOON: -36.1% + ├─ vs CSV: -12.9% + ├─ vs YAML: -36.1% + └─ vs XML: -57.1% + + TOON █████████████░░░░░░░ 224,940 tokens + vs ZON: +56.6% + + CSV ██████████░░░░░░░░░░ 164,919 tokens + vs ZON: +14.8% + + YAML █████████████░░░░░░░ 224,938 tokens + vs ZON: +56.6% + + XML ████████████████████ 335,239 tokens + vs ZON: +133.4% + + JSON (cmp) ███████████░░░░░░░░░ 188,604 tokens + + +🔹 Tokenizer: Claude 3.5 (Anthropic) + ZON █████████░░░░░░░░░░░ 145,652 tokens 👑 + ├─ vs JSON formatted: -46.8% + ├─ vs JSON compact: -21.3% + ├─ vs TOON: -26.0% + ├─ vs CSV: -9.9% + ├─ vs YAML: -26.0% + └─ vs XML: -55.5% + + TOON ████████████░░░░░░░░ 196,893 tokens + vs ZON: +35.2% + + CSV ██████████░░░░░░░░░░ 161,701 tokens + vs ZON: +11.0% + + YAML ████████████░░░░░░░░ 196,892 tokens + vs ZON: +35.2% + + XML ████████████████████ 327,274 tokens + vs ZON: +124.7% + + JSON (cmp) ███████████░░░░░░░░░ 185,136 tokens + + +🔹 Tokenizer: Llama 3 (Meta) + ZON ██████████░░░░░░░░░░ 
230,838 tokens 👑 + ├─ vs JSON formatted: -43.0% + ├─ vs JSON compact: -16.5% + ├─ vs TOON: -26.7% + ├─ vs CSV: -9.2% + ├─ vs YAML: -26.7% + └─ vs XML: -51.9% + + TOON █████████████░░░░░░░ 314,824 tokens + vs ZON: +36.4% + + CSV ███████████░░░░░░░░░ 254,181 tokens + vs ZON: +10.1% + + YAML █████████████░░░░░░░ 314,820 tokens + vs ZON: +36.4% + + XML ████████████████████ 480,125 tokens + vs ZON: +108.0% + + JSON (cmp) ████████████░░░░░░░░ 276,405 tokens + + +════════════════════════════════════════════════════════════════════════════════ +📈 OVERALL SUMMARY +════════════════════════════════════════════════════════════════════════════════ + +🔹 GPT-4o (o200k) Summary: + ZON Wins: 2/2 datasets + Total Tokens: + ZON: █████████████░░░░░░░░░░░░░░░░░ 144,174 tokens + vs JSON (cmp): -23.8% + vs TOON: -36.1% + vs CSV: -12.9% + vs YAML: -36.1% + vs XML: -57.1% + +🔹 Claude 3.5 (Anthropic) Summary: + ZON Wins: 1/2 datasets + Total Tokens: + ZON: █████████████░░░░░░░░░░░░░░░░░ 146,200 tokens + vs JSON (cmp): -21.3% + vs TOON: -26.0% + vs CSV: -9.9% + vs YAML: -26.0% + vs XML: -55.5% + +🔹 Llama 3 (Meta) Summary: + ZON Wins: 2/2 datasets + Total Tokens: + ZON: ██████████████░░░░░░░░░░░░░░░░ 231,534 tokens + vs JSON (cmp): -16.5% + vs TOON: -26.6% + vs CSV: -9.2% + vs YAML: -26.7% + vs XML: -51.9% + +════════════════════════════════════════════════════════════════════════════════ +✨ Benchmark complete! +════════════════════════════════════════════════════════════════════════════════ diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..14b3af5 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,342 @@ +# ZON API Reference for Java + +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +Complete API documentation for `zon-java` v1.0.5. 
+ +## Installation + +### Maven + +```xml +<dependency> + <groupId>com.zonformat</groupId> + <artifactId>zon-java</artifactId> + <version>1.0.5</version> +</dependency> +``` + +### Gradle + +```groovy +implementation 'com.zonformat:zon-java:1.0.5' +``` + +--- + +## Main Classes + +### `Zon` + +Main entry point for ZON encoding and decoding operations. + +#### Methods + +##### `encode(Object data)` + +Encodes Java data to ZON format. + +**Parameters:** +- `data` (`Object`) - Java data to encode (Map, List, or primitive) + +**Returns:** `String` - ZON-formatted string + +**Throws:** `IllegalArgumentException` - if circular reference detected + +**Example:** +```java +import com.zonformat.zon.Zon; +import java.util.*; + +Map<String, Object> data = new LinkedHashMap<>(); +data.put("name", "Alice"); +data.put("age", 30); +data.put("active", true); + +String encoded = Zon.encode(data); +// Output: +// active:T +// age:30 +// name:Alice +``` + +##### `decode(String zonStr)` + +Decodes a ZON format string to Java objects with strict mode enabled. + +**Parameters:** +- `zonStr` (`String`) - The ZON-formatted string to decode + +**Returns:** `Object` - Decoded Java object (Map, List, or primitive) + +**Throws:** `ZonDecodeError` - if decoding fails or validation errors occur + +##### `decode(String zonStr, boolean strict)` + +Decodes a ZON format string to Java objects with specified strictness. + +**Parameters:** +- `zonStr` (`String`) - The ZON-formatted string to decode +- `strict` (`boolean`) - Whether to enable strict validation + +**Returns:** `Object` - Decoded Java object (Map, List, or primitive) + +**Throws:** `ZonDecodeError` - if decoding fails + +**Example:** +```java +import com.zonformat.zon.Zon; + +String zonData = "users:@(2):id,name\n1,Alice\n2,Bob"; +Object decoded = Zon.decode(zonData); + +// Non-strict mode +Object lenient = Zon.decode(zonData, false); +``` + +##### `getVersion()` + +Gets the library version. + +**Returns:** `String` - Version string (e.g., "1.0.5") + +--- + +### `ZonEncoder` + +Encoder class for converting Java objects to ZON format. 
+ +#### Constructors + +##### `ZonEncoder()` + +Creates a new ZonEncoder with default settings. + +##### `ZonEncoder(int anchorInterval)` + +Creates a new ZonEncoder with custom anchor interval. + +**Parameters:** +- `anchorInterval` (`int`) - Anchor interval for large datasets + +#### Methods + +##### `encode(Object data)` + +Encodes Java data to ZON format. + +**Parameters:** +- `data` (`Object`) - Data to encode (Map, List, or primitive) + +**Returns:** `String` - ZON-formatted string + +**Throws:** `IllegalArgumentException` - if circular reference detected + +--- + +### `ZonDecoder` + +Decoder class for converting ZON format strings back to Java objects. + +#### Constructors + +##### `ZonDecoder()` + +Creates a new ZonDecoder with strict mode enabled. + +##### `ZonDecoder(boolean strict)` + +Creates a new ZonDecoder with specified strictness. + +**Parameters:** +- `strict` (`boolean`) - Whether to enable strict validation + +#### Methods + +##### `decode(String zonStr)` + +Decodes a ZON format string to Java objects. + +**Parameters:** +- `zonStr` (`String`) - ZON format string + +**Returns:** `Object` - Decoded Java object (Map, List, or primitive) + +**Throws:** `ZonDecodeError` - if decoding fails + +--- + +### `ZonDecodeError` + +Exception thrown when ZON decoding fails. + +#### Constructors + +##### `ZonDecodeError(String message)` + +Creates a new ZonDecodeError with a message. + +##### `ZonDecodeError(String message, String code)` + +Creates a new ZonDecodeError with a message and error code. + +##### `ZonDecodeError(String message, String code, Integer line, Integer column, String context)` + +Creates a new ZonDecodeError with full details. + +#### Methods + +##### `getCode()` + +Gets the error code. + +**Returns:** `String` - Error code or null + +##### `getLine()` + +Gets the line number where the error occurred. + +**Returns:** `Integer` - Line number or null + +##### `getColumn()` + +Gets the column position. 
+ +**Returns:** `Integer` - Column position or null + +##### `getContext()` + +Gets the context snippet. + +**Returns:** `String` - Context or null + +--- + +## Error Codes + +| Code | Description | Example | +|------|-------------|---------| +| `E001` | Row count mismatch | Declared `@(3)` but only 2 rows provided | +| `E002` | Field count mismatch | Declared 3 columns but row has 2 values | +| `E301` | Document size exceeds 100MB | Prevents memory exhaustion | +| `E302` | Line length exceeds 1MB | Prevents buffer overflow | +| `E303` | Array length exceeds 1M items | Prevents excessive iteration | +| `E304` | Object key count exceeds 100K | Prevents hash collision | + +--- + +## Constants + +Located in `com.zonformat.zon.Constants`: + +```java +public static final char TABLE_MARKER = '@'; +public static final char META_SEPARATOR = ':'; +public static final long MAX_DOCUMENT_SIZE = 100 * 1024 * 1024; // 100 MB +public static final int MAX_LINE_LENGTH = 1024 * 1024; // 1 MB +public static final int MAX_ARRAY_LENGTH = 1_000_000; // 1M items +public static final int MAX_OBJECT_KEYS = 100_000; // 100K keys +public static final int MAX_NESTING_DEPTH = 100; // 100 levels +``` + +--- + +## Type Mapping + +### Java to ZON + +| Java Type | ZON Encoding | +|-----------|--------------| +| `Boolean` (true) | `T` | +| `Boolean` (false) | `F` | +| `null` | `null` | +| `Integer`, `Long` | Number without decimal | +| `Double`, `Float` | Number with decimal | +| `String` | Quoted or unquoted | +| `Map` | Object notation | +| `List` | Array or table | + +### ZON to Java + +| ZON Value | Java Type | +|-----------|-----------| +| `T`, `true`, `TRUE` | `Boolean` (true) | +| `F`, `false`, `FALSE` | `Boolean` (false) | +| `null`, `none`, `nil` | `null` | +| Integer number | `Long` | +| Decimal number | `Double` | +| String | `String` | +| Object | `LinkedHashMap` | +| Array/Table | `ArrayList` | + +--- + +## Complete Examples + +### Example 1: Simple Object + +```java +Map data = new 
LinkedHashMap<>(); +data.put("name", "ZON Format"); +data.put("version", "1.0.5"); +data.put("active", true); +data.put("score", 98.5); + +String encoded = Zon.encode(data); +// active:T +// name:ZON Format +// score:98.5 +// version:"1.0.5" + +Object decoded = Zon.decode(encoded); +``` + +### Example 2: Uniform Table + +```java +List<Map<String, Object>> employees = new ArrayList<>(); + +Map<String, Object> e1 = new LinkedHashMap<>(); +e1.put("id", 1); +e1.put("name", "Alice"); +e1.put("dept", "Eng"); +e1.put("salary", 85000); +employees.add(e1); + +Map<String, Object> e2 = new LinkedHashMap<>(); +e2.put("id", 2); +e2.put("name", "Bob"); +e2.put("dept", "Sales"); +e2.put("salary", 72000); +employees.add(e2); + +Map<String, Object> data = new LinkedHashMap<>(); +data.put("employees", employees); + +String encoded = Zon.encode(data); +// employees:@(2):dept,id,name,salary +// Eng,1,Alice,85000 +// Sales,2,Bob,72000 +``` + +### Example 3: Error Handling + +```java +try { + String invalidZon = "users:@(3):id,name\n1,Alice"; + Object data = Zon.decode(invalidZon); +} catch (ZonDecodeError e) { + System.out.println("Error: " + e.getMessage()); + System.out.println("Code: " + e.getCode()); // "E001" + System.out.println("Context: " + e.getContext()); // "Table: users" +} +``` + +--- + +## See Also + +- [Syntax Cheatsheet](./syntax-cheatsheet.md) - Quick reference +- [Format Specification](../SPEC.md) - Formal grammar +- [LLM Best Practices](./llm-best-practices.md) - Usage guide +- [GitHub Repository](https://github.com/ZON-Format/ZOD-JAVA) diff --git a/docs/llm-best-practices.md b/docs/llm-best-practices.md new file mode 100644 index 0000000..b7329e0 --- /dev/null +++ b/docs/llm-best-practices.md @@ -0,0 +1,269 @@ +# Using ZON with LLMs - Best Practices + +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +Guide for maximizing ZON's effectiveness in LLM applications. + +## Why ZON for LLMs? + +LLM API costs are directly tied to token count. ZON reduces tokens by **23.8% vs JSON** while achieving **100% retrieval accuracy**. 
+ +**Key Benefits:** +- 💰 **Lower costs**: Fewer tokens = lower API bills +- 🎯 **Better accuracy**: 100% vs JSON's 91.7% +- 📊 **Self-documenting**: Explicit headers `@(N):columns` +- 🔍 **Human-readable**: Easy to debug and verify + +--- + +## Sending ZON as Input + +### Basic Pattern + +Wrap ZON data in code blocks with format label: + +````markdown +Here's the user data in ZON format: + +```zon +users:@(3):active,id,name,role +T,1,Alice,admin +T,2,Bob,user +F,3,Carol,guest +``` + +Question: How many active users are there? +```` + +**Why this works:** +- ✅ Code blocks prevent formatting issues +- ✅ `zon` label helps model recognize format +- ✅ Explicit headers (`@(3):columns`) give clear schema + +--- + +## Java Integration Examples + +### Building LLM Prompts + +```java +import com.zonformat.zon.Zon; +import java.util.*; + +public class LLMPromptBuilder { + public static String buildPrompt(List<Map<String, Object>> data, String question) { + Map<String, Object> wrapper = new LinkedHashMap<>(); + wrapper.put("data", data); + + String zonData = Zon.encode(wrapper); + + return String.format(""" + Here's the data in ZON format: + + ```zon + %s + ``` + + Question: %s + """, zonData, question); + } +} +``` + +### Parsing LLM Responses + +```java +import com.zonformat.zon.Zon; +import java.util.regex.*; + +public class LLMResponseParser { + private static final Pattern ZON_BLOCK = Pattern.compile("```zon\\n([\\s\\S]*?)```"); + + public static Object extractZonData(String llmResponse) { + Matcher matcher = ZON_BLOCK.matcher(llmResponse); + if (matcher.find()) { + String zonData = matcher.group(1); + return Zon.decode(zonData); + } + return null; + } +} +``` + +--- + +## Prompting Strategies + +### Strategy 1: Show the Format (No Explanation) + +**Best approach** - Let the model infer the structure: + +```` +```zon +products:@(4):category,id,name,price,stock +Electronics,1,Laptop,999,45 +Books,2,Python Guide,29.99,120 +Electronics,3,Mouse,19.99,200 +Books,4,JavaScript Basics,24.95,85 +``` + +Find products 
with stock below 100. +```` + +**Why it works:** The explicit headers (`@(4):category,id,name,price,stock`) are self-documenting. + +### Strategy 2: Minimal Context + +For complex queries, add brief context: + +```` +Data format: ZON (tabular) +@(N) = row count +Column names listed in header + +```zon +logs:@(100):level,message,timestamp,userId +ERROR,Database timeout,2025-01-15T10:30:00Z,1001 +WARN,High memory usage,2025-01-15T10:31:15Z,1002 +ERROR,API rate limit,2025-01-15T10:32:45Z,1001 +... +``` + +How many ERROR logs are from userId 1001? +```` + +--- + +## Common Use Cases + +### 1. Data Retrieval Questions + +**Perfect for ZON** - table format excels here: + +```java +List<Map<String, Object>> employees = buildEmployeeList(); +Map<String, Object> data = new LinkedHashMap<>(); +data.put("employees", employees); + +String zon = Zon.encode(data); +String prompt = String.format(""" + ```zon + %s + ``` + + Questions: + 1. What's the average salary in Engineering? + 2. How many inactive employees are there? + 3. List all Sales department employees. + """, zon); +``` + +### 2. Configuration Files + +```java +// Parse ZON config +String config = """ + environment:production + database{host:localhost,port:5432,ssl:T} + cache{ttl:3600,enabled:T} + features[darkMode,betaAccess,newUI] + """; + +Map<String, Object> settings = (Map<String, Object>) Zon.decode(config); +``` + +### 3. 
Structured Logging + +```java +public void logEvent(String level, String message, Map<String, Object> context) { + context.put("level", level); + context.put("message", message); + context.put("timestamp", Instant.now().toString()); + + String zonLog = Zon.encode(context); + logger.info(zonLog); +} +``` + +--- + +## Token Efficiency Tips + +### Tip 1: Use Compact Field Names + +```zon +# Good ✅ (shorter column names) +u:@(100):id,n,e,a +1,Alice,alice@ex.com,T +2,Bob,bob@ex.com,F + +# Avoid ❌ (verbose names) +users:@(100):userId,fullName,emailAddress,isActive +1,Alice,alice@ex.com,true +2,Bob,bob@ex.com,false +``` + +**Token savings:** ~20% with compact names + +### Tip 2: Boolean Shorthand + +ZON uses `T`/`F` instead of `true`/`false`: + +```zon +users:@(100):id,name,active,verified +1,Alice,T,T +2,Bob,F,T +3,Carol,T,F +``` + +**Token savings:** ~40% on boolean fields + +--- + +## Benchmark Results + +### Token Efficiency Comparison + +| Format | Tokens | Savings vs JSON | +|--------|--------|-----------------| +| JSON (formatted) | 1,300 | - | +| JSON (compact) | 802 | 38% | +| TOON | 874 | 33% | +| CSV | 714 | 45% | +| **ZON** | **692** | **47%** | + +### LLM Accuracy (GPT-4o) + +| Format | Accuracy | Efficiency Score | +|--------|----------|------------------| +| **ZON** | **99.0%** | **1430.6** | +| CSV | 99.0% | 1386.5 | +| JSON compact | 91.7% | 1143.4 | +| TOON | 99.0% | 1132.7 | +| JSON | 96.8% | 744.6 | + +*Efficiency score = (Accuracy % ÷ Tokens) × 10,000. 
Higher is better.* + +--- + +## Quick Reference + +### Do's ✅ +- Use code blocks for formatting +- Include `@(N)` row counts +- List column names explicitly +- Use `T`/`F` for booleans +- Use `null` for null values + +### Don'ts ❌ +- Don't explain ZON syntax (show, don't tell) +- Don't mix formats (stick to ZON) +- Don't omit row counts +- Don't use verbose field names unnecessarily + +--- + +**See also:** +- [Syntax Cheatsheet](./syntax-cheatsheet.md) - Quick reference +- [API Reference](./api-reference.md) - encode/decode functions +- [Format Specification](../SPEC.md) - Formal grammar diff --git a/docs/syntax-cheatsheet.md b/docs/syntax-cheatsheet.md new file mode 100644 index 0000000..71e7093 --- /dev/null +++ b/docs/syntax-cheatsheet.md @@ -0,0 +1,291 @@ +# ZON Syntax Cheatsheet + +Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + +Quick reference for ZON format syntax. Cross-referenced with actual implementation in v1.0.5. + +## Basic Types + +### Primitives + +```zon +# String (unquoted when safe) +name:Alice + +# Number +score:98.5 +count:42 + +# Boolean (T/F) +active:T +disabled:F + +# Null +value:null +``` + +### Objects + +```zon +# Simple object +name:ZON Format +version:1.0.5 +active:T +score:98.5 +``` + +**JSON equivalent:** +```json +{ + "name": "ZON Format", + "version": "1.0.5", + "active": true, + "score": 98.5 +} +``` + +### Nested Objects + +**Colon-less Syntax (v2.0.5):** +```zon +# Colon is optional if value starts with { or [ +config{database{host:localhost,port:5432},cache{ttl:3600,enabled:T}} +``` + +**Legacy Quoted (v1.x):** +```zon +config:"{database:{host:localhost,port:5432}}" +``` + +--- + +## Arrays + +### Primitive Arrays (Inline) + +```zon +tags:"[nodejs,typescript,llm]" +numbers:"[1,2,3,4,5]" +flags:"[T,F,T]" +``` + +### Tabular Arrays (Uniform Objects) + +**Most efficient form - ZON's specialty** + +```zon +users:@(3):active,id,name,role +T,1,Alice,admin +T,2,Bob,user +F,3,Carol,guest +``` + +**Breakdown:** +- `@(3)` = 3 rows +- 
`:active,id,name,role` = column headers (alphabetically sorted) +- Data rows follow + +**JSON equivalent:** +```json +{ + "users": [ + { "id": 1, "name": "Alice", "role": "admin", "active": true }, + { "id": 2, "name": "Bob", "role": "user", "active": true }, + { "id": 3, "name": "Carol", "role": "guest", "active": false } + ] +} +``` + +### Empty Containers + +```zon +# Empty object +metadata:"{}" + +# Empty array +tags:"[]" +``` + +--- + +## Quoting Rules + +### When Strings NEED Quotes + +1. **Contains special characters**: + - Commas: `"hello, world"` + - Brackets: `"[test]"` + - Braces: `"{test}"` + +2. **Looks like a literal**: + - `"true"` (string, not boolean) + - `"123"` (string, not number) + - `"false"` (string, not boolean) + - `"null"` (string, not null) + +3. **Leading/trailing spaces**: + - `" padded "` + +4. **Empty string**: + - `""` (MUST quote, otherwise parses as `null`) + +### Safe Unquoted Strings + +```zon +# Alphanumeric + dash, underscore, dot +name:john-doe +file:data_v1.json +host:api.example.com +``` + +--- + +## Table Headers + +### Basic Header (with count) + +```zon +users:@(2):id,name,active +1,Alice,T +2,Bob,F +``` + +**Best practice**: Always include count `@(N)` for explicit schema + +--- + +## Type Conversions + +| ZON | Java | Notes | +|-----|------|-------| +| `T` | `Boolean.TRUE` | Boolean true | +| `F` | `Boolean.FALSE` | Boolean false | +| `null` | `null` | Null value | +| `42` | `Long` | Integer number | +| `3.14` | `Double` | Decimal number | +| `hello` | `String` | Unquoted string | +| `"hello"` | `String` | Quoted string | + +--- + +## Java Code Examples + +### Encoding + +```java +import com.zonformat.zon.Zon; +import java.util.*; + +// Simple object +Map data = new LinkedHashMap<>(); +data.put("name", "Alice"); +data.put("age", 30); +data.put("active", true); + +String zon = Zon.encode(data); +// active:T +// age:30 +// name:Alice +``` + +### Decoding + +```java +import com.zonformat.zon.Zon; + +String zonData = """ + 
users:@(2):id,name + 1,Alice + 2,Bob + """; + +Object decoded = Zon.decode(zonData); +// Returns: {users=[{id=1, name=Alice}, {id=2, name=Bob}]} +``` + +### Error Handling + +```java +import com.zonformat.zon.*; + +try { + Object data = Zon.decode(invalidZon); +} catch (ZonDecodeError e) { + System.out.println("Error: " + e.getCode()); + System.out.println("Message: " + e.getMessage()); + System.out.println("Context: " + e.getContext()); +} +``` + +--- + +## Escape Sequences + +Within quoted strings: +- `""` - Double quote (CSV-style) +- `\n` - Newline +- `\r` - Carriage return +- `\t` - Tab +- `\\` - Backslash + +**Example:** +```zon +message:"Line 1\nLine 2" +quote:"He said ""hello""" +``` + +--- + +## Complete Example + +**JSON:** +```json +{ + "metadata": { "version": "1.0.5", "env": "production" }, + "users": [ + { "id": 1, "name": "Alice", "active": true, "loginCount": 42 }, + { "id": 2, "name": "Bob", "active": true, "loginCount": 17 }, + { "id": 3, "name": "Carol", "active": false, "loginCount": 3 } + ], + "config": { "database": { "host": "localhost", "port": 5432 } } +} +``` + +**ZON:** +```zon +metadata{version:1.0.5,env:production} +users:@(3):active,id,loginCount,name +T,1,42,Alice +T,2,17,Bob +F,3,3,Carol +config.database{host:localhost,port:5432} +``` + +**Token count:** +- JSON (formatted): 151 tokens +- ZON: 87 tokens +- **Savings: 42% fewer tokens** + +--- + +## Quick Reference + +### Do's ✅ +- Use code blocks for formatting +- Include `@(N)` row counts +- List column names explicitly +- Use `T`/`F` for booleans +- Use `null` for null values + +### Don'ts ❌ +- Don't explain ZON syntax (show, don't tell) +- Don't mix formats (stick to ZON) +- Don't omit row counts +- Don't use verbose field names unnecessarily + +--- + +**See also:** +- [API Reference](./api-reference.md) - encode/decode functions +- [Format Specification](../SPEC.md) - Formal grammar +- [LLM Best Practices](./llm-best-practices.md) - Usage guide diff --git a/pom.xml b/pom.xml new 
file mode 100644 index 0000000..d832b50 --- /dev/null +++ b/pom.xml @@ -0,0 +1,118 @@ + + + 4.0.0 + + com.zonformat + zon-java + 1.0.5 + jar + + ZON Format for Java + Zero Overhead Notation (ZON) - A compact, human-readable data serialization format optimized for LLM token efficiency + https://github.com/ZON-Format/ZOD-JAVA + + + + MIT License + https://opensource.org/licenses/MIT + repo + + + + + + Roni Bhakta + ZON-Format + + + + + scm:git:git://github.com/ZON-Format/ZOD-JAVA.git + scm:git:ssh://github.com:ZON-Format/ZOD-JAVA.git + https://github.com/ZON-Format/ZOD-JAVA + + + + 11 + 11 + UTF-8 + 5.10.1 + 2.10.1 + + + + + + com.google.code.gson + gson + ${gson.version} + + + + + org.junit.jupiter + junit-jupiter + ${junit.version} + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + ${maven.compiler.source} + ${maven.compiler.target} + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.2 + + + + org.apache.maven.plugins + maven-jar-plugin + 3.3.0 + + + + com.zonformat.zon.Cli + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + + + com.zonformat.zon.Cli + + + true + cli + + + + + + + diff --git a/src/main/java/com/zonformat/zon/Cli.java b/src/main/java/com/zonformat/zon/Cli.java new file mode 100644 index 0000000..1a30e2a --- /dev/null +++ b/src/main/java/com/zonformat/zon/Cli.java @@ -0,0 +1,97 @@ +/* + * ZON CLI - Command Line Interface + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonParser; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +/** + * Command Line Interface for ZON format conversion. + * + *

Usage

+ *
+ * java -jar zon-java-cli.jar encode <file.json>
+ * java -jar zon-java-cli.jar decode <file.zonf>
+ * 
+ * + *

Examples

+ *
+ * # Convert JSON to ZON
+ * java -jar zon-java-cli.jar encode data.json > data.zonf
+ * 
+ * # Convert ZON back to JSON
+ * java -jar zon-java-cli.jar decode data.zonf > data.json
+ * 
+ */ +public class Cli { + + private static final Gson gson = new GsonBuilder().setPrettyPrinting().create(); + + public static void main(String[] args) { + if (args.length < 2) { + printUsage(); + System.exit(1); + } + + String command = args[0]; + String inputFile = args[1]; + + try { + Path path = Paths.get(inputFile).toAbsolutePath(); + String content = Files.readString(path); + + switch (command.toLowerCase()) { + case "encode": + encodeJson(content); + break; + case "decode": + decodeZon(content); + break; + default: + System.err.println("Unknown command: " + command); + printUsage(); + System.exit(1); + } + } catch (IOException e) { + System.err.println("Error reading file: " + e.getMessage()); + System.exit(1); + } catch (Exception e) { + System.err.println("Error: " + e.getMessage()); + System.exit(1); + } + } + + private static void encodeJson(String jsonContent) { + // Parse JSON to Java object + Object data = gson.fromJson(jsonContent, Object.class); + + // Encode to ZON + String zon = Zon.encode(data); + System.out.println(zon); + } + + private static void decodeZon(String zonContent) { + // Decode ZON to Java object + Object data = Zon.decode(zonContent); + + // Convert to pretty JSON + String json = gson.toJson(data); + System.out.println(json); + } + + private static void printUsage() { + System.err.println("Usage: zon <encode|decode> <file>"); + System.err.println("Example: zon encode data.json > data.zonf"); + System.err.println(" zon decode data.zonf > output.json"); + } +} diff --git a/src/main/java/com/zonformat/zon/Constants.java b/src/main/java/com/zonformat/zon/Constants.java new file mode 100644 index 0000000..ab62a73 --- /dev/null +++ b/src/main/java/com/zonformat/zon/Constants.java @@ -0,0 +1,39 @@ +/* + * ZON Protocol Constants v1.0.5 + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +/** + * Constants for the ZON format protocol. 
+ */ +public final class Constants { + + private Constants() { + // Prevent instantiation + } + + // Format markers + public static final char TABLE_MARKER = '@'; + public static final char META_SEPARATOR = ':'; + + // Reserved tokens (for future use) + public static final String GAS_TOKEN = "_"; + public static final String LIQUID_TOKEN = "^"; + + // Default anchor interval for large datasets + public static final int DEFAULT_ANCHOR_INTERVAL = 100; + + // Security limits (DOS prevention) + public static final long MAX_DOCUMENT_SIZE = 100L * 1024 * 1024; // 100 MB + public static final int MAX_LINE_LENGTH = 1024 * 1024; // 1 MB + public static final int MAX_ARRAY_LENGTH = 1_000_000; // 1 million items + public static final int MAX_OBJECT_KEYS = 100_000; // 100K keys + public static final int MAX_NESTING_DEPTH = 100; // Maximum nesting depth + + // Legacy compatibility + public static final char LEGACY_TABLE_MARKER = '@'; + public static final int INLINE_THRESHOLD_ROWS = 0; +} diff --git a/src/main/java/com/zonformat/zon/Zon.java b/src/main/java/com/zonformat/zon/Zon.java new file mode 100644 index 0000000..3d9ae15 --- /dev/null +++ b/src/main/java/com/zonformat/zon/Zon.java @@ -0,0 +1,116 @@ +/* + * ZON Format v1.0.5 + * Zero Overhead Notation - A human-readable data serialization format + * optimized for LLM token efficiency + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +/** + * Main entry point for ZON encoding and decoding operations. + * + *

ZON (Zero Overhead Notation) is a compact, human-readable data format + * optimized for LLM token efficiency. It achieves 35-50% token reduction + * vs JSON through tabular encoding, single-character primitives, and + * intelligent compression while maintaining 100% data fidelity.

+ * + *

Quick Start

+ *
{@code
+ * // Encode Java data to ZON
+ * Map<String, Object> data = new HashMap<>();
+ * data.put("name", "Alice");
+ * data.put("age", 30);
+ * String zon = Zon.encode(data);
+ * 
+ * // Decode ZON back to Java
+ * Object decoded = Zon.decode(zon);
+ * }
+ * + *

Table Format

+ *

Arrays of uniform objects are encoded as tables:

+ *
{@code
+ * // Input: [{id: 1, name: "Alice"}, {id: 2, name: "Bob"}]
+ * // Output:
+ * // @(2):id,name
+ * // 1,Alice
+ * // 2,Bob
+ * }
+ * + *

Boolean and Null Encoding

+ *
    + *
  • {@code true} → {@code T}
  • + *
  • {@code false} → {@code F}
  • + *
  • {@code null} → {@code null}
  • + *
+ * + * @see ZonEncoder + * @see ZonDecoder + * @see GitHub Repository + */ +public final class Zon { + + private Zon() { + // Prevent instantiation + } + + /** + * Encodes Java data to ZON format. + * + *

Supported types:

+ *
    + *
  • Objects ({@code Map})
  • + *
  • Arrays ({@code List})
  • + *
  • Strings
  • + *
  • Numbers (Integer, Long, Double, Float)
  • + *
  • Booleans
  • + *
  • Null
  • + *
+ * + * @param data Data to encode + * @return ZON-formatted string + * @throws IllegalArgumentException if circular reference detected + */ + public static String encode(Object data) { + return new ZonEncoder().encode(data); + } + + /** + * Decodes ZON format string to Java objects. + * + *

Uses strict mode by default, which validates:

+ *
    + *
  • Row counts match declared values
  • + *
  • Field counts match column counts
  • + *
+ * + * @param zonStr ZON format string + * @return Decoded Java object (Map, List, or primitive) + * @throws ZonDecodeError if decoding fails or validation errors occur + */ + public static Object decode(String zonStr) { + return new ZonDecoder().decode(zonStr); + } + + /** + * Decodes ZON format string to Java objects with specified strictness. + * + * @param zonStr ZON format string + * @param strict Whether to enable strict validation + * @return Decoded Java object (Map, List, or primitive) + * @throws ZonDecodeError if decoding fails + */ + public static Object decode(String zonStr, boolean strict) { + return new ZonDecoder(strict).decode(zonStr); + } + + /** + * Gets the library version. + * + * @return Version string + */ + public static String getVersion() { + return "1.0.5"; + } +} diff --git a/src/main/java/com/zonformat/zon/ZonDecodeError.java b/src/main/java/com/zonformat/zon/ZonDecodeError.java new file mode 100644 index 0000000..52ffe94 --- /dev/null +++ b/src/main/java/com/zonformat/zon/ZonDecodeError.java @@ -0,0 +1,106 @@ +/* + * ZON Exceptions + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +/** + * Exception thrown when ZON decoding fails. + */ +public class ZonDecodeError extends RuntimeException { + + private final String code; + private final Integer line; + private final Integer column; + private final String context; + + /** + * Creates a new ZonDecodeError with a message. + * + * @param message Error message + */ + public ZonDecodeError(String message) { + this(message, null, null, null, null); + } + + /** + * Creates a new ZonDecodeError with a message and error code. + * + * @param message Error message + * @param code Error code (e.g., "E001", "E002") + */ + public ZonDecodeError(String message, String code) { + this(message, code, null, null, null); + } + + /** + * Creates a new ZonDecodeError with full details. 
+     *
+     * @param message Error message
+     * @param code Error code (may be null)
+     * @param line Line number where error occurred (may be null)
+     * @param column Column position (may be null)
+     * @param context Relevant context snippet (may be null)
+     */
+    public ZonDecodeError(String message, String code, Integer line, Integer column, String context) {
+        super(message);
+        this.code = code;
+        this.line = line;
+        this.column = column;
+        this.context = context;
+    }
+
+    /**
+     * Gets the error code.
+     *
+     * @return Error code or null
+     */
+    public String getCode() {
+        return code;
+    }
+
+    /**
+     * Gets the line number where the error occurred.
+     *
+     * @return Line number or null
+     */
+    public Integer getLine() {
+        return line;
+    }
+
+    /**
+     * Gets the column position.
+     *
+     * @return Column position or null
+     */
+    public Integer getColumn() {
+        return column;
+    }
+
+    /**
+     * Gets the context snippet.
+     *
+     * @return Context or null
+     */
+    public String getContext() {
+        return context;
+    }
+
+    @Override
+    public String toString() { // "ZonDecodeError [code]: message (line L, column C)" plus a context line; absent parts are omitted
+        StringBuilder sb = new StringBuilder("ZonDecodeError");
+        if (code != null) {
+            sb.append(" [").append(code).append("]");
+        }
+        sb.append(": ").append(getMessage());
+        if (line != null) { // the column is only meaningful next to a line, so it is reported inside the same parentheses
+            sb.append(" (line ").append(line).append(column != null ? ", column " + column : "").append(")");
+        }
+        if (context != null) {
+            sb.append("\n Context: ").append(context);
+        }
+        return sb.toString();
+    }
+}
diff --git a/src/main/java/com/zonformat/zon/ZonDecoder.java b/src/main/java/com/zonformat/zon/ZonDecoder.java
new file mode 100644
index 0000000..ffb1c32
--- /dev/null
+++ b/src/main/java/com/zonformat/zon/ZonDecoder.java
@@ -0,0 +1,806 @@
+/*
+ * ZON Decoder v1.0.5 - Compact Hybrid Format
+ *
+ * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta)
+ * MIT License
+ */
+package com.zonformat.zon;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Decoder for converting ZON format strings back to Java objects.
+ *
+ *

Supports both strict and non-strict modes for validation:

+ *
    + *
  • Strict mode (default): Validates row and field counts
  • + *
  • Non-strict mode: Allows mismatches for lenient parsing
  • + *
+ * + *

Example usage:

+ *
{@code
+ * String zon = "users:@(2):id,name\n1,Alice\n2,Bob";
+ * Object decoded = new ZonDecoder().decode(zon);
+ * // Returns: {users: [{id: 1, name: "Alice"}, {id: 2, name: "Bob"}]}
+ * }
+ */
+public class ZonDecoder {
+    // Header grammars. The "[col]" run is wrapped in ONE capturing group: a repeated capturing group keeps only its last match, which dropped all but the final omitted column.
+    private static final Pattern V2_NAMED_PATTERN = Pattern.compile("^@(\\w+)\\((\\d+)\\)((?:\\[\\w+\\])*):(.+)$");
+    private static final Pattern V2_VALUE_PATTERN = Pattern.compile("^@\\((\\d+)\\)((?:\\[\\w+\\])*):(.+)$");
+    private static final Pattern V2_PATTERN = Pattern.compile("^@(\\d+)((?:\\[\\w+\\])*):(.+)$");
+    private static final Pattern V1_PATTERN = Pattern.compile("^@(\\w+)\\((\\d+)\\):(.+)$");
+    private static final Pattern OMITTED_COL_PATTERN = Pattern.compile("\\[(\\w+)\\]");
+    private static final Pattern URL_PATTERN = Pattern.compile("^https?://");
+    private static final Pattern TIMESTAMP_PATTERN = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}");
+    private static final Pattern TIME_PATTERN = Pattern.compile("^\\d{2}:\\d{2}:\\d{2}");
+    private static final Pattern KEY_PATTERN = Pattern.compile("^[a-zA-Z_]\\w*$");
+    private static final Pattern BLOCK_PATTERN = Pattern.compile("^[a-zA-Z0-9_]+\\s*[\\{\\[]");
+
+    private final boolean strict;
+    private int currentLine; // line counter reported in E002/E302 errors; set to 0 only by the constructor
+
+    /**
+     * Creates a new ZonDecoder with strict mode enabled.
+     */
+    public ZonDecoder() {
+        this(true);
+    }
+
+    /**
+     * Creates a new ZonDecoder with specified strictness.
+     *
+     * @param strict Whether to enable strict validation
+     */
+    public ZonDecoder(boolean strict) {
+        this.strict = strict;
+        this.currentLine = 0;
+    }
+
+    /**
+     * Decodes a ZON format string to Java objects.
+ * + * @param zonStr ZON format string + * @return Decoded Java object (Map, List, or primitive) + * @throws ZonDecodeError if decoding fails + */ + public Object decode(String zonStr) { + if (zonStr == null || zonStr.isEmpty()) { + return new LinkedHashMap<>(); + } + + // Security: Check document size + if (zonStr.length() > Constants.MAX_DOCUMENT_SIZE) { + throw new ZonDecodeError( + "[E301] Document size exceeds maximum (" + Constants.MAX_DOCUMENT_SIZE + " bytes)", + "E301" + ); + } + + String[] lines = zonStr.trim().split("\n"); + if (lines.length == 0) { + return new LinkedHashMap<>(); + } + + // Special case: Root-level ZON list + if (lines.length == 1) { + String line = lines[0].trim(); + if (line.startsWith("[")) { + return parseZonNode(line, 0); + } + + // Check for colon-less object/array pattern + boolean hasBlock = BLOCK_PATTERN.matcher(line).find(); + + if (!line.contains(String.valueOf(Constants.META_SEPARATOR)) && + !line.startsWith(String.valueOf(Constants.TABLE_MARKER)) && + !hasBlock) { + return parsePrimitive(line); + } + } + + // Main decode loop + Map metadata = new LinkedHashMap<>(); + Map tables = new LinkedHashMap<>(); + TableInfo currentTable = null; + String currentTableName = null; + + for (String line : lines) { + String trimmedLine = line.stripTrailing(); + currentLine++; + + // Security: Check line length + if (trimmedLine.length() > Constants.MAX_LINE_LENGTH) { + throw new ZonDecodeError( + "[E302] Line length exceeds maximum (" + Constants.MAX_LINE_LENGTH + " chars)", + "E302", currentLine, null, null + ); + } + + // Skip blank lines + if (trimmedLine.isEmpty()) { + continue; + } + + // Table header (Anonymous or Legacy): @... 
+ if (trimmedLine.startsWith(String.valueOf(Constants.TABLE_MARKER))) { + Object[] parsed = parseTableHeader(trimmedLine); + currentTableName = (String) parsed[0]; + currentTable = (TableInfo) parsed[1]; + tables.put(currentTableName, currentTable); + } + // Table row (if in a table and haven't read all rows) + else if (currentTable != null && currentTable.rowIndex < currentTable.expectedRows) { + Map row = parseTableRow(trimmedLine, currentTable); + currentTable.rows.add(row); + + // If we've read all rows, exit table mode + if (currentTable.rowIndex >= currentTable.expectedRows) { + currentTable = null; + } + } + // Metadata line OR Named Table + else { + int splitIdx = -1; + char splitChar = 0; + int depth = 0; + boolean inQuote = false; + + for (int i = 0; i < trimmedLine.length(); i++) { + char c = trimmedLine.charAt(i); + if (c == '"') inQuote = !inQuote; + if (!inQuote) { + if (c == '{' || c == '[') depth++; + if (c == '}' || c == ']') depth--; + + if (depth == 1 && (c == '{' || c == '[')) { + if (splitIdx == -1) { + splitIdx = i; + splitChar = c; + break; + } + } + if (c == ':' && depth == 0) { + splitIdx = i; + splitChar = ':'; + break; + } + } + } + + if (splitIdx != -1) { + String key; + String val; + + if (splitChar == ':') { + key = trimmedLine.substring(0, splitIdx).trim(); + val = trimmedLine.substring(splitIdx + 1).trim(); + } else { + key = trimmedLine.substring(0, splitIdx).trim(); + val = trimmedLine.substring(splitIdx).trim(); + } + + // Check if it's a named table start + if (val.startsWith(String.valueOf(Constants.TABLE_MARKER))) { + Object[] parsed = parseTableHeader(val); + currentTableName = key; + currentTable = (TableInfo) parsed[1]; + tables.put(currentTableName, currentTable); + } else { + currentTable = null; + metadata.put(key, parseValue(val)); + } + } + } + } + + // Recombine tables into metadata + for (Map.Entry entry : tables.entrySet()) { + String tableName = entry.getKey(); + TableInfo table = entry.getValue(); + + // Strict 
mode: validate row count + if (strict && table.rows.size() != table.expectedRows) { + throw new ZonDecodeError( + "[E001] Row count mismatch in table '" + tableName + + "': expected " + table.expectedRows + ", got " + table.rows.size(), + "E001", null, null, "Table: " + tableName + ); + } + + metadata.put(tableName, reconstructTable(table)); + } + + // Unflatten dotted keys + Object result = unflatten(metadata); + + // Unwrap pure lists + if (result instanceof Map) { + @SuppressWarnings("unchecked") + Map resultMap = (Map) result; + if (resultMap.size() == 1 && resultMap.containsKey("data") && resultMap.get("data") instanceof List) { + return resultMap.get("data"); + } + } + + return result; + } + + /** + * Convenience static method to decode with default settings. + * + * @param zonStr ZON format string + * @return Decoded Java object + */ + public static Object decodeStatic(String zonStr) { + return new ZonDecoder().decode(zonStr); + } + + /** + * Convenience static method to decode with specified strictness. + * + * @param zonStr ZON format string + * @param strict Whether to enable strict validation + * @return Decoded Java object + */ + public static Object decodeStatic(String zonStr, boolean strict) { + return new ZonDecoder(strict).decode(zonStr); + } + + private Object[] parseTableHeader(String line) { + // Try v2.0 format with name + Matcher m = V2_NAMED_PATTERN.matcher(line); + if (m.matches()) { + String tableName = m.group(1); + int count = Integer.parseInt(m.group(2)); + String omittedStr = m.group(3) != null ? m.group(3) : ""; + String colsStr = m.group(4); + + List omittedCols = parseOmittedCols(omittedStr); + List cols = parseColumns(colsStr); + + return new Object[]{tableName, new TableInfo(cols, omittedCols, count)}; + } + + // Try v2.1 format (anonymous/value) + m = V2_VALUE_PATTERN.matcher(line); + if (m.matches()) { + int count = Integer.parseInt(m.group(1)); + String omittedStr = m.group(2) != null ? 
m.group(2) : ""; + String colsStr = m.group(3); + + List omittedCols = parseOmittedCols(omittedStr); + List cols = parseColumns(colsStr); + + return new Object[]{"data", new TableInfo(cols, omittedCols, count)}; + } + + // Try v2.0 format (anonymous) + m = V2_PATTERN.matcher(line); + if (m.matches()) { + int count = Integer.parseInt(m.group(1)); + String omittedStr = m.group(2) != null ? m.group(2) : ""; + String colsStr = m.group(3); + + List omittedCols = parseOmittedCols(omittedStr); + List cols = parseColumns(colsStr); + + return new Object[]{"data", new TableInfo(cols, omittedCols, count)}; + } + + // Try v1.x format + m = V1_PATTERN.matcher(line); + if (m.matches()) { + String tableName = m.group(1); + int count = Integer.parseInt(m.group(2)); + String colsStr = m.group(3); + + List cols = parseColumns(colsStr); + + return new Object[]{tableName, new TableInfo(cols, Collections.emptyList(), count)}; + } + + throw new ZonDecodeError("Invalid table header: " + line); + } + + private List parseOmittedCols(String omittedStr) { + List cols = new ArrayList<>(); + if (omittedStr != null && !omittedStr.isEmpty()) { + Matcher m = OMITTED_COL_PATTERN.matcher(omittedStr); + while (m.find()) { + cols.add(m.group(1)); + } + } + return cols; + } + + private List parseColumns(String colsStr) { + String[] parts = colsStr.split(","); + List cols = new ArrayList<>(); + for (String part : parts) { + cols.add(part.trim()); + } + return cols; + } + + private Map parseTableRow(String line, TableInfo table) { + List tokens = splitByDelimiter(line, ','); + + int coreFieldCount = tokens.size(); + int sparseFieldCount = 0; + + // Count sparse fields + for (int i = table.cols.size(); i < tokens.size(); i++) { + String tok = tokens.get(i); + if (tok.contains(":") && !isURL(tok) && !isTimestamp(tok)) { + sparseFieldCount++; + } + } + + int actualCoreFields = Math.min(coreFieldCount, table.cols.size()); + + // Strict mode validation + if (strict && coreFieldCount < table.cols.size() && 
sparseFieldCount == 0) { + throw new ZonDecodeError( + "[E002] Field count mismatch on row " + (table.rowIndex + 1) + + ": expected " + table.cols.size() + " fields, got " + coreFieldCount, + "E002", currentLine, null, + line.length() > 50 ? line.substring(0, 50) + "..." : line + ); + } + + // Pad if needed + while (tokens.size() < table.cols.size()) { + tokens.add(""); + } + + Map row = new LinkedHashMap<>(); + int tokenIdx = 0; + + // Parse core columns + for (String col : table.cols) { + if (tokenIdx < tokens.size()) { + String tok = tokens.get(tokenIdx); + row.put(col, parseValue(tok)); + tokenIdx++; + } + } + + // Parse optional fields (sparse encoding) + while (tokenIdx < tokens.size()) { + String tok = tokens.get(tokenIdx); + if (tok.contains(":") && !isURL(tok) && !isTimestamp(tok)) { + int colonIdx = tok.indexOf(':'); + String key = tok.substring(0, colonIdx).trim(); + String val = tok.substring(colonIdx + 1).trim(); + + if (KEY_PATTERN.matcher(key).matches()) { + row.put(key, parseValue(val)); + } + } + tokenIdx++; + } + + // Reconstruct omitted sequential columns + if (table.omittedCols != null) { + for (String col : table.omittedCols) { + row.put(col, table.rowIndex + 1); + } + } + + table.rowIndex++; + return row; + } + + private boolean isURL(String s) { + return URL_PATTERN.matcher(s).find() || s.startsWith("/"); + } + + private boolean isTimestamp(String s) { + return TIMESTAMP_PATTERN.matcher(s).find() || TIME_PATTERN.matcher(s).find(); + } + + private List> reconstructTable(TableInfo table) { + List> result = new ArrayList<>(); + for (Map row : table.rows) { + result.add(unflattenMap(row)); + } + return result; + } + + private Object parseZonNode(String text, int depth) { + if (depth > Constants.MAX_NESTING_DEPTH) { + throw new ZonDecodeError("Maximum nesting depth exceeded (" + Constants.MAX_NESTING_DEPTH + ")"); + } + + String trimmed = text.trim(); + if (trimmed.isEmpty()) { + return null; + } + + // Dict: {k:v,k:v} + if 
(trimmed.startsWith("{") && trimmed.endsWith("}")) { + String content = trimmed.substring(1, trimmed.length() - 1).trim(); + if (content.isEmpty()) { + return new LinkedHashMap<>(); + } + + Map obj = new LinkedHashMap<>(); + List pairs = splitByDelimiter(content, ','); + + // Security: Check object key count + if (pairs.size() > Constants.MAX_OBJECT_KEYS) { + throw new ZonDecodeError( + "[E304] Object key count exceeds maximum (" + Constants.MAX_OBJECT_KEYS + " keys)", + "E304" + ); + } + + for (String pair : pairs) { + int[] splitResult = findSplitPoint(pair); + int splitIdx = splitResult[0]; + char splitChar = (char) splitResult[1]; + + if (splitIdx != -1) { + String keyStr, valStr; + if (splitChar == ':') { + keyStr = pair.substring(0, splitIdx).trim(); + valStr = pair.substring(splitIdx + 1).trim(); + } else { + keyStr = pair.substring(0, splitIdx).trim(); + valStr = pair.substring(splitIdx).trim(); + } + + Object key = parsePrimitive(keyStr); + Object val = parseZonNode(valStr, depth + 1); + obj.put(String.valueOf(key), val); + } + } + + return obj; + } + + // List: [v,v] + if (trimmed.startsWith("[") && trimmed.endsWith("]")) { + String content = trimmed.substring(1, trimmed.length() - 1).trim(); + if (content.isEmpty()) { + return new ArrayList<>(); + } + + List items = splitByDelimiter(content, ','); + + // Security: Check array length + if (items.size() > Constants.MAX_ARRAY_LENGTH) { + throw new ZonDecodeError( + "[E303] Array length exceeds maximum (" + Constants.MAX_ARRAY_LENGTH + " items)", + "E303" + ); + } + + List result = new ArrayList<>(); + for (String item : items) { + result.add(parseZonNode(item, depth + 1)); + } + return result; + } + + // Leaf node (primitive) + return parsePrimitive(trimmed); + } + + private int[] findSplitPoint(String pair) { + int splitIdx = -1; + char splitChar = 0; + boolean inQuote = false; + char quoteChar = 0; + int depth = 0; + + for (int i = 0; i < pair.length(); i++) { + char c = pair.charAt(i); + + if (c == '\\' 
&& i + 1 < pair.length()) { + i++; + continue; + } + + if (c == '"' || c == '\'') { + if (!inQuote) { + inQuote = true; + quoteChar = c; + } else if (c == quoteChar) { + inQuote = false; + } + } else if (!inQuote) { + if (c == ':') { + if (depth == 0) { + splitIdx = i; + splitChar = ':'; + break; + } + } else if (c == '{' || c == '[') { + if (depth == 0 && splitIdx == -1) { + splitIdx = i; + splitChar = c; + break; + } + depth++; + } else if (c == '}' || c == ']') { + depth--; + } + } + } + + return new int[]{splitIdx, splitChar}; + } + + private List splitByDelimiter(String text, char delim) { + List parts = new ArrayList<>(); + StringBuilder current = new StringBuilder(); + boolean inQuote = false; + char quoteChar = 0; + int depth = 0; + + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + + // Handle escaped characters + if (c == '\\' && i + 1 < text.length()) { + current.append(c); + current.append(text.charAt(++i)); + continue; + } + + if (c == '"' || c == '\'') { + if (!inQuote) { + inQuote = true; + quoteChar = c; + } else if (c == quoteChar) { + inQuote = false; + } + current.append(c); + } else if (!inQuote) { + if (c == '{' || c == '[') { + depth++; + current.append(c); + } else if (c == '}' || c == ']') { + depth--; + current.append(c); + } else if (c == delim && depth == 0) { + parts.add(current.toString()); + current = new StringBuilder(); + } else { + current.append(c); + } + } else { + current.append(c); + } + } + + if (current.length() > 0) { + parts.add(current.toString()); + } + + return parts; + } + + private Object parsePrimitive(String val) { + String trimmed = val.trim(); + String lower = trimmed.toLowerCase(); + + // Booleans + if (lower.equals("t") || lower.equals("true")) { + return true; + } + if (lower.equals("f") || lower.equals("false")) { + return false; + } + + // Null + if (lower.equals("null") || lower.equals("none") || lower.equals("nil")) { + return null; + } + + // Quoted string (JSON style) + if 
(trimmed.startsWith("\"")) { + try { + return parseJsonString(trimmed); + } catch (Exception e) { + // Ignore + } + } + + // Try number + if (!trimmed.isEmpty()) { + try { + if (trimmed.contains(".") || trimmed.toLowerCase().contains("e")) { + return Double.parseDouble(trimmed); + } else { + return Long.parseLong(trimmed); + } + } catch (NumberFormatException e) { + // Not a number + } + } + + return trimmed; + } + + private Object parseValue(String val) { + String trimmed = val.trim(); + + // Quoted string - check BEFORE primitives + if (trimmed.startsWith("\"") && trimmed.endsWith("\"") && trimmed.length() >= 2) { + String content = trimmed.substring(1, trimmed.length() - 1); + String unquoted; + + // Check if it uses CSV-style escaping (doubled quotes) or JSON-style escaping (backslash) + if (content.contains("\"\"")) { + // CSV-style: "" becomes " + unquoted = content.replace("\"\"", "\""); + } else if (content.contains("\\")) { + // JSON-style escaping - parse it + try { + unquoted = (String) parseJsonString(trimmed); + } catch (Exception e) { + unquoted = content; + } + } else { + unquoted = content; + } + + // Check if the unquoted content is a ZON structure + String stripped = unquoted.trim(); + if (stripped.startsWith("{") || stripped.startsWith("[")) { + return parseZonNode(stripped, 0); + } + + return unquoted; + } + + // Booleans + String lower = trimmed.toLowerCase(); + if (lower.equals("t") || lower.equals("true")) { + return true; + } + if (lower.equals("f") || lower.equals("false")) { + return false; + } + + // Null + if (lower.equals("null") || lower.equals("none") || lower.equals("nil")) { + return null; + } + + // Check for ZON-style nested structures + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + return parseZonNode(trimmed, 0); + } + + // Try number + if (!trimmed.isEmpty()) { + try { + if (trimmed.contains(".") || trimmed.toLowerCase().contains("e")) { + return Double.parseDouble(trimmed); + } else { + return 
Long.parseLong(trimmed); + } + } catch (NumberFormatException e) { + // Not a number + } + } + + return trimmed; + } + + private Object parseJsonString(String s) { + if (!s.startsWith("\"") || !s.endsWith("\"")) { + throw new IllegalArgumentException("Not a JSON string"); + } + + String content = s.substring(1, s.length() - 1); + StringBuilder result = new StringBuilder(); + + for (int i = 0; i < content.length(); i++) { + char c = content.charAt(i); + if (c == '\\' && i + 1 < content.length()) { + char next = content.charAt(++i); + switch (next) { + case '"': result.append('"'); break; + case '\\': result.append('\\'); break; + case 'n': result.append('\n'); break; + case 'r': result.append('\r'); break; + case 't': result.append('\t'); break; + case 'u': + if (i + 4 < content.length()) { + String hex = content.substring(i + 1, i + 5); + result.append((char) Integer.parseInt(hex, 16)); + i += 4; + } + break; + default: result.append(next); + } + } else { + result.append(c); + } + } + + return result.toString(); + } + + @SuppressWarnings("unchecked") + private Map unflattenMap(Map d) { + Map result = new LinkedHashMap<>(); + + for (Map.Entry entry : d.entrySet()) { + String key = entry.getKey(); + Object value = entry.getValue(); + + if (!key.contains(".")) { + result.put(key, value); + continue; + } + + String[] parts = key.split("\\."); + + // Security: Prevent prototype pollution + boolean hasPrototypePollution = false; + for (String part : parts) { + if (part.equals("__proto__") || part.equals("constructor") || part.equals("prototype")) { + hasPrototypePollution = true; + break; + } + } + if (hasPrototypePollution) { + continue; + } + + Map target = result; + + for (int i = 0; i < parts.length - 1; i++) { + String part = parts[i]; + if (!target.containsKey(part)) { + target.put(part, new LinkedHashMap<>()); + } + Object next = target.get(part); + if (next instanceof Map) { + target = (Map) next; + } else { + break; + } + } + + String finalKey = 
parts[parts.length - 1]; + if (!finalKey.matches("\\d+")) { + target.put(finalKey, value); + } + } + + return result; + } + + @SuppressWarnings("unchecked") + private Object unflatten(Map d) { + return unflattenMap(d); + } + + /** + * Internal class to track table parsing state. + */ + private static class TableInfo { + List cols; + List omittedCols; + List> rows; + int rowIndex; + int expectedRows; + + TableInfo(List cols, List omittedCols, int expectedRows) { + this.cols = cols; + this.omittedCols = omittedCols; + this.rows = new ArrayList<>(); + this.rowIndex = 0; + this.expectedRows = expectedRows; + } + } +} diff --git a/src/main/java/com/zonformat/zon/ZonEncoder.java b/src/main/java/com/zonformat/zon/ZonEncoder.java new file mode 100644 index 0000000..3743180 --- /dev/null +++ b/src/main/java/com/zonformat/zon/ZonEncoder.java @@ -0,0 +1,654 @@ +/* + * ZON Encoder v1.0.5 - Compact Hybrid Format + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import java.util.*; +import java.util.regex.Pattern; + +/** + * Encoder for converting Java objects to ZON format. + * + *

ZON (Zero Overhead Notation) is a compact, human-readable data format + * optimized for LLM token efficiency. It achieves 35-50% token reduction + * vs JSON through tabular encoding, single-character primitives, and + * intelligent compression while maintaining 100% data fidelity.

+ * + *

Example usage:

+ *
{@code
+ * Map<String, Object> data = new HashMap<>();
+ * data.put("name", "Alice");
+ * data.put("age", 30);
+ * data.put("active", true);
+ * 
+ * String encoded = new ZonEncoder().encode(data);
+ * // Output:
+ * // active:T
+ * // age:30
+ * // name:Alice
+ * }
+ */ +public class ZonEncoder { + + private static final Pattern SAFE_STRING_PATTERN = Pattern.compile("^[a-zA-Z0-9_\\-\\.]+$"); + private static final Pattern ISO_DATE_FULL = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(Z|[+-]\\d{2}:\\d{2})$"); + private static final Pattern ISO_DATE_ONLY = Pattern.compile("^\\d{4}-\\d{2}-\\d{2}$"); + private static final Pattern TIME_ONLY = Pattern.compile("^\\d{2}:\\d{2}:\\d{2}$"); + private static final Pattern PURE_INTEGER = Pattern.compile("^-?\\d+$"); + private static final Pattern PURE_DECIMAL = Pattern.compile("^-?\\d+\\.\\d+$"); + private static final Pattern SCIENTIFIC_NOTATION = Pattern.compile("^-?\\d+(\\.\\d+)?[eE][+-]?\\d+$"); + private static final Pattern CONTROL_CHARS = Pattern.compile("[\\x00-\\x1f]"); + + private final int anchorInterval; + private final Set visited = new HashSet<>(); + + /** + * Creates a new ZonEncoder with default settings. + */ + public ZonEncoder() { + this(Constants.DEFAULT_ANCHOR_INTERVAL); + } + + /** + * Creates a new ZonEncoder with custom anchor interval. + * + * @param anchorInterval Anchor interval for large datasets + */ + public ZonEncoder(int anchorInterval) { + this.anchorInterval = anchorInterval; + } + + /** + * Encodes Java data to ZON format. 
+ * + * @param data Data to encode (Map, List, or primitive) + * @return ZON-formatted string + * @throws IllegalArgumentException if circular reference detected + */ + public String encode(Object data) { + visited.clear(); + + if (data == null) { + return "null"; + } + + // Handle primitives at root level + if (isPrimitive(data)) { + return formatValue(data); + } + + // Handle arrays at root level + if (data instanceof List) { + @SuppressWarnings("unchecked") + List list = (List) data; + + if (list.isEmpty()) { + return "[]"; + } + + // Check if it's a uniform array of objects (table candidate) + if (isUniformObjectArray(list)) { + double irregularity = calculateIrregularity(list); + if (irregularity <= 0.6) { + // Use table format + List output = writeTable(list, null); + return String.join("\n", output); + } + } + + // Otherwise use list format + return formatZonNode(data); + } + + // Handle maps/objects + if (data instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) data; + + if (map.isEmpty()) { + return ""; + } + + // Extract primary stream (table) from map + Object[] extraction = extractPrimaryStream(map); + @SuppressWarnings("unchecked") + List> streamData = (List>) extraction[0]; + @SuppressWarnings("unchecked") + Map metadata = (Map) extraction[1]; + String streamKey = (String) extraction[2]; + + List output = new ArrayList<>(); + + // Write metadata + if (!metadata.isEmpty()) { + output.addAll(writeMetadata(metadata)); + } + + // Write table if exists + if (streamData != null && streamKey != null) { + if (!output.isEmpty()) { + output.add(""); // Blank line separator + } + output.addAll(writeTable(streamData, streamKey)); + } + + return String.join("\n", output); + } + + // Fallback + return formatZonNode(data); + } + + /** + * Convenience static method to encode data. 
+ * + * @param data Data to encode + * @return ZON-formatted string + */ + public static String encodeStatic(Object data) { + return new ZonEncoder().encode(data); + } + + private boolean isPrimitive(Object data) { + return data == null || + data instanceof String || + data instanceof Number || + data instanceof Boolean; + } + + @SuppressWarnings("unchecked") + private boolean isUniformObjectArray(List list) { + if (list.isEmpty()) return false; + + for (Object item : list) { + if (!(item instanceof Map)) { + return false; + } + } + return true; + } + + @SuppressWarnings("unchecked") + private double calculateIrregularity(List data) { + if (data.isEmpty()) return 0; + + Set allKeys = new HashSet<>(); + List> keySets = new ArrayList<>(); + + for (Object item : data) { + if (item instanceof Map) { + Map map = (Map) item; + Set keys = new HashSet<>(map.keySet()); + keySets.add(keys); + allKeys.addAll(keys); + } + } + + if (allKeys.isEmpty()) return 0; + + // Calculate average Jaccard similarity + double totalOverlap = 0; + int comparisons = 0; + + for (int i = 0; i < keySets.size(); i++) { + for (int j = i + 1; j < keySets.size(); j++) { + Set keys1 = keySets.get(i); + Set keys2 = keySets.get(j); + + // Count shared keys + Set intersection = new HashSet<>(keys1); + intersection.retainAll(keys2); + int shared = intersection.size(); + + // Jaccard similarity + int union = keys1.size() + keys2.size() - shared; + double similarity = union > 0 ? 
(double) shared / union : 1.0; + + totalOverlap += similarity; + comparisons++; + } + } + + if (comparisons == 0) return 0; + + double avgSimilarity = totalOverlap / comparisons; + return 1.0 - avgSimilarity; + } + + @SuppressWarnings("unchecked") + private Object[] extractPrimaryStream(Map data) { + // Find largest list of objects + List candidates = new ArrayList<>(); + + for (Map.Entry entry : data.entrySet()) { + String key = entry.getKey(); + Object value = entry.getValue(); + + if (value instanceof List) { + List list = (List) value; + if (!list.isEmpty() && list.get(0) instanceof Map) { + // Score = rows * columns + Map first = (Map) list.get(0); + int score = list.size() * first.size(); + candidates.add(new Object[]{key, list, score}); + } + } + } + + if (!candidates.isEmpty()) { + // Sort by score (descending), then alphabetically by key + candidates.sort((a, b) -> { + int scoreCompare = Integer.compare((Integer) b[2], (Integer) a[2]); + if (scoreCompare != 0) return scoreCompare; + return ((String) a[0]).compareTo((String) b[0]); + }); + + String streamKey = (String) candidates.get(0)[0]; + @SuppressWarnings("unchecked") + List> stream = (List>) candidates.get(0)[1]; + + Map meta = new LinkedHashMap<>(); + for (Map.Entry entry : data.entrySet()) { + if (!entry.getKey().equals(streamKey)) { + meta.put(entry.getKey(), entry.getValue()); + } + } + + return new Object[]{stream, meta, streamKey}; + } + + return new Object[]{null, data, null}; + } + + private List writeMetadata(Map metadata) { + List lines = new ArrayList<>(); + + // Flatten top-level objects (depth 1) + Map flattened = flatten(metadata, "", ".", 1, 0); + + List sortedKeys = new ArrayList<>(flattened.keySet()); + Collections.sort(sortedKeys); + + for (String key : sortedKeys) { + Object val = flattened.get(key); + String valStr = formatValue(val); + + // Colon-less syntax for objects/arrays + if (valStr.startsWith("{") || valStr.startsWith("[")) { + lines.add(key + valStr); + } else { + 
lines.add(key + Constants.META_SEPARATOR + valStr); + } + } + + return lines; + } + + @SuppressWarnings("unchecked") + private List writeTable(List stream, String key) { + if (stream == null || stream.isEmpty()) { + return Collections.emptyList(); + } + + List lines = new ArrayList<>(); + + // Flatten all rows + List> flatStream = new ArrayList<>(); + for (Object row : stream) { + if (row instanceof Map) { + flatStream.add(flatten((Map) row, "", ".", 0, 0)); + } + } + + // Get all column names + Set allKeysSet = new LinkedHashSet<>(); + for (Map row : flatStream) { + allKeysSet.addAll(row.keySet()); + } + List cols = new ArrayList<>(allKeysSet); + Collections.sort(cols); + + // Build header + StringBuilder header = new StringBuilder(); + if (key != null && !key.equals("data")) { + header.append(key).append(Constants.META_SEPARATOR); + } + header.append(Constants.TABLE_MARKER).append("(").append(stream.size()).append(")"); + header.append(Constants.META_SEPARATOR).append(String.join(",", cols)); + lines.add(header.toString()); + + // Write rows + for (Map row : flatStream) { + List tokens = new ArrayList<>(); + for (String col : cols) { + Object val = row.get(col); + if (val == null) { + tokens.add("null"); + } else { + tokens.add(formatValue(val)); + } + } + lines.add(String.join(",", tokens)); + } + + return lines; + } + + @SuppressWarnings("unchecked") + private Map flatten(Map d, String parent, String sep, int maxDepth, int currentDepth) { + checkCircular(d); + + Map result = new LinkedHashMap<>(); + + for (Map.Entry entry : d.entrySet()) { + String k = entry.getKey(); + Object v = entry.getValue(); + String newKey = parent.isEmpty() ? 
k : parent + sep + k; + + if (v instanceof Map && currentDepth < maxDepth) { + Map nested = flatten((Map) v, newKey, sep, maxDepth, currentDepth + 1); + result.putAll(nested); + } else { + result.put(newKey, v); + } + } + + return result; + } + + private void checkCircular(Object obj) { + if (obj != null && (obj instanceof Map || obj instanceof List)) { + if (visited.contains(System.identityHashCode(obj))) { + throw new IllegalArgumentException("Circular reference detected"); + } + visited.add(System.identityHashCode(obj)); + } + } + + private String formatValue(Object val) { + if (val == null) { + return "null"; + } + + if (val instanceof Boolean) { + return (Boolean) val ? "T" : "F"; + } + + if (val instanceof Number) { + Number num = (Number) val; + + // Handle special values + if (val instanceof Double) { + double d = (Double) val; + if (Double.isNaN(d) || Double.isInfinite(d)) { + return "null"; + } + } + if (val instanceof Float) { + float f = (Float) val; + if (Float.isNaN(f) || Float.isInfinite(f)) { + return "null"; + } + } + + // Check if integer + if (val instanceof Integer || val instanceof Long || val instanceof Short || val instanceof Byte) { + return String.valueOf(num.longValue()); + } + + // Handle floats - avoid scientific notation + double d = num.doubleValue(); + if (d == Math.floor(d) && !Double.isInfinite(d)) { + return String.valueOf((long) d); + } + + String s = String.valueOf(d); + // Avoid scientific notation + if (s.contains("E") || s.contains("e")) { + // Convert to fixed-point + if (Math.abs(d) >= 1) { + s = String.format("%.15f", d).replaceAll("0+$", "").replaceAll("\\.$", ".0"); + } else { + s = String.format("%.15f", d).replaceAll("0+$", ""); + } + } + // Ensure decimal point for floats + if (!s.contains(".")) { + s += ".0"; + } + return s; + } + + if (val instanceof Map || val instanceof List) { + return formatZonNode(val); + } + + // String formatting + String s = String.valueOf(val); + + // Check for newlines - must escape and 
quote + if (s.contains("\n") || s.contains("\r")) { + // Use JSON-style escaping inside CSV quotes + StringBuilder sb = new StringBuilder("\""); + for (char c : s.toCharArray()) { + switch (c) { + case '\n': sb.append("\\n"); break; + case '\r': sb.append("\\r"); break; + case '\t': sb.append("\\t"); break; + case '\\': sb.append("\\\\"); break; + case '"': sb.append("\"\""); break; // CSV-style quote doubling + default: + if (c < 32) { + sb.append(String.format("\\u%04x", (int) c)); + } else { + sb.append(c); + } + } + } + sb.append("\""); + return sb.toString(); + } + + // ISO Date detection + if (isISODate(s)) { + return s; + } + + // Check if needs type protection (looks like number, boolean, null, etc.) + if (needsTypeProtection(s)) { + // Just use simple CSV quoting + return csvQuote(s); + } + + // Check if needs CSV quoting (contains special chars) + if (needsQuotes(s)) { + return csvQuote(s); + } + + return s; + } + + @SuppressWarnings("unchecked") + private String formatZonNode(Object val) { + checkCircular(val); + + if (val == null) { + return "null"; + } + + if (val instanceof Boolean) { + return (Boolean) val ? 
"T" : "F"; + } + + if (val instanceof Number) { + return formatValue(val); + } + + if (val instanceof Map) { + Map map = (Map) val; + if (map.isEmpty()) { + return "{}"; + } + + List items = new ArrayList<>(); + List sortedKeys = new ArrayList<>(map.keySet()); + Collections.sort(sortedKeys); + + for (String k : sortedKeys) { + Object v = map.get(k); + String kStr = k; + if (Pattern.compile("[,:{}\\[\\]\"]").matcher(k).find()) { + kStr = jsonEscape(k); + } + + String vStr = formatZonNode(v); + + // Colon-less syntax for nested objects/arrays + if (vStr.startsWith("{") || vStr.startsWith("[")) { + items.add(kStr + vStr); + } else { + items.add(kStr + ":" + vStr); + } + } + + return "{" + String.join(",", items) + "}"; + } + + if (val instanceof List) { + List list = (List) val; + if (list.isEmpty()) { + return "[]"; + } + + List items = new ArrayList<>(); + for (Object item : list) { + items.add(formatZonNode(item)); + } + return "[" + String.join(",", items) + "]"; + } + + // String + String s = String.valueOf(val); + + if (s.contains("\n") || s.contains("\r")) { + return jsonEscape(s); + } + + if (isISODate(s)) { + return s; + } + + if (needsTypeProtection(s)) { + return jsonEscape(s); + } + + if (s.isEmpty() || !s.trim().equals(s)) { + return jsonEscape(s); + } + + if (Pattern.compile("[,{}\\[\\]\"]").matcher(s).find()) { + return jsonEscape(s); + } + + return s; + } + + private boolean isISODate(String s) { + return ISO_DATE_FULL.matcher(s).matches() || + ISO_DATE_ONLY.matcher(s).matches() || + TIME_ONLY.matcher(s).matches(); + } + + private boolean needsTypeProtection(String s) { + String lower = s.toLowerCase(); + + // Reserved words + if (Arrays.asList("t", "f", "true", "false", "null", "none", "nil").contains(lower)) { + return true; + } + + // Gas/Liquid tokens + if (s.equals(Constants.GAS_TOKEN) || s.equals(Constants.LIQUID_TOKEN)) { + return true; + } + + // Leading/trailing whitespace + if (!s.trim().equals(s)) { + return true; + } + + // Control 
characters + if (CONTROL_CHARS.matcher(s).find()) { + return true; + } + + // Pure numbers + if (PURE_INTEGER.matcher(s).matches() || + PURE_DECIMAL.matcher(s).matches() || + SCIENTIFIC_NOTATION.matcher(s).matches()) { + return true; + } + + return false; + } + + private boolean needsQuotes(String s) { + if (s.isEmpty()) { + return true; + } + + if (Arrays.asList("T", "F", "null", Constants.GAS_TOKEN, Constants.LIQUID_TOKEN).contains(s)) { + return true; + } + + if (PURE_INTEGER.matcher(s).matches()) { + return true; + } + + try { + Double.parseDouble(s); + return true; + } catch (NumberFormatException e) { + // Not a number + } + + if (!s.trim().equals(s)) { + return true; + } + + if (Pattern.compile("[,\\n\\r\\t\"\\[\\]|;]").matcher(s).find()) { + return true; + } + + return false; + } + + private String csvQuote(String s) { + String escaped = s.replace("\"", "\"\""); + return "\"" + escaped + "\""; + } + + private String jsonEscape(String s) { + StringBuilder sb = new StringBuilder("\""); + for (char c : s.toCharArray()) { + switch (c) { + case '"': sb.append("\\\""); break; + case '\\': sb.append("\\\\"); break; + case '\n': sb.append("\\n"); break; + case '\r': sb.append("\\r"); break; + case '\t': sb.append("\\t"); break; + default: + if (c < 32) { + sb.append(String.format("\\u%04x", (int) c)); + } else { + sb.append(c); + } + } + } + sb.append("\""); + return sb.toString(); + } +} diff --git a/src/test/java/com/zonformat/zon/CanonicalNumbersTest.java b/src/test/java/com/zonformat/zon/CanonicalNumbersTest.java new file mode 100644 index 0000000..4615434 --- /dev/null +++ b/src/test/java/com/zonformat/zon/CanonicalNumbersTest.java @@ -0,0 +1,255 @@ +/* + * Canonical Number Formatting Tests + * Port of canonical-numbers.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import 
org.junit.jupiter.api.Nested; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for canonical number formatting in ZON. + */ +class CanonicalNumbersTest { + + @Nested + @DisplayName("Integer Numbers") + class IntegerNumberTests { + + @Test + @DisplayName("should encode integers without decimal point") + void testIntegerWithoutDecimal() { + Map data = new LinkedHashMap<>(); + data.put("value", 42); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("42")); + assertFalse(encoded.contains("42.0")); + } + + @Test + @DisplayName("should handle zero") + void testZero() { + Map data = new LinkedHashMap<>(); + data.put("value", 0); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("value:0")); + } + + @Test + @DisplayName("should handle negative integers") + void testNegativeIntegers() { + Map data = new LinkedHashMap<>(); + data.put("value", -123); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("-123")); + } + } + + @Nested + @DisplayName("Floating Point Numbers") + class FloatingPointTests { + + @Test + @DisplayName("should encode floats without trailing zeros") + void testFloatWithoutTrailingZeros() { + Map data = new LinkedHashMap<>(); + data.put("value", 3.14); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("3.14")); + assertFalse(encoded.contains("3.140000")); + } + + @Test + @DisplayName("should handle very small decimals") + void testVerySmallDecimals() { + Map data = new LinkedHashMap<>(); + data.put("value", 0.001); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("0.001")); + assertFalse(encoded.contains("1e-3")); + } + + @Test + @DisplayName("should not use scientific notation for large numbers") + void testLargeNumbersNoScientificNotation() { + Map data = new LinkedHashMap<>(); + data.put("value", 1000000); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("1000000")); + 
assertFalse(encoded.contains("1e6")); + assertFalse(encoded.contains("1e+6")); + } + + @Test + @DisplayName("should handle numbers with many decimal places") + void testManyDecimalPlaces() { + Map data = new LinkedHashMap<>(); + data.put("value", 3.141592653589793); + + String encoded = Zon.encode(data); + + // Should preserve precision + assertTrue(encoded.contains("3.14159265358979")); + // Should not contain scientific notation + assertFalse(encoded.matches(".*\\de[+-]?\\d.*")); + } + } + + @Nested + @DisplayName("Special Values") + class SpecialValuesTests { + + @Test + @DisplayName("should encode NaN as null") + void testNaNAsNull() { + Map data = new LinkedHashMap<>(); + data.put("value", Double.NaN); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("value:null")); + } + + @Test + @DisplayName("should encode Infinity as null") + void testInfinityAsNull() { + Map data = new LinkedHashMap<>(); + data.put("value", Double.POSITIVE_INFINITY); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("value:null")); + } + + @Test + @DisplayName("should encode -Infinity as null") + void testNegativeInfinityAsNull() { + Map data = new LinkedHashMap<>(); + data.put("value", Double.NEGATIVE_INFINITY); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("value:null")); + } + } + + @Nested + @DisplayName("Round-Trip Preservation") + class RoundTripPreservationTests { + + @Test + @DisplayName("should preserve integer values through round-trip") + void testIntegerRoundTrip() { + Map data = new LinkedHashMap<>(); + data.put("value", 42); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(42L, ((Number) decoded.get("value")).longValue()); + } + + @Test + @DisplayName("should preserve float values through round-trip") + void testFloatRoundTrip() { + Map data = new LinkedHashMap<>(); + data.put("value", 3.14); + + String encoded = 
Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(3.14, ((Number) decoded.get("value")).doubleValue(), 0.0000000001); + } + + @Test + @DisplayName("should preserve large numbers through round-trip") + void testLargeNumberRoundTrip() { + Map data = new LinkedHashMap<>(); + data.put("value", 1000000); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(1000000L, ((Number) decoded.get("value")).longValue()); + } + + @Test + @DisplayName("should preserve very small numbers through round-trip") + void testVerySmallNumberRoundTrip() { + Map data = new LinkedHashMap<>(); + data.put("value", 0.000001); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(0.000001, ((Number) decoded.get("value")).doubleValue(), 0.0000000001); + } + } + + @Nested + @DisplayName("Array of Numbers") + class ArrayOfNumbersTests { + + @Test + @DisplayName("should format all numbers canonically in arrays") + void testArrayNumbersCanonical() { + List> values = new ArrayList<>(); + + Map v1 = new LinkedHashMap<>(); + v1.put("num", 1000000); + values.add(v1); + + Map v2 = new LinkedHashMap<>(); + v2.put("num", 0.001); + values.add(v2); + + Map v3 = new LinkedHashMap<>(); + v3.put("num", 42); + values.add(v3); + + Map v4 = new LinkedHashMap<>(); + v4.put("num", 3.14); + values.add(v4); + + Map data = new LinkedHashMap<>(); + data.put("values", values); + + String encoded = Zon.encode(data); + + // Should not contain scientific notation + assertFalse(encoded.contains("e+")); + assertFalse(encoded.contains("e-")); + assertFalse(encoded.contains("E")); + + // Should contain actual values + assertTrue(encoded.contains("1000000")); + assertTrue(encoded.contains("0.001")); + assertTrue(encoded.contains("42")); + assertTrue(encoded.contains("3.14")); + } + } +} diff --git 
a/src/test/java/com/zonformat/zon/ConformanceDecoderTest.java b/src/test/java/com/zonformat/zon/ConformanceDecoderTest.java new file mode 100644 index 0000000..8afb84b --- /dev/null +++ b/src/test/java/com/zonformat/zon/ConformanceDecoderTest.java @@ -0,0 +1,196 @@ +/* + * Conformance Tests - Decoder + * Port of conformance-decoder.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Conformance tests based on SPEC.md §11.2 Decoder Checklist. + */ +class ConformanceDecoderTest { + + @Test + @DisplayName("should accept UTF-8 with LF or CRLF") + void testAcceptsLFandCRLF() { + String zonLF = "key:value\nkey2:value2"; + String zonCRLF = "key:value\r\nkey2:value2"; + + assertDoesNotThrow(() -> Zon.decode(zonLF)); + assertDoesNotThrow(() -> Zon.decode(zonCRLF)); + } + + @Test + @DisplayName("should decode T → true, F → false, null → null") + void testDecodePrimitives() { + String zonData = "active:T\narchived:F\nvalue:null"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + assertEquals(true, result.get("active")); + assertEquals(false, result.get("archived")); + assertNull(result.get("value")); + } + + @Test + @DisplayName("should parse decimal and exponent numbers") + void testParseNumbers() { + String zonData = "int:42\nfloat:3.14\nbig:1000000"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + assertEquals(42L, ((Number) result.get("int")).longValue()); + assertEquals(3.14, ((Number) result.get("float")).doubleValue(), 0.0001); + assertEquals(1000000L, ((Number) result.get("big")).longValue()); + } + + @Test + @DisplayName("should treat leading-zero numbers as strings") + void testLeadingZeroAsString() { + String zonData = "code:\"007\""; + 
@SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + assertEquals("007", result.get("code")); + assertTrue(result.get("code") instanceof String); + } + + @Test + @DisplayName("should unescape quoted strings") + void testUnescapeQuotedStrings() { + String zonData = "text:\"he said \\\"hello\\\"\""; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + assertEquals("he said \"hello\"", result.get("text")); + } + + @Test + @DisplayName("should parse table rows into array of objects") + void testParseTableRows() { + String zonData = "users:@(2):id,name\n1,Alice\n2,Bob"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + + assertEquals(2, users.size()); + assertEquals(1L, ((Number) users.get(0).get("id")).longValue()); + assertEquals("Alice", users.get(0).get("name")); + assertEquals(2L, ((Number) users.get(1).get("id")).longValue()); + assertEquals("Bob", users.get(1).get("name")); + } + + @Test + @DisplayName("should preserve key order from document") + void testPreserveKeyOrder() { + String zonData = "z:1\na:2\nm:3"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + List keys = new ArrayList<>(result.keySet()); + assertEquals(Arrays.asList("z", "a", "m"), keys); + } + + @Test + @DisplayName("should reject prototype pollution attempts") + void testRejectPrototypePollution() { + String malicious = "items:@(1):id,__proto__.polluted\n1,true"; + Object decoded = Zon.decode(malicious, false); + + // Check that prototype pollution didn't occur + Map testObj = new HashMap<>(); + assertNull(testObj.get("polluted")); + } + + @Test + @DisplayName("should throw on nesting depth > 100") + void testThrowOnDeepNesting() { + StringBuilder deepNested = new StringBuilder(); + for (int i = 0; i < 150; i++) { + deepNested.append("["); + } + for (int i = 0; i < 150; i++) { + deepNested.append("]"); + } + + 
assertThrows(ZonDecodeError.class, () -> Zon.decode(deepNested.toString())); + } + + @Test + @DisplayName("should throw on line length > 1MB (E302)") + void testThrowOnLineLengthExceeded() { + StringBuilder longLine = new StringBuilder("key:"); + for (int i = 0; i < Constants.MAX_LINE_LENGTH + 1; i++) { + longLine.append("x"); + } + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(longLine.toString())); + assertTrue(error.getMessage().contains("E302")); + } + + @Test + @DisplayName("should handle case-insensitive null/boolean aliases") + void testCaseInsensitiveAliases() { + String zonData = "a:TRUE\nb:False\nc:NONE\nd:nil"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + assertEquals(true, result.get("a")); + assertEquals(false, result.get("b")); + assertNull(result.get("c")); + assertNull(result.get("d")); + } + + @Test + @DisplayName("should reconstruct nested objects from dotted keys") + void testReconstructNestedObjects() { + String zonData = "config.db.host:localhost\nconfig.db.port:5432"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + Map config = (Map) result.get("config"); + @SuppressWarnings("unchecked") + Map db = (Map) config.get("db"); + + assertEquals("localhost", db.get("host")); + assertEquals(5432L, ((Number) db.get("port")).longValue()); + } + + @Test + @DisplayName("should unwrap pure lists (data key)") + void testUnwrapPureLists() { + String zonData = "data:@(2):id,name\n1,Alice\n2,Bob"; + Object result = Zon.decode(zonData); + + // Should return array directly, not { data: [...] 
} + assertTrue(result instanceof List); + @SuppressWarnings("unchecked") + List> list = (List>) result; + assertEquals(2, list.size()); + } + + @Test + @DisplayName("should handle empty strings in table cells") + void testEmptyStringsInTableCells() { + String zonData = "users:@(2):id,name\n1,\"\"\n2,Bob"; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals("", users.get(0).get("name")); + } +} diff --git a/src/test/java/com/zonformat/zon/ConformanceEncoderTest.java b/src/test/java/com/zonformat/zon/ConformanceEncoderTest.java new file mode 100644 index 0000000..1b00fa3 --- /dev/null +++ b/src/test/java/com/zonformat/zon/ConformanceEncoderTest.java @@ -0,0 +1,235 @@ +/* + * Conformance Tests - Encoder + * Port of conformance-encoder.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Conformance tests based on SPEC.md §11.1 Encoder Checklist. 
+ */ +class ConformanceEncoderTest { + + @Test + @DisplayName("should emit UTF-8 with LF line endings") + void testEmitUTF8WithLF() { + Map data = new LinkedHashMap<>(); + data.put("a", 1); + data.put("b", 2); + + String encoded = Zon.encode(data); + + // Should use LF, not CRLF + assertFalse(encoded.contains("\r\n")); + // Should be a string (UTF-8 compatible) + assertNotNull(encoded); + } + + @Test + @DisplayName("should encode booleans as T/F") + void testEncodeBooleans() { + Map data = new LinkedHashMap<>(); + data.put("active", true); + data.put("archived", false); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("active:T")); + assertTrue(encoded.contains("archived:F")); + assertFalse(encoded.contains("true")); + assertFalse(encoded.contains("false")); + } + + @Test + @DisplayName("should encode null as 'null'") + void testEncodeNull() { + Map data = new LinkedHashMap<>(); + data.put("value", null); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("value:null")); + } + + @Test + @DisplayName("should emit canonical numbers") + void testCanonicalNumbers() { + Map data = new LinkedHashMap<>(); + data.put("int", 42); + data.put("float", 3.14); + data.put("big", 1000000); + + String encoded = Zon.encode(data); + + // No scientific notation + assertTrue(encoded.contains("1000000")); + assertFalse(encoded.contains("1e6")); + assertFalse(encoded.contains("1e+6")); + + // Has decimal for floats + assertTrue(encoded.contains("3.14")); + } + + @Test + @DisplayName("should normalize NaN/Infinity to null") + void testNormalizeSpecialValues() { + Map data = new LinkedHashMap<>(); + data.put("nan", Double.NaN); + data.put("inf", Double.POSITIVE_INFINITY); + data.put("negInf", Double.NEGATIVE_INFINITY); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("nan:null")); + assertTrue(encoded.contains("inf:null")); + assertTrue(encoded.contains("negInf:null")); + } + + @Test + @DisplayName("should detect uniform 
arrays → table format") + void testDetectUniformArrays() { + List> users = new ArrayList<>(); + + Map u1 = new LinkedHashMap<>(); + u1.put("id", 1); + u1.put("name", "Alice"); + users.add(u1); + + Map u2 = new LinkedHashMap<>(); + u2.put("id", 2); + u2.put("name", "Bob"); + users.add(u2); + + Map data = new LinkedHashMap<>(); + data.put("users", users); + + String encoded = Zon.encode(data); + + // Should have table marker + assertTrue(encoded.matches("(?s).*users:@\\(\\d+\\).*")); + assertTrue(encoded.contains("id,name")); + } + + @Test + @DisplayName("should emit table headers with count and columns") + void testTableHeaders() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("x", 1); + i1.put("y", 2); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("x", 3); + i2.put("y", 4); + items.add(i2); + + Map i3 = new LinkedHashMap<>(); + i3.put("x", 5); + i3.put("y", 6); + items.add(i3); + + Map data = new LinkedHashMap<>(); + data.put("items", items); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("items:@(3):")); + } + + @Test + @DisplayName("should sort columns alphabetically") + void testSortColumnsAlphabetically() { + List> records = new ArrayList<>(); + + Map r1 = new LinkedHashMap<>(); + r1.put("z", 1); + r1.put("a", 2); + r1.put("m", 3); + records.add(r1); + + Map data = new LinkedHashMap<>(); + data.put("records", records); + + String encoded = Zon.encode(data); + + // Columns should be sorted: a, m, z + assertTrue(encoded.matches("(?s).*records:@\\(1\\):a,m,z.*")); + } + + @Test + @DisplayName("should quote strings with special characters") + void testQuoteSpecialCharacters() { + Map data = new LinkedHashMap<>(); + data.put("comma", "a,b"); + data.put("colon", "x:y"); + data.put("quote", "say \"hi\""); + + String encoded = Zon.encode(data); + + assertTrue(encoded.contains("\"a,b\"")); + // Colons are allowed unquoted in v2.0.5 + assertTrue(encoded.contains("x:y")); + // Uses quote doubling: " 
becomes "" + assertTrue(encoded.contains("\"\"hi\"\"")); + } + + @Test + @DisplayName("should escape quotes in strings") + void testEscapeQuotes() { + Map data = new LinkedHashMap<>(); + data.put("text", "he said \"hello\""); + + String encoded = Zon.encode(data); + + // Uses quote doubling + assertTrue(encoded.contains("\"\"hello\"\"")); + } + + @Test + @DisplayName("should produce deterministic output") + void testDeterministicOutput() { + Map data = new LinkedHashMap<>(); + data.put("b", 2); + data.put("a", 1); + data.put("c", 3); + + String encoded1 = Zon.encode(data); + String encoded2 = Zon.encode(data); + + assertEquals(encoded1, encoded2); + } + + @Test + @DisplayName("should handle empty objects") + void testEmptyObjects() { + Map data = new LinkedHashMap<>(); + + String encoded = Zon.encode(data); + + // Empty object is empty string in ZON + assertEquals("", encoded); + } + + @Test + @DisplayName("should handle empty arrays") + void testEmptyArrays() { + Map data = new LinkedHashMap<>(); + data.put("items", new ArrayList<>()); + + String encoded = Zon.encode(data); + + assertNotNull(encoded); + assertTrue(encoded.length() > 0); + } +} diff --git a/src/test/java/com/zonformat/zon/SecurityLimitsTest.java b/src/test/java/com/zonformat/zon/SecurityLimitsTest.java new file mode 100644 index 0000000..16d5055 --- /dev/null +++ b/src/test/java/com/zonformat/zon/SecurityLimitsTest.java @@ -0,0 +1,175 @@ +/* + * Security Limits Tests (DOS Prevention) + * Port of security-limits.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for security limits (DOS prevention). 
+ */ +class SecurityLimitsTest { + + @Nested + @DisplayName("E301: Document Size Limit") + class DocumentSizeLimitTests { + + @Test + @DisplayName("should allow documents under 100MB") + void testAllowDocumentsUnder100MB() { + StringBuilder doc = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + doc.append("test:value\n"); + } + + assertDoesNotThrow(() -> Zon.decode(doc.toString())); + } + + // Note: We don't test > 100MB as it would be too slow/memory intensive + } + + @Nested + @DisplayName("E302: Line Length Limit") + class LineLengthLimitTests { + + @Test + @DisplayName("should throw when line exceeds 1MB") + void testThrowOnLineLengthExceeded() { + StringBuilder longLine = new StringBuilder("key:"); + for (int i = 0; i < Constants.MAX_LINE_LENGTH + 1; i++) { + longLine.append("x"); + } + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(longLine.toString())); + assertTrue(error.getMessage().contains("Line length exceeds maximum")); + assertEquals("E302", error.getCode()); + } + + @Test + @DisplayName("should allow lines under 1MB") + void testAllowLinesUnder1MB() { + StringBuilder line = new StringBuilder("key:"); + for (int i = 0; i < 1000; i++) { + line.append("x"); + } + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(line.toString()); + assertNotNull(result.get("key")); + } + } + + @Nested + @DisplayName("E303: Array Length Limit") + class ArrayLengthLimitTests { + + @Test + @DisplayName("should have array length limit defined") + void testArrayLengthLimitDefined() { + // The limit exists in implementation at MAX_ARRAY_LENGTH (1M items) + assertEquals(1_000_000, Constants.MAX_ARRAY_LENGTH); + } + } + + @Nested + @DisplayName("E304: Object Key Count Limit") + class ObjectKeyCountLimitTests { + + @Test + @DisplayName("should have object key limit defined") + void testObjectKeyLimitDefined() { + // The limit exists in implementation at MAX_OBJECT_KEYS (100K keys) + assertEquals(100_000, 
Constants.MAX_OBJECT_KEYS); + } + + @Test + @DisplayName("should allow objects under 100K keys") + void testAllowObjectsUnder100KKeys() { + StringBuilder keys = new StringBuilder("{"); + for (int i = 0; i < 100; i++) { + if (i > 0) keys.append(","); + keys.append("k").append(i).append(":").append(i); + } + keys.append("}"); + + String zonData = "data:\"" + keys.toString() + "\""; + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + Map data = (Map) result.get("data"); + assertEquals(100, data.size()); + } + } + + @Nested + @DisplayName("Nesting Depth Limit") + class NestingDepthLimitTests { + + @Test + @DisplayName("should throw when nesting exceeds 100 levels") + void testThrowOnExcessiveNesting() { + StringBuilder nested = new StringBuilder(); + for (int i = 0; i < 150; i++) { + nested.append("["); + } + for (int i = 0; i < 150; i++) { + nested.append("]"); + } + + assertThrows(ZonDecodeError.class, () -> Zon.decode(nested.toString())); + } + + @Test + @DisplayName("should allow nesting under 100 levels") + void testAllowNestingUnder100Levels() { + StringBuilder nested = new StringBuilder(); + for (int i = 0; i < 50; i++) { + nested.append("["); + } + for (int i = 0; i < 50; i++) { + nested.append("]"); + } + + Object result = Zon.decode(nested.toString()); + assertNotNull(result); + } + } + + @Nested + @DisplayName("Combined Limits") + class CombinedLimitsTests { + + @Test + @DisplayName("should work with normal data within all limits") + void testNormalDataWithinLimits() { + String zonData = "metadata:\"{version:1.0.5,env:prod}\"\nusers:@(3):id,name\n1,Alice\n2,Bob\n3,Carol\ntags:\"[nodejs,typescript,llm]\""; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals(3, users.size()); + + @SuppressWarnings("unchecked") + Map metadata = (Map) result.get("metadata"); + assertEquals("1.0.5", 
metadata.get("version")); + + @SuppressWarnings("unchecked") + List tags = (List) result.get("tags"); + assertEquals(3, tags.size()); + } + } +} diff --git a/src/test/java/com/zonformat/zon/SecurityTest.java b/src/test/java/com/zonformat/zon/SecurityTest.java new file mode 100644 index 0000000..62ae8d2 --- /dev/null +++ b/src/test/java/com/zonformat/zon/SecurityTest.java @@ -0,0 +1,103 @@ +/* + * Security Tests + * Port of security.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Security and robustness tests. + */ +class SecurityTest { + + @Nested + @DisplayName("Prototype Pollution") + class PrototypePollutionTests { + + @Test + @DisplayName("should reject __proto__ keys") + void testRejectProtoKeys() { + String malicious = "items:@(1):id,__proto__.polluted\n1,true"; + Object decoded = Zon.decode(malicious, false); + + // Verify prototype pollution didn't occur + Map testObj = new HashMap<>(); + assertNull(testObj.get("polluted")); + } + + @Test + @DisplayName("should reject constructor.prototype keys") + void testRejectConstructorPrototypeKeys() { + String malicious = "items:@(1):id,constructor.prototype.polluted\n1,true"; + Object decoded = Zon.decode(malicious, false); + + // Verify prototype pollution didn't occur + Map testObj = new HashMap<>(); + assertNull(testObj.get("polluted")); + } + } + + @Nested + @DisplayName("Denial of Service (DoS)") + class DoSTests { + + @Test + @DisplayName("should throw on deep nesting in decoder") + void testThrowOnDeepNesting() { + // Create a deeply nested string: [[[[...]]]] + int depth = 150; + StringBuilder deepZon = new StringBuilder(); + for (int i = 0; i < depth; i++) { + deepZon.append("["); + } + deepZon.append("]"); + 
for (int i = 0; i < depth - 1; i++) { + deepZon.append("]"); + } + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(deepZon.toString())); + assertTrue(error.getMessage().contains("Maximum nesting depth exceeded")); + } + } + + @Nested + @DisplayName("Circular References") + class CircularReferenceTests { + + @Test + @DisplayName("should throw on circular reference in encoder") + void testThrowOnCircularReference() { + Map circular = new LinkedHashMap<>(); + circular.put("name", "loop"); + circular.put("self", circular); + + assertThrows(IllegalArgumentException.class, () -> Zon.encode(circular)); + } + + @Test + @DisplayName("should throw on indirect circular reference") + void testThrowOnIndirectCircularReference() { + Map a = new LinkedHashMap<>(); + a.put("name", "a"); + + Map b = new LinkedHashMap<>(); + b.put("name", "b"); + + a.put("next", b); + b.put("next", a); + + assertThrows(IllegalArgumentException.class, () -> Zon.encode(a)); + } + } +} diff --git a/src/test/java/com/zonformat/zon/StrictModeTest.java b/src/test/java/com/zonformat/zon/StrictModeTest.java new file mode 100644 index 0000000..1d56f8c --- /dev/null +++ b/src/test/java/com/zonformat/zon/StrictModeTest.java @@ -0,0 +1,218 @@ +/* + * Strict Mode Validation Tests + * Port of strict-mode.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for strict mode validation. 
+ */ +class StrictModeTest { + + @Nested + @DisplayName("E001: Row Count Mismatch") + class RowCountMismatchTests { + + @Test + @DisplayName("should throw when table has fewer rows than declared (strict mode)") + void testThrowOnFewerRows() { + String zonData = "users:@(3):id,name\n1,Alice\n2,Bob"; + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(zonData)); + assertTrue(error.getMessage().contains("Row count mismatch")); + assertEquals("E001", error.getCode()); + } + + @Test + @DisplayName("should allow row count mismatch in non-strict mode") + void testAllowMismatchInNonStrictMode() { + String zonData = "users:@(3):id,name\n1,Alice\n2,Bob"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData, false); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + // Non-strict mode allows fewer rows + assertEquals(2, users.size()); + } + + @Test + @DisplayName("should pass when row count matches (strict mode)") + void testPassWhenRowCountMatches() { + String zonData = "users:@(2):id,name\n1,Alice\n2,Bob"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals(2, users.size()); + assertEquals(1L, ((Number) users.get(0).get("id")).longValue()); + assertEquals("Alice", users.get(0).get("name")); + } + } + + @Nested + @DisplayName("E002: Field Count Mismatch") + class FieldCountMismatchTests { + + @Test + @DisplayName("should throw when row has fewer fields than declared columns (strict mode)") + void testThrowOnFewerFields() { + String zonData = "users:@(2):id,name,role\n1,Alice\n2,Bob,admin"; + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(zonData)); + assertTrue(error.getMessage().contains("Field count mismatch")); + assertEquals("E002", error.getCode()); + } + + @Test + @DisplayName("should allow missing fields in non-strict mode") + void 
testAllowMissingFieldsInNonStrictMode() { + String zonData = "users:@(2):id,name,role\n1,Alice\n2,Bob,admin"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData, false); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals(2, users.size()); + assertEquals(1L, ((Number) users.get(0).get("id")).longValue()); + assertEquals("Alice", users.get(0).get("name")); + assertEquals("admin", users.get(1).get("role")); + } + + @Test + @DisplayName("should pass when all rows have correct field count (strict mode)") + void testPassWhenFieldCountMatches() { + String zonData = "users:@(2):id,name,role\n1,Alice,user\n2,Bob,admin"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals(2, users.size()); + assertEquals("user", users.get(0).get("role")); + } + + @Test + @DisplayName("should allow sparse fields even in strict mode") + void testAllowSparseFieldsInStrictMode() { + String zonData = "users:@(2):id,name\n1,Alice,role:admin,score:98\n2,Bob"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + assertEquals(1L, ((Number) users.get(0).get("id")).longValue()); + assertEquals("Alice", users.get(0).get("name")); + assertEquals("admin", users.get(0).get("role")); + assertEquals(98L, ((Number) users.get(0).get("score")).longValue()); + assertEquals(2L, ((Number) users.get(1).get("id")).longValue()); + assertEquals("Bob", users.get(1).get("name")); + } + } + + @Nested + @DisplayName("Error Details") + class ErrorDetailsTests { + + @Test + @DisplayName("should include error code in error object") + void testErrorCode() { + String zonData = "users:@(2):id,name\n1,Alice"; + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(zonData)); + assertEquals("E001", error.getCode()); 
+ } + + @Test + @DisplayName("should include context in error message") + void testErrorContext() { + String zonData = "users:@(2):id,name\n1,Alice"; + + ZonDecodeError error = assertThrows(ZonDecodeError.class, + () -> Zon.decode(zonData)); + assertNotNull(error.getContext()); + assertTrue(error.toString().contains("Table: users")); + } + } + + @Nested + @DisplayName("Default Behavior") + class DefaultBehaviorTests { + + @Test + @DisplayName("strict mode should be enabled by default") + void testStrictModeEnabledByDefault() { + String zonData = "users:@(2):id,name\n1,Alice"; + + // Should throw because default is strict: true + assertThrows(ZonDecodeError.class, () -> Zon.decode(zonData)); + } + + @Test + @DisplayName("can explicitly enable strict mode") + void testExplicitStrictMode() { + String zonData = "users:@(2):id,name\n1,Alice"; + + assertThrows(ZonDecodeError.class, () -> Zon.decode(zonData, true)); + } + } + + @Nested + @DisplayName("Complex Scenarios") + class ComplexScenariosTests { + + @Test + @DisplayName("should validate multiple tables independently") + void testMultipleTables() { + String zonData = "users:@(2):id,name\n1,Alice\n2,Bob\nproducts:@(1):id,title\n100,Widget"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + @SuppressWarnings("unchecked") + List> products = (List>) result.get("products"); + + assertEquals(2, users.size()); + assertEquals(1, products.size()); + } + + @Test + @DisplayName("should work with valid data across multiple tables") + void testValidMultipleTables() { + String zonData = "users:@(2):id,name\n1,Alice\n2,Bob\nproducts:@(2):id,title\n100,Widget\n200,Gadget"; + + @SuppressWarnings("unchecked") + Map result = (Map) Zon.decode(zonData); + + @SuppressWarnings("unchecked") + List> users = (List>) result.get("users"); + @SuppressWarnings("unchecked") + List> products = (List>) result.get("products"); + + 
assertEquals(2, users.size()); + assertEquals(2, products.size()); + } + } +} diff --git a/src/test/java/com/zonformat/zon/ZonCodecTest.java b/src/test/java/com/zonformat/zon/ZonCodecTest.java new file mode 100644 index 0000000..75622ed --- /dev/null +++ b/src/test/java/com/zonformat/zon/ZonCodecTest.java @@ -0,0 +1,709 @@ +/* + * ZON Codec Tests + * Port of codec.test.ts from the TypeScript implementation + * + * Copyright (c) 2025 ZON-FORMAT (Roni Bhakta) + * MIT License + */ +package com.zonformat.zon; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Comprehensive codec tests for ZON encoding and decoding. + */ +class ZonCodecTest { + + @Nested + @DisplayName("Round-trip tests") + class RoundTripTests { + + @Test + @DisplayName("Empty object") + void testEmptyObject() { + Map data = new LinkedHashMap<>(); + String encoded = Zon.encode(data); + Object decoded = Zon.decode(encoded); + assertTrue(decoded instanceof Map); + assertTrue(((Map) decoded).isEmpty()); + } + + @Test + @DisplayName("Simple metadata") + void testSimpleMetadata() { + Map data = new LinkedHashMap<>(); + data.put("name", "Alice"); + data.put("age", 30); + data.put("active", true); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals("Alice", decoded.get("name")); + assertEquals(30L, ((Number) decoded.get("age")).longValue()); + assertEquals(true, decoded.get("active")); + } + + @Test + @DisplayName("Nested object") + void testNestedObject() { + Map profile = new LinkedHashMap<>(); + profile.put("age", 25); + profile.put("city", "NYC"); + + Map user = new LinkedHashMap<>(); + user.put("name", "Bob"); + user.put("profile", profile); + + Map data = new LinkedHashMap<>(); + data.put("user", user); + + String encoded = Zon.encode(data); + 
@SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertNotNull(decoded.get("user")); + @SuppressWarnings("unchecked") + Map decodedUser = (Map) decoded.get("user"); + assertEquals("Bob", decodedUser.get("name")); + + @SuppressWarnings("unchecked") + Map decodedProfile = (Map) decodedUser.get("profile"); + assertEquals(25L, ((Number) decodedProfile.get("age")).longValue()); + assertEquals("NYC", decodedProfile.get("city")); + } + + @Test + @DisplayName("Array of objects (table)") + void testArrayOfObjects() { + List> data = new ArrayList<>(); + + Map item1 = new LinkedHashMap<>(); + item1.put("id", 1); + item1.put("name", "Alice"); + item1.put("score", 95); + data.add(item1); + + Map item2 = new LinkedHashMap<>(); + item2.put("id", 2); + item2.put("name", "Bob"); + item2.put("score", 87); + data.add(item2); + + Map item3 = new LinkedHashMap<>(); + item3.put("id", 3); + item3.put("name", "Charlie"); + item3.put("score", 92); + data.add(item3); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + List> decoded = (List>) Zon.decode(encoded); + + assertEquals(3, decoded.size()); + assertEquals("Alice", decoded.get(0).get("name")); + assertEquals(87L, ((Number) decoded.get(1).get("score")).longValue()); + } + + @Test + @DisplayName("Mixed metadata and table") + void testMixedMetadataAndTable() { + List> records = new ArrayList<>(); + + Map r1 = new LinkedHashMap<>(); + r1.put("month", "Jan"); + r1.put("sales", 1000); + records.add(r1); + + Map r2 = new LinkedHashMap<>(); + r2.put("month", "Feb"); + r2.put("sales", 1200); + records.add(r2); + + Map data = new LinkedHashMap<>(); + data.put("title", "Sales Report"); + data.put("year", 2024); + data.put("records", records); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals("Sales Report", decoded.get("title")); + assertEquals(2024L, ((Number) decoded.get("year")).longValue()); + + 
@SuppressWarnings("unchecked") + List> decodedRecords = (List>) decoded.get("records"); + assertEquals(2, decodedRecords.size()); + assertEquals("Jan", decodedRecords.get(0).get("month")); + } + + @Test + @DisplayName("Boolean values") + void testBooleanValues() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("active", true); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("active", false); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("success", true); + data.put("error", false); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(true, decoded.get("success")); + assertEquals(false, decoded.get("error")); + + @SuppressWarnings("unchecked") + List> decodedItems = (List>) decoded.get("items"); + assertEquals(true, decodedItems.get(0).get("active")); + assertEquals(false, decodedItems.get(1).get("active")); + } + + @Test + @DisplayName("Null values") + void testNullValues() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("data", null); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("data", "value"); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("name", "Test"); + data.put("value", null); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals("Test", decoded.get("name")); + assertNull(decoded.get("value")); + + @SuppressWarnings("unchecked") + List> decodedItems = (List>) decoded.get("items"); + assertNull(decodedItems.get(0).get("data")); + assertEquals("value", decodedItems.get(1).get("data")); + } + + @Test + @DisplayName("Numbers (integers and floats)") + void testNumbers() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); 
+ i1.put("id", 1); + i1.put("value", 100); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("value", 200.5); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("integer", 42); + data.put("float", 3.14); + data.put("negative", -10); + data.put("negativeFloat", -2.5); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(42L, ((Number) decoded.get("integer")).longValue()); + assertEquals(3.14, ((Number) decoded.get("float")).doubleValue(), 0.001); + assertEquals(-10L, ((Number) decoded.get("negative")).longValue()); + assertEquals(-2.5, ((Number) decoded.get("negativeFloat")).doubleValue(), 0.001); + } + + @Test + @DisplayName("Strings with special characters") + void testStringsWithSpecialCharacters() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("text", "normal"); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("text", "with, comma"); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("plain", "hello"); + data.put("withComma", "hello, world"); + data.put("withQuotes", "say \"hello\""); + data.put("withNewline", "line1\nline2"); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals("hello", decoded.get("plain")); + assertEquals("hello, world", decoded.get("withComma")); + assertEquals("say \"hello\"", decoded.get("withQuotes")); + assertEquals("line1\nline2", decoded.get("withNewline")); + } + + @Test + @DisplayName("Empty arrays") + void testEmptyArrays() { + Map nested = new LinkedHashMap<>(); + nested.put("also_empty", new ArrayList<>()); + + Map data = new LinkedHashMap<>(); + data.put("empty", new ArrayList<>()); + data.put("nested", nested); + + String encoded = Zon.encode(data); + 
@SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertTrue(decoded.get("empty") instanceof List); + assertTrue(((List) decoded.get("empty")).isEmpty()); + } + + @Test + @DisplayName("Nested arrays in metadata") + void testNestedArraysInMetadata() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("values", Arrays.asList(10, 20)); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("values", Arrays.asList(30, 40)); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("tags", Arrays.asList("javascript", "typescript", "node")); + data.put("matrix", Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3, 4))); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + @SuppressWarnings("unchecked") + List tags = (List) decoded.get("tags"); + assertEquals(3, tags.size()); + assertEquals("javascript", tags.get(0)); + } + + @Test + @DisplayName("Complex nested objects in table cells") + void testComplexNestedObjectsInTableCells() { + List> data = new ArrayList<>(); + + Map meta1 = new LinkedHashMap<>(); + meta1.put("tags", Arrays.asList("a", "b")); + meta1.put("count", 5); + + Map item1 = new LinkedHashMap<>(); + item1.put("id", 1); + item1.put("metadata", meta1); + data.add(item1); + + Map meta2 = new LinkedHashMap<>(); + meta2.put("tags", Arrays.asList("c")); + meta2.put("count", 3); + + Map item2 = new LinkedHashMap<>(); + item2.put("id", 2); + item2.put("metadata", meta2); + data.add(item2); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + List> decoded = (List>) Zon.decode(encoded); + + assertEquals(2, decoded.size()); + @SuppressWarnings("unchecked") + Map decodedMeta1 = (Map) decoded.get(0).get("metadata"); + assertEquals(5L, ((Number) decodedMeta1.get("count")).longValue()); + } + } + + @Nested + @DisplayName("Hikes example from README") 
+ class HikesExampleTests { + + @Test + @DisplayName("Full hikes example") + void testFullHikesExample() { + // Context + Map context = new LinkedHashMap<>(); + context.put("task", "Our favorite hikes together"); + context.put("location", "Boulder"); + context.put("season", "spring_2025"); + + // Friends + List friends = Arrays.asList("ana", "luis", "sam"); + + // Hikes + List> hikes = new ArrayList<>(); + + Map h1 = new LinkedHashMap<>(); + h1.put("id", 1); + h1.put("name", "Blue Lake Trail"); + h1.put("distanceKm", 7.5); + h1.put("elevationGain", 320); + h1.put("companion", "ana"); + h1.put("wasSunny", true); + hikes.add(h1); + + Map h2 = new LinkedHashMap<>(); + h2.put("id", 2); + h2.put("name", "Ridge Overlook"); + h2.put("distanceKm", 9.2); + h2.put("elevationGain", 540); + h2.put("companion", "luis"); + h2.put("wasSunny", false); + hikes.add(h2); + + Map h3 = new LinkedHashMap<>(); + h3.put("id", 3); + h3.put("name", "Wildflower Loop"); + h3.put("distanceKm", 5.1); + h3.put("elevationGain", 180); + h3.put("companion", "sam"); + h3.put("wasSunny", true); + hikes.add(h3); + + Map data = new LinkedHashMap<>(); + data.put("context", context); + data.put("friends", friends); + data.put("hikes", hikes); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + // Verify structure + assertNotNull(decoded.get("context")); + assertNotNull(decoded.get("friends")); + assertNotNull(decoded.get("hikes")); + + @SuppressWarnings("unchecked") + List> decodedHikes = (List>) decoded.get("hikes"); + assertEquals(3, decodedHikes.size()); + assertEquals("Blue Lake Trail", decodedHikes.get(0).get("name")); + assertEquals(true, decodedHikes.get(0).get("wasSunny")); + assertEquals(false, decodedHikes.get(1).get("wasSunny")); + + // Verify the encoded format structure + assertTrue(encoded.contains("hikes:@(3):")); + assertTrue(encoded.contains("companion,distanceKm,elevationGain,id,name,wasSunny")); + } + } + + @Nested + 
@DisplayName("Edge cases") + class EdgeCaseTests { + + @Test + @DisplayName("String that looks like a number") + void testStringLooksLikeNumber() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("code", "001"); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("code", "002"); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("stringNumber", "123"); + data.put("actualNumber", 123); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertTrue(decoded.get("stringNumber") instanceof String); + assertTrue(decoded.get("actualNumber") instanceof Number); + } + + @Test + @DisplayName("String that looks like boolean") + void testStringLooksLikeBoolean() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("status", "T"); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("status", true); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("stringTrue", "true"); + data.put("actualTrue", true); + data.put("stringFalse", "false"); + data.put("actualFalse", false); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertTrue(decoded.get("stringTrue") instanceof String); + assertTrue(decoded.get("actualTrue") instanceof Boolean); + assertTrue(decoded.get("stringFalse") instanceof String); + assertTrue(decoded.get("actualFalse") instanceof Boolean); + } + + @Test + @DisplayName("Empty strings") + void testEmptyStrings() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("name", ""); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("name", "value"); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("empty", 
""); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals("", decoded.get("empty")); + } + + @Test + @DisplayName("Whitespace preservation") + void testWhitespacePreservation() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("text", " padded "); + items.add(i1); + + Map data = new LinkedHashMap<>(); + data.put("leading", " space"); + data.put("trailing", "space "); + data.put("both", " both "); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(" space", decoded.get("leading")); + assertEquals("space ", decoded.get("trailing")); + assertEquals(" both ", decoded.get("both")); + } + + @Test + @DisplayName("Very long strings") + void testVeryLongStrings() { + StringBuilder longString = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + longString.append("a"); + } + + List> items = new ArrayList<>(); + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("text", longString.toString()); + items.add(i1); + + Map data = new LinkedHashMap<>(); + data.put("long", longString.toString()); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(longString.toString(), decoded.get("long")); + } + + @Test + @DisplayName("Large arrays") + void testLargeArrays() { + List> items = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + Map item = new LinkedHashMap<>(); + item.put("id", i + 1); + item.put("name", "Item " + (i + 1)); + item.put("value", i * 10); + items.add(item); + } + + Map data = new LinkedHashMap<>(); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + @SuppressWarnings("unchecked") + List> 
decodedItems = (List>) decoded.get("items"); + assertEquals(100, decodedItems.size()); + } + + @Test + @DisplayName("Array of primitives") + void testArrayOfPrimitives() { + List data = Arrays.asList("apple", "banana", "cherry"); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + List decoded = (List) Zon.decode(encoded); + + assertEquals(3, decoded.size()); + assertEquals("apple", decoded.get(0)); + assertEquals("banana", decoded.get(1)); + assertEquals("cherry", decoded.get(2)); + + // Should be encoded as array, not table + assertTrue(encoded.startsWith("[")); + } + + @Test + @DisplayName("Deeply nested objects") + void testDeeplyNestedObjects() { + Map level4 = new LinkedHashMap<>(); + level4.put("value", "deep"); + + Map level3 = new LinkedHashMap<>(); + level3.put("level4", level4); + + Map level2 = new LinkedHashMap<>(); + level2.put("level3", level3); + + Map level1 = new LinkedHashMap<>(); + level1.put("level2", level2); + + Map data = new LinkedHashMap<>(); + data.put("level1", level1); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertNotNull(decoded.get("level1")); + } + } + + @Nested + @DisplayName("Data type preservation") + class DataTypePreservationTests { + + @Test + @DisplayName("Integer vs float distinction") + void testIntegerFloatDistinction() { + List> items = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("intVal", 100); + i1.put("floatVal", 100.5); + items.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("intVal", 200); + i2.put("floatVal", 200.0); + items.add(i2); + + Map data = new LinkedHashMap<>(); + data.put("integer", 42); + data.put("float", 42.0); + data.put("explicitFloat", 3.14); + data.put("items", items); + + String encoded = Zon.encode(data); + @SuppressWarnings("unchecked") + Map decoded = (Map) Zon.decode(encoded); + + assertEquals(42L, ((Number) 
decoded.get("integer")).longValue()); + assertEquals(3.14, ((Number) decoded.get("explicitFloat")).doubleValue(), 0.001); + } + + @Test + @DisplayName("Boolean shorthand T/F") + void testBooleanShorthand() { + List> data = new ArrayList<>(); + + Map i1 = new LinkedHashMap<>(); + i1.put("id", 1); + i1.put("flag", true); + data.add(i1); + + Map i2 = new LinkedHashMap<>(); + i2.put("id", 2); + i2.put("flag", false); + data.add(i2); + + Map i3 = new LinkedHashMap<>(); + i3.put("id", 3); + i3.put("flag", true); + data.add(i3); + + String encoded = Zon.encode(data); + + // Check that booleans are encoded as T/F + assertTrue(encoded.contains(",T") || encoded.contains("T,")); + assertTrue(encoded.contains(",F") || encoded.contains("F,")); + + @SuppressWarnings("unchecked") + List> decoded = (List>) Zon.decode(encoded); + assertEquals(true, decoded.get(0).get("flag")); + assertEquals(false, decoded.get(1).get("flag")); + assertEquals(true, decoded.get(2).get("flag")); + } + } +} From 72d9175312a0e0fb769f7e5279c0b7c08a67dce8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:58:00 +0000 Subject: [PATCH 3/3] Fix code review issues: consistent float formatting and bounds check Co-authored-by: ronibhakta1 <77425964+ronibhakta1@users.noreply.github.com> --- src/main/java/com/zonformat/zon/ZonDecoder.java | 2 +- src/main/java/com/zonformat/zon/ZonEncoder.java | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/zonformat/zon/ZonDecoder.java b/src/main/java/com/zonformat/zon/ZonDecoder.java index ffb1c32..493eacc 100644 --- a/src/main/java/com/zonformat/zon/ZonDecoder.java +++ b/src/main/java/com/zonformat/zon/ZonDecoder.java @@ -713,7 +713,7 @@ private Object parseJsonString(String s) { case 'r': result.append('\r'); break; case 't': result.append('\t'); break; case 'u': - if (i + 4 < content.length()) { + if (i + 4 <= content.length()) { String hex = 
content.substring(i + 1, i + 5); result.append((char) Integer.parseInt(hex, 16)); i += 4; diff --git a/src/main/java/com/zonformat/zon/ZonEncoder.java b/src/main/java/com/zonformat/zon/ZonEncoder.java index 3743180..f159030 100644 --- a/src/main/java/com/zonformat/zon/ZonEncoder.java +++ b/src/main/java/com/zonformat/zon/ZonEncoder.java @@ -409,13 +409,9 @@ private String formatValue(Object val) { // Avoid scientific notation if (s.contains("E") || s.contains("e")) { // Convert to fixed-point - if (Math.abs(d) >= 1) { - s = String.format("%.15f", d).replaceAll("0+$", "").replaceAll("\\.$", ".0"); - } else { - s = String.format("%.15f", d).replaceAll("0+$", ""); - } + s = String.format("%.15f", d).replaceAll("0+$", "").replaceAll("\\.$", ".0"); } - // Ensure decimal point for floats + // Ensure decimal point for floats (this only runs for true floats, as integers return earlier) if (!s.contains(".")) { s += ".0"; }