From b20c80083007ac4bd96dbe2511c47e2597ad59f2 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 21:32:15 -0300 Subject: [PATCH 1/6] docs: Small fixes for ADR-0003 --- AGENTS.md | 4 ++ ...-0003-query-intermediate-representation.md | 47 ++++++++++++------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 92b0b5d9..de3f0bcb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,10 @@ - **Location**: `docs/adr/` - **Naming**: `ADR-XXXX-short-title-in-kebab-case.md` (`XXXX` is a sequential number). +- **Index**: + - [ADR-0001: Query Parser](docs/adr/ADR-0001-query-parser.md) + - [ADR-0002: Diagnostics System](docs/adr/ADR-0002-diagnostics-system.md) + - [ADR-0003: Query Intermediate Representation](docs/adr/ADR-0003-query-intermediate-representation.md) - **Template**: ```markdown diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index 284c6b75..cb4cf898 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -50,14 +50,16 @@ These structures are used by both execution modes. 
```rust struct TransitionGraph { transitions: Vec, - data_fields: Vec, // DataFieldId → data field + data_fields: Vec, // DataFieldId → field name + variant_tags: Vec, // VariantTagId → tag name entrypoints: Vec<(String, TransitionId)>, default_entrypoint: TransitionId, } -type TransitionId = usize; // position in transitions array (structural) -type DataFieldId = usize; // index into FieldNames -type RefId = usize; // unique per each named subquery reference (Ref node in the query AST) +type TransitionId = usize; // position in transitions array (structural) +type DataFieldId = usize; // index into data_fields +type VariantTagId = usize; // index into variant_tags +type RefId = usize; // unique per each named subquery reference (Ref node in the query AST) ``` Each named definition has an entry point. The default entry is the last definition. Multiple entry points share the same transition graph. @@ -109,15 +111,15 @@ Navigation variants `Down`/`Up` move the cursor without matching. They enable ne ```rust enum Effect { - StartArray, // push new [] onto container stack - PushElement, // move current value into top array - EndArray, // pop array from stack, becomes current - StartObject, // push new {} onto container stack - EndObject, // pop object from stack, becomes current + StartArray, // push new [] onto container stack + PushElement, // move current value into top array + EndArray, // pop array from stack, becomes current + StartObject, // push new {} onto container stack + EndObject, // pop object from stack, becomes current Field(DataFieldId), // move current value into field on top object - StartVariant(DataFieldId), // push new variant (tagged) onto container stack - EndVariant, // pop variant from stack, becomes current - ToString, // convert current Node value to String (source text) + StartVariant(VariantTagId), // push variant tag onto container stack + EndVariant, // pop variant from stack, wrap current, becomes current + ToString, // convert current 
Node value to String (source text) } ``` @@ -173,13 +175,13 @@ enum Value<'a> { String(String), // Text values (from @capture :: string) Array(Vec>), // completed array Object(HashMap>), // completed object - Variant(DataFieldId, Box>), // tagged variant (tag + payload) + Variant(VariantTagId, Box>), // tagged variant (tag + payload) } enum Container<'a> { Array(Vec>), // array under construction - Object(HashMap>), // object under construction - Variant(DataFieldId, Box>), // variant under construction + Object(HashMap>), // object under construction + Variant(VariantTagId), // variant tag; EndVariant wraps current value } ``` @@ -201,7 +203,7 @@ Query: ``` Func = (function_declaration name: (identifier) @name - parameters: (parameters (identifier)* @params)) + parameters: (parameters (identifier)* @params :: string)) ``` Input: `function foo(a, b) {}` @@ -233,6 +235,7 @@ Execution trace: | Field("name") | - | [{name: Node(foo)}] | | StartArray | - | [{name:...}, []] | | (match "a") | Node(a) | [{name:...}, []] | +| ToString | String("a") | [{name:...}, []] | | PushElement | - | [{name:...}, [String("a")]] | | (match "b") | Node(b) | [{name:...}, [String("a")]] | | ToString | String("b") | [{name:...}, [String("a")]] | @@ -342,6 +345,18 @@ EndVariant The resulting `Value::Variant` preserves the tag distinct from the payload, preventing name collisions. When serialized to JSON, it flattens to match the documented data model: `{ tag: "A", ...payload }`. +**Constraint: branches must produce objects.** Top-level quantifiers in tagged branches are disallowed: + +``` +// Invalid: branch A has top-level quantifier, produces array not object +[A: (foo (bar) @x)* B: (baz) @y] + +// Valid: wrap quantifier in a sequence with capture +[A: { (foo (bar) @x)* } @items B: (baz) @y] +``` + +Flattening requires object payloads (`{ tag: "A", ...payload }`). Arrays cannot be spread into objects. 
This constraint is enforced during query validation; the diagnostic suggests wrapping with `{ ... } @name`. + ### Definition References and Recursion When a pattern references another definition (e.g., `(Expr)` inside `Binary`), the IR uses `RefId` to identify the call site. Each `Ref` node in the query AST gets a unique `RefId`, which is preserved through epsilon elimination. From cc1bc89617a401e7d4382a3940970ca8c44e59d6 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 21:52:12 -0300 Subject: [PATCH 2/6] docs: Separate pre_effects and post_effects in ADR-0003 --- ...-0003-query-intermediate-representation.md | 97 +++++++++++-------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index cb4cf898..b21abda5 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -68,12 +68,13 @@ Each named definition has an entry point. 
The default entry is the last definiti ```rust struct Transition { - matcher: Option, // None = epsilon (no node consumed) - pre_anchored: bool, // must match at current position, no scanning - post_anchored: bool, // after match, cursor must be at last sibling - effects: Vec, // data construction ops emitted on success + matcher: Option, // None = epsilon (no node consumed) + pre_anchored: bool, // must match at current position, no scanning + post_anchored: bool, // after match, cursor must be at last sibling + pre_effects: Vec, // effects before match (consume previous current) + post_effects: Vec, // effects after match (consume new current) ref_marker: Option, // call boundary marker - next: Vec, // successors; order = priority (first = greedy) + next: Vec, // successors; order = priority (first = greedy) } enum RefTransition { @@ -189,12 +190,13 @@ enum Container<'a> { For any given transition, the execution order is strict to ensure data consistency during backtracking: -1. **Match**: Validate node kind/fields. If fail, abort. -2. **Enter**: Push `Frame` with current `builder.watermark()`. -3. **Effects**: Emit new effects (committed tentatively). -4. **Exit**: Pop `Frame` (validate return). +1. **Enter**: Push `Frame` with current `builder.watermark()`. +2. **Pre-Effects**: Emit `pre_effects` (uses previous `current` value). +3. **Match**: Validate node kind/fields. If fail, rollback to watermark and abort. +4. **Post-Effects**: Emit `post_effects` (uses new `current` value). +5. **Exit**: Pop `Frame` (validate return). -This order ensures that if a definition call succeeds, its effects are present. If it fails later, the watermark saved during `Enter` allows rolling back all effects emitted by that definition. +This order ensures correct behavior during epsilon elimination. Pre-effects run before the match overwrites `current`, allowing effects like `PushElement` to be safely merged from preceding epsilon transitions. 
Post-effects run after, for effects that need the newly matched node. #### Example @@ -208,24 +210,26 @@ Func = (function_declaration Input: `function foo(a, b) {}` -Effect stream: +Effect stream (annotated with pre/post classification): ``` -StartObject - (match "foo") - Field("name") - StartArray - (match "a") - ToString - PushElement - (match "b") - ToString - PushElement - EndArray - Field("params") -EndObject +pre: StartObject + (match "foo") +post: Field("name") +pre: StartArray + (match "a") +post: ToString +post: PushElement + (match "b") +post: ToString +post: PushElement +post: EndArray +post: Field("params") +post: EndObject ``` +Note: In the raw graph, effects live on epsilon transitions between matches. The pre/post classification determines where they land after epsilon elimination. `StartObject` and `StartArray` are pre-effects (setup before matching). `Field`, `PushElement`, `ToString`, and `End*` are post-effects (consume the matched node or finalize containers). + Execution trace: | Effect | current | stack | @@ -304,14 +308,16 @@ Same structure, different `next` order. The first successor has priority. Array construction uses epsilon transitions with effects: ``` -T0: ε + StartArray next: [T1] -T1: ε (branch) next: [T2, T5] // try match or exit +T0: ε + StartArray next: [T1] // pre-effect: setup array +T1: ε (branch) next: [T2, T4] // try match or exit T2: Match(expr) next: [T3] -T3: ε + PushElement next: [T1] // loop back -T4: ε + EndArray next: [T5] -T5: ε + Field("items") next: [...] +T3: ε + PushElement next: [T1] // post-effect: consume matched node +T4: ε + EndArray next: [T5] // post-effect: finalize array +T5: ε + Field("items") next: [...] // post-effect: assign to field ``` +After epsilon elimination, `PushElement` from T3 merges into T2 as a post-effect. `StartArray` from T0 merges into T2 as a pre-effect (first iteration only—loop iterations enter from T3, not T0). 
+ Backtracking naturally handles partial arrays: truncating the effect stream removes uncommitted `PushElement` effects. ### Scopes @@ -319,12 +325,14 @@ Backtracking naturally handles partial arrays: truncating the effect stream remo Nested objects from `{...} @name` use `StartObject`/`EndObject` effects: ``` -T0: ε + StartObject next: [T1] +T0: ε + StartObject next: [T1] // pre-effect: setup object T1: ... (sequence contents) next: [T2] -T2: ε + EndObject next: [T3] -T3: ε + Field("name") next: [...] +T2: ε + EndObject next: [T3] // post-effect: finalize object +T3: ε + Field("name") next: [...] // post-effect: assign to field ``` +`StartObject` is a pre-effect (merges forward). `EndObject` and `Field` are post-effects (merge backward onto preceding match). + ### Tagged Alternations Tagged branches use `StartVariant` to create explicit tagged structures. @@ -420,19 +428,28 @@ struct Interpreter<'a> { ### Epsilon Elimination (Optimization) -After initial construction, epsilon transitions can be eliminated by computing epsilon closures: +After initial construction, epsilon transitions can be eliminated by computing epsilon closures. The `pre_effects`/`post_effects` split is essential for correctness here. + +**Why the split matters**: A match transition overwrites `current` with the matched node. Effects from *preceding* epsilon transitions (like `PushElement`) need the *previous* `current` value. Without the split, merging them into a single post-match list would use the wrong value. ``` -Before: -T0: ε + StartArray next: [T1] -T1: ε + Field next: [T2] -T2: Match(kind) next: [T3] +Before (raw graph): +T1: Match(A) next: [T2] // current = A +T2: ε + PushElement next: [T3] // pushes A (correct) +T3: Match(B) next: [...] 
// current = B -After: -T0': Match(kind) + [StartArray, Field] next: [T3'] +After elimination (with split): +T3': pre: [PushElement], Match(B), post: [] // PushElement runs before Match(B), pushes A ✓ + +Wrong (without split, effects merged as post): +T3': Match(B) + [PushElement] // PushElement runs after Match(B), pushes B ✗ ``` -Effects from eliminated epsilons accumulate on the surviving match transition. This is why `effects` is `Vec` rather than `Option`. +**Accumulation rules**: +- Effects from incoming epsilon paths → accumulate into `pre_effects` +- Effects from outgoing epsilon paths → accumulate into `post_effects` + +This is why both are `Vec` rather than `Option`. **Reference expansion**: For definition references, epsilon elimination propagates `Enter`/`Exit` markers to surviving transitions: From 0049291ca02999796a58bccc022f777866e77193 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 21:53:56 -0300 Subject: [PATCH 3/6] docs: Normalize emphasis markers and list spacing in ADR-0003 --- docs/adr/ADR-0003-query-intermediate-representation.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index b21abda5..54c4edad 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -430,7 +430,7 @@ struct Interpreter<'a> { After initial construction, epsilon transitions can be eliminated by computing epsilon closures. The `pre_effects`/`post_effects` split is essential for correctness here. -**Why the split matters**: A match transition overwrites `current` with the matched node. Effects from *preceding* epsilon transitions (like `PushElement`) need the *previous* `current` value. Without the split, merging them into a single post-match list would use the wrong value. +**Why the split matters**: A match transition overwrites `current` with the matched node. 
Effects from _preceding_ epsilon transitions (like `PushElement`) need the _previous_ `current` value. Without the split, merging them into a single post-match list would use the wrong value. ``` Before (raw graph): @@ -446,6 +446,7 @@ T3': Match(B) + [PushElement] // PushElement runs after Match( ``` **Accumulation rules**: + - Effects from incoming epsilon paths → accumulate into `pre_effects` - Effects from outgoing epsilon paths → accumulate into `post_effects` From 9e87cb3e0774718d373ec80af76b4b0aea510f23 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 22:12:38 -0300 Subject: [PATCH 4/6] docs: Rename variant discriminant to `$tag` and allow non-object payloads --- README.md | 10 ++++----- docs/REFERENCE.md | 20 ++++++++--------- ...-0003-query-intermediate-representation.md | 22 +++++++++++-------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index da6a4173..0ccb9106 100644 --- a/README.md +++ b/README.md @@ -161,12 +161,12 @@ This produces: ```typescript type Statement = - | { tag: "Assign"; target: string; value: Expression } - | { tag: "Call"; func: string; args: Expression[] }; + | { $tag: "Assign"; target: string; value: Expression } + | { $tag: "Call"; func: string; args: Expression[] }; type Expression = - | { tag: "Ident"; name: string } - | { tag: "Num"; value: string }; + | { $tag: "Ident"; name: string } + | { $tag: "Num"; value: string }; type TopDefinitions = { statements: [Statement, ...Statement[]]; @@ -177,7 +177,7 @@ Then process the results: ```typescript for (const stmt of result.statements) { - switch (stmt.tag) { + switch (stmt.$tag) { case "Assign": console.log(`Assignment to ${stmt.target}`); break; diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index da3c05a4..e1bc5ab2 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -589,10 +589,10 @@ Labels create a discriminated union: ] @stmt :: Stmt ``` -Output type (discriminant is always `tag`): +Output type (discriminant is always `$tag`): ```typescript -type Stmt = { tag: "Assign"; 
left: Node } | { tag: "Call"; func: Node }; +type Stmt = { $tag: "Assign"; left: Node } | { $tag: "Call"; func: Node }; ``` In Rust, tagged alternations become enums: @@ -754,8 +754,8 @@ Output type: ```typescript type MemberChain = - | { tag: "Base"; name: Node } - | { tag: "Access"; object: MemberChain; property: Node }; + | { $tag: "Base"; name: Node } + | { $tag: "Access"; object: MemberChain; property: Node }; ``` --- @@ -787,14 +787,14 @@ Output types: ```typescript type Statement = - | { tag: "Assign"; target: string; value: Expression } - | { tag: "Call"; func: string; args: Expression[] } - | { tag: "Return"; value?: Expression }; + | { $tag: "Assign"; target: string; value: Expression } + | { $tag: "Call"; func: string; args: Expression[] } + | { $tag: "Return"; value?: Expression }; type Expression = - | { tag: "Ident"; name: string } - | { tag: "Num"; value: string } - | { tag: "Str"; value: string }; + | { $tag: "Ident"; name: string } + | { $tag: "Num"; value: string } + | { $tag: "Str"; value: string }; type Root = { statements: [Statement, ...Statement[]]; diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index 54c4edad..9a3d3633 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -351,19 +351,23 @@ EndObject EndVariant ``` -The resulting `Value::Variant` preserves the tag distinct from the payload, preventing name collisions. When serialized to JSON, it flattens to match the documented data model: `{ tag: "A", ...payload }`. +The resulting `Value::Variant` preserves the tag distinct from the payload, preventing name collisions. 
-**Constraint: branches must produce objects.** Top-level quantifiers in tagged branches are disallowed: +**JSON serialization** depends on payload type: -``` -// Invalid: branch A has top-level quantifier, produces array not object -[A: (foo (bar) @x)* B: (baz) @y] +- **Object payload**: Flatten fields into the tagged object. + ```json + { "$tag": "A", "x": 1, "y": 2 } + ``` +- **Array/Primitive payload**: Wrap in a `content` field. + ```json + { "$tag": "A", "content": [1, 2, 3] } + { "$tag": "B", "content": "foo" } + ``` -// Valid: wrap quantifier in a sequence with capture -[A: { (foo (bar) @x)* } @items B: (baz) @y] -``` +The `$tag` key avoids collisions with user-defined `@tag` captures. -Flattening requires object payloads (`{ tag: "A", ...payload }`). Arrays cannot be spread into objects. This constraint is enforced during query validation; the diagnostic suggests wrapping with `{ ... } @name`. +This mirrors Rust's serde adjacently-tagged representation and remains fully readable for LLMs. No query validation restriction—all payload types are valid. ### Definition References and Recursion From d68691142fca65de3c40db8fc18e047edb0e1080 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 22:20:42 -0300 Subject: [PATCH 5/6] Update ADR-0003 with revised tagged variant representation Add `$data` field for array/primitive payloads instead of `content` --- docs/adr/ADR-0003-query-intermediate-representation.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index 9a3d3633..4d96f868 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -359,13 +359,13 @@ The resulting `Value::Variant` preserves the tag distinct from the payload, prev ```json { "$tag": "A", "x": 1, "y": 2 } ``` -- **Array/Primitive payload**: Wrap in a `content` field. 
+- **Array/Primitive payload**: Wrap in a `$data` field. ```json - { "$tag": "A", "content": [1, 2, 3] } - { "$tag": "B", "content": "foo" } + { "$tag": "A", "$data": [1, 2, 3] } + { "$tag": "B", "$data": "foo" } ``` -The `$tag` key avoids collisions with user-defined `@tag` captures. +The `$tag` and `$data` keys avoid collisions with user-defined captures. This mirrors Rust's serde adjacently-tagged representation and remains fully readable for LLMs. No query validation restriction—all payload types are valid. From d45307c957549b8880982508027c7003babb9028 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Wed, 10 Dec 2025 22:28:51 -0300 Subject: [PATCH 6/6] Update documentation for tagged alternation design --- README.md | 14 +++++------ docs/REFERENCE.md | 25 +++++++++++-------- ...-0003-query-intermediate-representation.md | 22 +++++++--------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 0ccb9106..88443a46 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ Plotnik extends Tree-sitter's query syntax with: - **Named expressions** for composition and reuse - **Recursion** for arbitrarily nested structures - **Type annotations** for precise output shapes -- **Tagged alternations** for discriminated unions +- **Alternations**: untagged for simplicity, tagged for precision (discriminated unions) ## Use cases @@ -161,12 +161,12 @@ This produces: ```typescript type Statement = - | { $tag: "Assign"; target: string; value: Expression } - | { $tag: "Call"; func: string; args: Expression[] }; + | { $tag: "Assign"; $data: { target: string; value: Expression } } + | { $tag: "Call"; $data: { func: string; args: Expression[] } }; type Expression = - | { $tag: "Ident"; name: string } - | { $tag: "Num"; value: string }; + | { $tag: "Ident"; $data: { name: string } } + | { $tag: "Num"; $data: { value: string } }; type TopDefinitions = { statements: [Statement, ...Statement[]]; @@ -179,10 +179,10 @@ Then process the results: 
for (const stmt of result.statements) { switch (stmt.$tag) { case "Assign": - console.log(`Assignment to ${stmt.target}`); + console.log(`Assignment to ${stmt.$data.target}`); break; case "Call": - console.log(`Call to ${stmt.func} with ${stmt.args.length} args`); + console.log(`Call to ${stmt.$data.func} with ${stmt.$data.args.length} args`); break; } } diff --git a/docs/REFERENCE.md b/docs/REFERENCE.md index e1bc5ab2..47ad6719 100644 --- a/docs/REFERENCE.md +++ b/docs/REFERENCE.md @@ -492,6 +492,9 @@ interface Section { Match one of several alternatives with `[...]`: +- **Untagged** (no labels): Simpler output, fields merge. Use when you only need the captured data. +- **Tagged** (with labels): Precise discriminated union. Use when you need to know which branch matched. + ``` [ (identifier) @@ -589,10 +592,12 @@ Labels create a discriminated union: ] @stmt :: Stmt ``` -Output type (discriminant is always `$tag`): +Output type (discriminant is always `$tag`, payload in `$data`): ```typescript -type Stmt = { $tag: "Assign"; left: Node } | { $tag: "Call"; func: Node }; +type Stmt = + | { $tag: "Assign"; $data: { left: Node } } + | { $tag: "Call"; $data: { func: Node } }; ``` In Rust, tagged alternations become enums: @@ -754,8 +759,8 @@ Output type: ```typescript type MemberChain = - | { $tag: "Base"; name: Node } - | { $tag: "Access"; object: MemberChain; property: Node }; + | { $tag: "Base"; $data: { name: Node } } + | { $tag: "Access"; $data: { object: MemberChain; property: Node } }; ``` --- @@ -787,14 +792,14 @@ Output types: ```typescript type Statement = - | { $tag: "Assign"; target: string; value: Expression } - | { $tag: "Call"; func: string; args: Expression[] } - | { $tag: "Return"; value?: Expression }; + | { $tag: "Assign"; $data: { target: string; value: Expression } } + | { $tag: "Call"; $data: { func: string; args: Expression[] } } + | { $tag: "Return"; $data: { value?: Expression } }; type Expression = - | { $tag: "Ident"; name: string } - | { $tag: 
"Num"; value: string } - | { $tag: "Str"; value: string }; + | { $tag: "Ident"; $data: { name: string } } + | { $tag: "Num"; $data: { value: string } } + | { $tag: "Str"; $data: { value: string } }; type Root = { statements: [Statement, ...Statement[]]; diff --git a/docs/adr/ADR-0003-query-intermediate-representation.md b/docs/adr/ADR-0003-query-intermediate-representation.md index 4d96f868..5e16db7d 100644 --- a/docs/adr/ADR-0003-query-intermediate-representation.md +++ b/docs/adr/ADR-0003-query-intermediate-representation.md @@ -353,19 +353,15 @@ EndVariant The resulting `Value::Variant` preserves the tag distinct from the payload, preventing name collisions. -**JSON serialization** depends on payload type: - -- **Object payload**: Flatten fields into the tagged object. - ```json - { "$tag": "A", "x": 1, "y": 2 } - ``` -- **Array/Primitive payload**: Wrap in a `$data` field. - ```json - { "$tag": "A", "$data": [1, 2, 3] } - { "$tag": "B", "$data": "foo" } - ``` - -The `$tag` and `$data` keys avoid collisions with user-defined captures. +**JSON serialization** always uses `$data` wrapper for uniformity: + +```json +{ "$tag": "A", "$data": { "x": 1, "y": 2 } } +{ "$tag": "B", "$data": [1, 2, 3] } +{ "$tag": "C", "$data": "foo" } +``` + +The `$tag` and `$data` keys avoid collisions with user-defined captures. Uniform structure simplifies parsing (always access `.$data`) and eliminates conditional flatten-vs-wrap logic. This mirrors Rust's serde adjacently-tagged representation and remains fully readable for LLMs. No query validation restriction—all payload types are valid.