diff --git a/README.md b/README.md index e6e507b..f66a73f 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,8 @@ As agents write more code, it's important to understand what came from AI versus | Mixed | `mixed` | Human-edited AI output or AI-edited human code | | Unknown | `unknown` | Origin cannot be determined | +Note: Agent Trace does not prescribe a universal threshold for when a contribution becomes `mixed` (e.g., formatting-only edits vs substantial rewrites). Producers should apply a consistent heuristic, and may record details about that heuristic under `metadata`. + --- ## 5. Architecture Overview @@ -135,11 +137,17 @@ The fundamental unit of Agent Trace is the **Trace Record**: "revision": { "type": "string", "description": "Revision identifier (e.g., git commit SHA, jj change ID)" + }, + "repository_url": { + "type": "string", + "format": "uri", + "description": "Optional repository URL (useful for disambiguating revision identifiers outside of repo context)" } } }, "tool": { "type": "object", + "required": ["name"], "properties": { "name": { "type": "string" }, "version": { "type": "string" } @@ -177,6 +185,16 @@ The fundamental unit of Agent Trace is the **Trace Record**: } } }, + "ids": { + "type": "object", + "description": "Opaque correlation identifiers for linking to external logs/artifacts", + "additionalProperties": { + "anyOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + } + }, "conversation": { "type": "object", "required": ["ranges"], @@ -186,6 +204,18 @@ The fundamental unit of Agent Trace is the **Trace Record**: "format": "uri", "description": "URL to look up the conversation that produced this code" }, + "tool": { + "$ref": "#/$defs/tool", + "description": "Tool/IDE that produced ranges in this conversation (overrides TraceRecord.tool)" + }, + "kind": { + "type": "string", + "description": "Contribution modality for this conversation (e.g., 'chat', 'tab_autocomplete', 'inline_edit')" + }, + 
"ids": { + "$ref": "#/$defs/ids", + "description": "Opaque correlation identifiers for linking this conversation to external logs/transcripts" + }, "contributor": { "$ref": "#/$defs/contributor", "description": "The contributor for ranges in this conversation (can be overridden per-range)" @@ -219,7 +249,11 @@ The fundamental unit of Agent Trace is the **Trace Record**: "end_line": { "type": "integer", "minimum": 1 }, "content_hash": { "type": "string", - "description": "Hash of attributed content for position-independent tracking" + "description": "Hash of attributed content for position-independent tracking (recommended format: ':')" + }, + "ids": { + "$ref": "#/$defs/ids", + "description": "Optional IDs for linking this specific range to a turn/tool call/message" }, "contributor": { "$ref": "#/$defs/contributor", @@ -240,7 +274,8 @@ The fundamental unit of Agent Trace is the **Trace Record**: "timestamp": "2026-01-23T14:30:00Z", "vcs": { "type": "git", - "revision": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + "revision": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0", + "repository_url": "https://github.com/example-org/example-repo" }, "tool": { "name": "cursor", @@ -252,6 +287,8 @@ The fundamental unit of Agent Trace is the **Trace Record**: "conversations": [ { "url": "https://api.cursor.com/v1/conversations/12345", + "kind": "chat", + "ids": { "session_id": "cursor-composer-uuid", "tool_call_ids": ["tc_1", "tc_2"] }, "contributor": { "type": "ai", "model_id": "anthropic/claude-opus-4-5-20251101" @@ -260,7 +297,8 @@ The fundamental unit of Agent Trace is the **Trace Record**: { "start_line": 42, "end_line": 67, - "content_hash": "murmur3:9f2e8a1b" + "content_hash": "sha256:2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", + "ids": { "turn_id": "turn_12" } } ], "related": [ @@ -343,7 +381,7 @@ Agent Trace supports multiple version control systems through the `vcs` field: ```json // Git -{ "vcs": { "type": "git", "revision": 
"a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" } } +{ "vcs": { "type": "git", "revision": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0", "repository_url": "https://github.com/example-org/example-repo" } } // Jujutsu (using change ID for stability across rebases) { "vcs": { "type": "jj", "revision": "kkmpptxz" } } @@ -358,6 +396,10 @@ The `revision` field format is VCS-specific: - **jj**: Change ID (stable across amend/rebase operations) - **hg**: Changeset identifier +`vcs.repository_url` is optional. When present, it should identify the repository that the revision belongs to (useful when traces are stored or queried outside the repo). + +Git note: `vcs.revision` is a snapshot at a specific commit. If history is rewritten (rebase/amend/squash/cherry-pick), producers may need to re-emit traces for new revisions, or rely on implementation-specific mapping strategies (often using `content_hash` and/or `metadata`). + ### 6.5 Line Tracking Line numbers in a trace refer to positions at the recorded revision, not current positions. To query ownership of a specific line of code: @@ -368,7 +410,16 @@ Line numbers in a trace refer to positions at the recorded revision, not current ### 6.6 Content Hashes -For tracking attribution across code movement, use content hashes at the range level: +For tracking attribution across code movement, use `content_hash` at the range level. + +`content_hash` is an optional string with the recommended format `:`. + +Recommendations for interoperability: + +- Producers are encouraged to use `sha256` by default. +- Consumers are encouraged to support `sha256`. +- Normalize line endings to `\\n` before hashing. +- Hash the exact attributed content as it exists at the recorded revision (i.e., after any post-processing that is part of the revision such as formatters/linters). 
```json { @@ -386,7 +437,7 @@ For tracking attribution across code movement, use content hashes at the range l { "start_line": 10, "end_line": 25, - "content_hash": "murmur3:9f2e8a1b" + "content_hash": "sha256:2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae" } ] } @@ -415,7 +466,16 @@ Format: `provider/model-name` ### 6.8 Linked Resources -Each conversation has a `url` field and optional `related` array for linking to related sub-resources: +Each conversation has a `url` field and optional `related` array for linking to related sub-resources. + +To improve interoperability, the following well-known `related.type` values are recommended when applicable: + +- `transcript` +- `session` +- `prompt_artifact` +- `tool_calls` +- `diff_artifact` +- `inference_artifact` ```json { @@ -436,8 +496,12 @@ Each conversation has a `url` field and optional `related` array for linking to "url": "https://api.example.com/v1/sessions/xyz789" }, { - "type": "prompt", + "type": "prompt_artifact", "url": "https://api.example.com/v1/prompts/def456" + }, + { + "type": "transcript", + "url": "https://api.example.com/v1/conversations/abc123/transcript" } ] } @@ -447,6 +511,43 @@ Each conversation has a `url` field and optional `related` array for linking to } ``` +### 6.9 Conversation Metadata + +In addition to `url`, conversations may include optional fields to improve interoperability across tools: + +- `conversation.tool`: overrides `TraceRecord.tool` for this conversation (useful when a single trace record spans multiple IDEs/agents) +- `conversation.kind`: contribution modality (recommended baseline values: `chat`, `tab_autocomplete`, `inline_edit`, `agent_action`, `human_typing`) +- `conversation.ids`: opaque correlation IDs for linking to provider logs/transcripts +- `range.ids`: opaque correlation IDs for linking a specific line range to a turn/tool call/message within the conversation + +```json +{ + "conversations": [ + { + "kind": "inline_edit", + "tool": { "name": 
"vscode-copilot-chat" }, + "ids": { "session_id": "sess_123", "request_id": "req_456", "tool_call_ids": ["tc_1", "tc_2"] }, + "contributor": { "type": "ai", "model_id": "openai/gpt-4o" }, + "ranges": [{ "start_line": 10, "end_line": 12, "ids": { "tool_call_id": "tc_2" } }] + } + ] +} +``` + +### 6.10 Trace Lifecycle (Provisional vs Final) + +Implementations vary in when traces are emitted. A recommended model: + +- Provisional (workspace) traces may omit `vcs` when work is uncommitted. +- Final (revision) traces should include `vcs` once a revision boundary exists, and should reflect the state after any post-processing that is part of the revision (formatters, linters, codemods). +- If post-processing occurs, producers may record tools used under `metadata` (e.g., `post_processing_tools`) and compute `content_hash` against the final recorded content. + +### 6.11 Privacy and Durability Considerations + +- `conversation.url` and `related[]` links may require authentication and may not be durable. +- Prompts and tool outputs can contain secrets. Producers should consider redaction and secret scanning before persisting or publishing trace records. +- If durable context is important, store stable artifacts and link them via `related[]` (or omit links when not safe to share). + --- ## 7. Extensibility @@ -524,7 +625,16 @@ This spec intentionally does not define how traces are stored. This could be loc **How should I handle rebases or merge commits?** -We expect to see different implementations in open source. This may influence the spec in the future. We are open to feedback. +Trace records are anchored to `vcs.revision` and should be interpreted as a snapshot at that revision. + +For git, rebases/amends/squashes/cherry-picks create new commit SHAs. 
Implementations typically handle this by: + +- Re-emitting traces for the new SHAs, or +- Treating traces as snapshot-only and not attempting to carry them across rewrites (optionally relying on `content_hash` and/or vendor `metadata` for mapping). + +For VCSes with stable IDs (e.g., Jujutsu change IDs), prefer using stable identifiers where available. + +We expect to see different implementations in open source. This may influence the spec in the future, and we are open to feedback. **What happens when agents create scripts to write code?** diff --git a/reference/trace-store.ts b/reference/trace-store.ts index c9ccf55..8796d79 100644 --- a/reference/trace-store.ts +++ b/reference/trace-store.ts @@ -6,6 +6,7 @@ export interface Range { start_line: number; end_line: number; content_hash?: string; + ids?: Record<string, string | string[]>; contributor?: { type: "human" | "ai" | "mixed" | "unknown"; model_id?: string; @@ -14,6 +15,9 @@ export interface Range { export interface Conversation { url?: string; + tool?: { name: string; version?: string }; + kind?: string; + ids?: Record<string, string | string[]>; contributor?: { type: "human" | "ai" | "mixed" | "unknown"; model_id?: string; @@ -32,6 +36,7 @@ export type VcsType = "git" | "jj" | "hg" | "svn"; export interface Vcs { type: VcsType; revision: string; + repository_url?: string; } export interface TraceRecord { @@ -68,12 +73,39 @@ export function getToolInfo(): { name: string; version?: string } { export function getVcsInfo(cwd: string): Vcs | undefined { try { const revision = execFileSync("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf-8" }).trim(); - return { type: "git", revision }; + const repository_url = getRepositoryUrl(cwd); + return { type: "git", revision, repository_url }; + } catch { + return undefined; + } +} + +function getRepositoryUrl(cwd: string): string | undefined { + try { + const raw = execFileSync("git", ["config", "--get", "remote.origin.url"], { cwd, encoding: "utf-8" }).trim(); + if (!raw) return undefined; + return 
normalizeRepositoryUrl(raw); } catch { return undefined; } } +function normalizeRepositoryUrl(raw: string): string | undefined { + // Already a URL/URI with a scheme. + if (/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//.test(raw)) return raw; + + // Common SCP-like SSH form: git@github.com:org/repo(.git) + const m = raw.match(/^([^@]+)@([^:]+):(.+)$/); + if (m) { + const user = m[1]; + const host = m[2]; + const path = m[3]; + return `ssh://${user}@${host}/${path}`; + } + + return undefined; +} + export function toRelativePath(absolutePath: string, root: string): string { return absolutePath.startsWith(root) ? relative(root, absolutePath) : absolutePath; } @@ -148,7 +180,7 @@ export function createTrace( }; return { - version: "1.0", + version: "0.1.0", id: crypto.randomUUID(), timestamp: new Date().toISOString(), vcs: getVcsInfo(root), diff --git a/schemas.ts b/schemas.ts index a205580..7d7b26c 100644 --- a/schemas.ts +++ b/schemas.ts @@ -12,7 +12,7 @@ export const ContributorTypeSchema = z.enum([ export const ToolSchema = z.object({ name: z.string().describe("Name of the tool that produced the code"), - version: z.string().describe("Version of the tool"), + version: z.string().optional().describe("Version of the tool"), }); export const ContributorSchema = z.object({ @@ -31,6 +31,12 @@ export const RelatedResourceSchema = z.object({ url: z.string().url().describe("URL to the related resource"), }); +export const IdValueSchema = z.union([z.string(), z.array(z.string())]); + +export const IdsSchema = z + .record(IdValueSchema) + .describe("Opaque correlation identifiers for linking to external logs/artifacts"); + export const RangeSchema = z.object({ start_line: z.number().int().min(1).describe("1-indexed start line number"), end_line: z.number().int().min(1).describe("1-indexed end line number"), @@ -38,8 +44,11 @@ export const RangeSchema = z.object({ .string() .optional() .describe( - "Hash of attributed content for position-independent tracking" + "Hash of attributed content 
for position-independent tracking (recommended format: '<algorithm>:<hex-digest>')" ), + ids: IdsSchema.optional().describe( + "Optional IDs for linking this specific range to a turn/tool call/message" + ), contributor: ContributorSchema.optional().describe( "Override contributor for this specific range (e.g., for agent handoffs)" ), @@ -51,6 +60,18 @@ export const ConversationSchema = z.object({ .url() .optional() .describe("URL to look up the conversation that produced this code"), + tool: ToolSchema.optional().describe( + "Tool/IDE that produced ranges in this conversation (overrides TraceRecord.tool)" + ), + kind: z + .string() + .optional() + .describe( + "Contribution modality for this conversation (e.g., 'chat', 'tab_autocomplete', 'inline_edit')" + ), + ids: IdsSchema.optional().describe( + "Opaque correlation identifiers for linking this conversation to external logs/transcripts" + ), contributor: ContributorSchema.optional().describe( "The contributor for ranges in this conversation (can be overridden per-range)" ), @@ -81,13 +102,20 @@ export const VcsSchema = z.object({ .describe( "Revision identifier (e.g., git commit SHA, jj change ID, hg changeset)" ), + repository_url: z + .string() + .url() + .optional() + .describe( + "Optional repository URL (useful for disambiguating revision identifiers outside of repo context)" + ), }); export const TraceRecordSchema = z.object({ version: z .string() .regex(/^[0-9]+\\.[0-9]+\\.[0-9]+$/) - .describe("Agent Trace specification version (e.g., '1.0')"), + .describe("Agent Trace specification version (e.g., '1.0.0')"), id: z.string().uuid().describe("Unique identifier for this trace record"), timestamp: z .string()