Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,5 @@ marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
.streamlit/secrets.toml
internal/
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ client = wildedge.init(
If no DSN is configured, the client becomes a no-op and logs a warning.

`init(...)` is a convenience wrapper for `WildEdge(...)` + `instrument(...)`.

## Supported integrations

**On-device**
Expand Down
55 changes: 55 additions & 0 deletions docs/manual-tracking.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,58 @@ handle.feedback(FeedbackType.THUMBS_DOWN)
```

`FeedbackType` values: `THUMBS_UP`, `THUMBS_DOWN`.

## Track spans for agentic workflows

Use span events to track non-inference steps like planning, tool calls, retrieval, or memory updates.

```python
from wildedge.timing import Timer

with Timer() as t:
tool_result = call_tool()

client.track_span(
kind="tool",
name="call_tool",
duration_ms=t.elapsed_ms,
status="ok",
attributes={"tool": "search"},
)
```

You can also attach optional correlation fields (`trace_id`, `span_id`,
`parent_span_id`, `run_id`, `agent_id`, `step_index`, `conversation_id`) to any
event by passing them into `track_inference`, `track_error`, `track_feedback`,
or `track_span`. Use `context=` for correlation attributes shared across events.

### Trace context helpers

Use `client.trace()` and `client.span()` to auto-populate correlation fields for
all events emitted inside the block. `client.span()` times the block and emits a
span event on exit:

```python
import wildedge
from wildedge.timing import Timer

client = wildedge.init()
handle = client.register_model(my_model, model_id="my-org/my-model")

with client.trace(run_id="run-123", agent_id="agent-1"):
with client.span(kind="agent_step", name="plan", step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```

If you need to set correlation fields without emitting a span event, use the
lower-level `span_context()` directly:

```python
with client.trace(run_id="run-123", agent_id="agent-1"):
with wildedge.span_context(step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```
181 changes: 181 additions & 0 deletions examples/agentic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# /// script
# requires-python = ">=3.10"
# dependencies = ["wildedge-sdk", "openai"]
#
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""Agentic workflow example with tool use.

Demonstrates WildEdge tracing for a simple agent that:
- Runs within a trace (one per agent session)
- Wraps each reasoning step in an agent_step span
- Wraps each tool call in a tool span
- Tracks LLM inference automatically via the OpenAI integration

Run with: uv run agentic_example.py
Requires: OPENROUTER_API_KEY environment variable. Set WILDEDGE_DSN to send events.
"""

import json
import os
import time
import uuid

from openai import OpenAI

import wildedge

# Initialize WildEdge with the OpenAI integration so chat completions made
# through openai_client below are tracked automatically as inference events.
we = wildedge.init(
    app_version="1.0.0",
    integrations="openai",
)

# Route OpenAI-compatible requests through OpenRouter (see OPENROUTER_API_KEY
# requirement in the module docstring).
openai_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

# --- Tools -------------------------------------------------------------------

# OpenAI-format tool schemas advertised to the model. Each "name" must match
# a key of the TOOL_HANDLERS dispatch table defined further down.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                },
                "required": ["city"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Evaluate a simple arithmetic expression.",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {"type": "string"},
                },
                "required": ["expression"],
            },
        },
    },
]


def get_weather(city: str) -> str:
    """Return a canned weather report for *city* as a JSON string.

    Sleeps ~150ms to mimic the latency of a real weather API call.
    """
    time.sleep(0.15)
    report = {
        "city": city,
        "temperature_c": 18,
        "condition": "partly cloudy",
    }
    return json.dumps(report)


def calculator(expression: str) -> str:
    """Safely evaluate a simple arithmetic expression and return JSON.

    Accepts numeric literals combined with + - * / // % ** and unary +/-.
    Anything else (names, calls, attribute access) is rejected and reported
    as ``{"error": ...}``, matching the original error-path shape.

    SECURITY: the previous implementation used ``eval`` with empty
    ``__builtins__``, which is not a real sandbox (escapable via attribute
    access on literals). Walking an AST whitelist removes that risk while
    still satisfying the tool's "simple arithmetic expression" contract.
    """
    import ast
    import operator

    # ~60ms to simulate a remote computation call.
    time.sleep(0.06)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression element")

    try:
        result = _eval(ast.parse(expression, mode="eval"))
        return json.dumps({"expression": expression, "result": result})
    except Exception as e:
        # Parse errors, unsupported syntax, and math errors (e.g. 1/0) all
        # surface as a structured error payload rather than raising.
        return json.dumps({"error": str(e)})


# Dispatch table: tool name (as declared in TOOLS) -> Python implementation.
TOOL_HANDLERS = {
    "get_weather": get_weather,
    "calculator": calculator,
}


# --- Agent loop --------------------------------------------------------------


def call_tool(name: str, arguments: dict) -> str:
    """Invoke the named tool inside a WildEdge "tool" span and return its result.

    Input and output summaries are truncated to 200 characters on the span.
    """
    handler = TOOL_HANDLERS[name]
    args_preview = json.dumps(arguments)[:200]
    with we.span(kind="tool", name=name, input_summary=args_preview) as span:
        tool_output = handler(**arguments)
        span.output_summary = tool_output[:200]
        return tool_output


def retrieve_context(query: str) -> str:
    """Fetch relevant context from the vector store (~120ms), in a retrieval span."""
    query_preview = query[:200]
    with we.span(
        kind="retrieval",
        name="vector_search",
        input_summary=query_preview,
    ) as span:
        time.sleep(0.12)
        snippet = f"[context: background knowledge relevant to '{query[:40]}']"
        span.output_summary = snippet
        return snippet


def run_agent(task: str, step_index: int, messages: list) -> str:
    """Run the tool-using agent loop for one task and return the final reply.

    Appends the user turn (with retrieved context), each assistant turn, and
    every tool result to *messages* in place, so chat history accumulates
    across tasks. Each reasoning round is wrapped in an ``agent_step`` span,
    and ``step_index`` advances once per round that requests tool calls.
    """
    # Fetch context before the first reasoning step, include it in the user turn.
    context = retrieve_context(task)
    messages.append({"role": "user", "content": f"{task}\n\nContext: {context}"})

    # Loop until the model answers directly instead of requesting tools.
    while True:
        with we.span(
            kind="agent_step",
            name="reason",
            step_index=step_index,
            input_summary=task[:200],
        ) as span:
            response = openai_client.chat.completions.create(
                model="qwen/qwen3.5-flash-02-23",
                messages=messages,
                tools=TOOLS,
                tool_choice="auto",
                max_tokens=512,
            )
            choice = response.choices[0]
            # Record why the model stopped ("tool_calls" or "stop") on the span.
            span.output_summary = choice.finish_reason

        messages.append(choice.message.model_dump(exclude_none=True))

        if choice.finish_reason == "tool_calls":
            step_index += 1
            for tool_call in choice.message.tool_calls:
                arguments = json.loads(tool_call.function.arguments)
                result = call_tool(tool_call.function.name, arguments)
                # Each tool result must carry its tool_call_id so the model
                # can match results to requests on the next round.
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": result,
                    }
                )
            # Not instrumented: context window update between tool calls (~80ms).
            # Shows up as a gap stripe in the trace view.
            time.sleep(0.08)
        else:
            # No tool calls: the assistant's text is the final answer.
            return choice.message.content or ""


# --- Main --------------------------------------------------------------------

# Two demo tasks: the first exercises both tools in one turn, the second
# requires multiple weather lookups for comparison.
TASKS = [
    "What's the weather like in Tokyo, and what is 42 * 18?",
    "Is it warmer in Paris or Berlin right now?",
]

system_prompt = "You are a helpful assistant. Use tools when needed."
messages = [{"role": "system", "content": system_prompt}]

# One trace covers the whole agent session; run_id/agent_id correlate every
# span and inference event emitted inside the block.
with we.trace(agent_id="demo-agent", run_id=str(uuid.uuid4())):
    for i, task in enumerate(TASKS, start=1):
        print(f"\nTask {i}: {task}")
        reply = run_agent(task, step_index=i, messages=messages)
        print(f"Reply: {reply}")

# Flush any buffered events before the process exits.
we.flush()
42 changes: 42 additions & 0 deletions tests/test_event_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from wildedge.events.inference import InferenceEvent, TextInputMeta
from wildedge.events.model_download import AdapterDownload, ModelDownloadEvent
from wildedge.events.model_load import AdapterLoad, ModelLoadEvent
from wildedge.events.span import SpanEvent


def test_inference_event_to_dict_omits_none_fields():
Expand Down Expand Up @@ -72,3 +73,44 @@ def test_feedback_event_enum_and_string_forms():
)
assert enum_event.to_dict()["feedback"]["feedback_type"] == "accept"
assert string_event.to_dict()["feedback"]["feedback_type"] == "reject"


def test_span_event_to_dict_includes_required_fields():
    # A fully-populated span event serializes with event_type and span payload.
    span_kwargs = {
        "kind": "tool",
        "name": "search",
        "duration_ms": 250,
        "status": "ok",
        "attributes": {"provider": "custom"},
    }
    serialized = SpanEvent(**span_kwargs).to_dict()
    assert serialized["event_type"] == "span"
    assert serialized["span"]["kind"] == "tool"
    assert serialized["span"]["attributes"]["provider"] == "custom"


def test_span_event_context_serializes_under_context_key():
    # Context lands at the top level; no top-level "attributes" key appears
    # when attributes were not provided.
    serialized = SpanEvent(
        kind="agent_step",
        name="plan",
        duration_ms=10,
        status="ok",
        context={"user_id": "u1"},
    ).to_dict()
    assert serialized["context"] == {"user_id": "u1"}
    assert "attributes" not in serialized


def test_span_event_attributes_and_context_are_independent():
    # Span-level attributes and event-level context serialize to different
    # locations and do not leak into each other.
    event = SpanEvent(
        kind="tool",
        name="search",
        duration_ms=50,
        status="ok",
        attributes={"provider": "custom"},
        context={"user_id": "u1"},
    )
    serialized = event.to_dict()
    expected_attributes = {"provider": "custom"}
    expected_context = {"user_id": "u1"}
    assert serialized["span"]["attributes"] == expected_attributes
    assert serialized["context"] == expected_context
Loading
Loading