Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,5 @@ marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
.streamlit/secrets.toml
internal/
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ client = wildedge.init(
If no DSN is configured, the client becomes a no-op and logs a warning.

`init(...)` is a convenience wrapper for `WildEdge(...)` + `instrument(...)`.

## Supported integrations

**On-device**
Expand Down
55 changes: 55 additions & 0 deletions docs/manual-tracking.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,58 @@ handle.feedback(FeedbackType.THUMBS_DOWN)
```

`FeedbackType` values: `THUMBS_UP`, `THUMBS_DOWN`.

## Track spans for agentic workflows

Use span events to track non-inference steps like planning, tool calls, retrieval, or memory updates.

```python
from wildedge.timing import Timer

with Timer() as t:
tool_result = call_tool()

client.track_span(
kind="tool",
name="call_tool",
duration_ms=t.elapsed_ms,
status="ok",
attributes={"tool": "search"},
)
```

You can also attach optional correlation fields (`trace_id`, `span_id`,
`parent_span_id`, `run_id`, `agent_id`, `step_index`, `conversation_id`) to any
event by passing them into `track_inference`, `track_error`, `track_feedback`,
or `track_span`. Use `context=` for correlation attributes shared across events.

### Trace context helpers

Use `client.trace()` and `client.span()` to auto-populate correlation fields for
all events emitted inside the block. `client.span()` times the block and emits a
span event on exit:

```python
import wildedge
from wildedge.timing import Timer

client = wildedge.init()
handle = client.register_model(my_model, model_id="my-org/my-model")

with client.trace(run_id="run-123", agent_id="agent-1"):
with client.span(kind="agent_step", name="plan", step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```

If you need to set correlation fields without emitting a span event, use the
lower-level `span_context()` directly:

```python
with client.trace(run_id="run-123", agent_id="agent-1"):
with wildedge.span_context(step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```
181 changes: 181 additions & 0 deletions examples/agentic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# /// script
# requires-python = ">=3.10"
# dependencies = ["wildedge-sdk", "openai"]
#
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""Agentic workflow example with tool use.

Demonstrates WildEdge tracing for a simple agent that:
- Runs within a trace (one per agent session)
- Wraps each reasoning step in an agent_step span
- Wraps each tool call in a tool span
- Tracks LLM inference automatically via the OpenAI integration

Run with: uv run agentic_example.py
Requires: OPENROUTER_API_KEY environment variable. Set WILDEDGE_DSN to send events.
"""

import json
import os
import time
import uuid

from openai import OpenAI

import wildedge

# Initialize WildEdge with the OpenAI integration so chat completions made
# through openai_client below are tracked automatically as inference events.
we = wildedge.init(
    app_version="1.0.0",
    integrations="openai",
)

# Route OpenAI-compatible requests through OpenRouter (see OPENROUTER_API_KEY
# requirement in the module docstring).
openai_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

# --- Tools -------------------------------------------------------------------

# OpenAI-format tool schemas advertised to the model. Each "name" must match
# a key of the TOOL_HANDLERS dispatch table defined further down.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                },
                "required": ["city"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Evaluate a simple arithmetic expression.",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {"type": "string"},
                },
                "required": ["expression"],
            },
        },
    },
]


def get_weather(city: str) -> str:
    """Return a canned weather report for *city* as a JSON string.

    Sleeps ~150ms to mimic the latency of a real weather API call.
    """
    time.sleep(0.15)
    report = {
        "city": city,
        "temperature_c": 18,
        "condition": "partly cloudy",
    }
    return json.dumps(report)


def calculator(expression: str) -> str:
    """Safely evaluate a simple arithmetic expression and return JSON.

    Accepts numeric literals combined with + - * / // % ** and unary +/-.
    Anything else (names, calls, attribute access) is rejected and reported
    as ``{"error": ...}``, matching the original error-path shape.

    SECURITY: the previous implementation used ``eval`` with empty
    ``__builtins__``, which is not a real sandbox (escapable via attribute
    access on literals). Walking an AST whitelist removes that risk while
    still satisfying the tool's "simple arithmetic expression" contract.
    """
    import ast
    import operator

    # ~60ms to simulate a remote computation call.
    time.sleep(0.06)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate only the whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression element")

    try:
        result = _eval(ast.parse(expression, mode="eval"))
        return json.dumps({"expression": expression, "result": result})
    except Exception as e:
        # Parse errors, unsupported syntax, and math errors (e.g. 1/0) all
        # surface as a structured error payload rather than raising.
        return json.dumps({"error": str(e)})


# Dispatch table: tool name (as declared in TOOLS) -> Python implementation.
TOOL_HANDLERS = {
    "get_weather": get_weather,
    "calculator": calculator,
}


# --- Agent loop --------------------------------------------------------------


def call_tool(name: str, arguments: dict) -> str:
    """Invoke the named tool inside a WildEdge "tool" span and return its result.

    Input and output summaries are truncated to 200 characters on the span.
    """
    handler = TOOL_HANDLERS[name]
    args_preview = json.dumps(arguments)[:200]
    with we.span(kind="tool", name=name, input_summary=args_preview) as span:
        tool_output = handler(**arguments)
        span.output_summary = tool_output[:200]
        return tool_output


def retrieve_context(query: str) -> str:
    """Fetch relevant context from the vector store (~120ms), in a retrieval span."""
    query_preview = query[:200]
    with we.span(
        kind="retrieval",
        name="vector_search",
        input_summary=query_preview,
    ) as span:
        time.sleep(0.12)
        snippet = f"[context: background knowledge relevant to '{query[:40]}']"
        span.output_summary = snippet
        return snippet


def run_agent(task: str, step_index: int, messages: list) -> str:
    """Run the tool-using agent loop for one task and return the final reply.

    Appends the user turn (with retrieved context), each assistant turn, and
    every tool result to *messages* in place, so chat history accumulates
    across tasks. Each reasoning round is wrapped in an ``agent_step`` span,
    and ``step_index`` advances once per round that requests tool calls.
    """
    # Fetch context before the first reasoning step, include it in the user turn.
    context = retrieve_context(task)
    messages.append({"role": "user", "content": f"{task}\n\nContext: {context}"})

    # Loop until the model answers directly instead of requesting tools.
    while True:
        with we.span(
            kind="agent_step",
            name="reason",
            step_index=step_index,
            input_summary=task[:200],
        ) as span:
            response = openai_client.chat.completions.create(
                model="qwen/qwen3.5-flash-02-23",
                messages=messages,
                tools=TOOLS,
                tool_choice="auto",
                max_tokens=512,
            )
            choice = response.choices[0]
            # Record why the model stopped ("tool_calls" or "stop") on the span.
            span.output_summary = choice.finish_reason

        messages.append(choice.message.model_dump(exclude_none=True))

        if choice.finish_reason == "tool_calls":
            step_index += 1
            for tool_call in choice.message.tool_calls:
                arguments = json.loads(tool_call.function.arguments)
                result = call_tool(tool_call.function.name, arguments)
                # Each tool result must carry its tool_call_id so the model
                # can match results to requests on the next round.
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": result,
                    }
                )
            # Not instrumented: context window update between tool calls (~80ms).
            # Shows up as a gap stripe in the trace view.
            time.sleep(0.08)
        else:
            # No tool calls: the assistant's text is the final answer.
            return choice.message.content or ""


# --- Main --------------------------------------------------------------------

# Two demo tasks: the first exercises both tools in one turn, the second
# requires multiple weather lookups for comparison.
TASKS = [
    "What's the weather like in Tokyo, and what is 42 * 18?",
    "Is it warmer in Paris or Berlin right now?",
]

system_prompt = "You are a helpful assistant. Use tools when needed."
messages = [{"role": "system", "content": system_prompt}]

# One trace covers the whole agent session; run_id/agent_id correlate every
# span and inference event emitted inside the block.
with we.trace(agent_id="demo-agent", run_id=str(uuid.uuid4())):
    for i, task in enumerate(TASKS, start=1):
        print(f"\nTask {i}: {task}")
        reply = run_agent(task, step_index=i, messages=messages)
        print(f"Reply: {reply}")

# Flush any buffered events before the process exits.
we.flush()
42 changes: 42 additions & 0 deletions tests/test_event_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from wildedge.events.inference import InferenceEvent, TextInputMeta
from wildedge.events.model_download import AdapterDownload, ModelDownloadEvent
from wildedge.events.model_load import AdapterLoad, ModelLoadEvent
from wildedge.events.span import SpanEvent


def test_inference_event_to_dict_omits_none_fields():
Expand Down Expand Up @@ -72,3 +73,44 @@ def test_feedback_event_enum_and_string_forms():
)
assert enum_event.to_dict()["feedback"]["feedback_type"] == "accept"
assert string_event.to_dict()["feedback"]["feedback_type"] == "reject"


def test_span_event_to_dict_includes_required_fields():
    # A fully-populated span event serializes with event_type and span payload.
    span_kwargs = {
        "kind": "tool",
        "name": "search",
        "duration_ms": 250,
        "status": "ok",
        "attributes": {"provider": "custom"},
    }
    serialized = SpanEvent(**span_kwargs).to_dict()
    assert serialized["event_type"] == "span"
    assert serialized["span"]["kind"] == "tool"
    assert serialized["span"]["attributes"]["provider"] == "custom"


def test_span_event_context_serializes_under_context_key():
    # Context lands at the top level; no top-level "attributes" key appears
    # when attributes were not provided.
    serialized = SpanEvent(
        kind="agent_step",
        name="plan",
        duration_ms=10,
        status="ok",
        context={"user_id": "u1"},
    ).to_dict()
    assert serialized["context"] == {"user_id": "u1"}
    assert "attributes" not in serialized


def test_span_event_attributes_and_context_are_independent():
    # Span-level attributes and event-level context serialize to different
    # locations and do not leak into each other.
    event = SpanEvent(
        kind="tool",
        name="search",
        duration_ms=50,
        status="ok",
        attributes={"provider": "custom"},
        context={"user_id": "u1"},
    )
    serialized = event.to_dict()
    expected_attributes = {"provider": "custom"}
    expected_context = {"user_id": "u1"}
    assert serialized["span"]["attributes"] == expected_attributes
    assert serialized["context"] == expected_context
Loading
Loading