Skip to content
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,5 @@ marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
.streamlit/secrets.toml
internal/
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ client = wildedge.init(
If no DSN is configured, the client becomes a no-op and logs a warning.

`init(...)` is a convenience wrapper for `WildEdge(...)` + `instrument(...)`.

## Supported integrations

**On-device**
Expand Down Expand Up @@ -105,6 +104,15 @@ For unsupported frameworks, see [Manual tracking](https://github.com/wild-edge/w

For advanced options (batching, queue tuning, dead-letter storage), see [Configuration](https://github.com/wild-edge/wildedge-python/blob/main/docs/configuration.md).

## Projects using this SDK

| Name | Link |
|---|---|
| agntr | [github.com/pmaciolek/agntr](https://github.com/pmaciolek/agntr) |
| *(your project here)* | - |

Using WildEdge in your project? Open a PR to add it to the list.

## Privacy

Report security & privacy issues to: wildedge@googlegroups.com.
Expand Down
55 changes: 55 additions & 0 deletions docs/manual-tracking.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,58 @@ handle.feedback(FeedbackType.THUMBS_DOWN)
```

`FeedbackType` values: `THUMBS_UP`, `THUMBS_DOWN`.

## Track spans for agentic workflows

Use span events to track non-inference steps like planning, tool calls, retrieval, or memory updates.

```python
from wildedge.timing import Timer

with Timer() as t:
tool_result = call_tool()

client.track_span(
kind="tool",
name="call_tool",
duration_ms=t.elapsed_ms,
status="ok",
attributes={"tool": "search"},
)
```

You can also attach optional correlation fields (`trace_id`, `span_id`,
`parent_span_id`, `run_id`, `agent_id`, `step_index`, `conversation_id`) to any
event by passing them into `track_inference`, `track_error`, `track_feedback`,
or `track_span`. Use `context=` for correlation attributes shared across events.

### Trace context helpers

Use `client.trace()` and `client.span()` to auto-populate correlation fields for
all events emitted inside the block. `client.span()` times the block and emits a
span event on exit:

```python
import wildedge
from wildedge.timing import Timer

client = wildedge.init()
handle = client.register_model(my_model, model_id="my-org/my-model")

with client.trace(run_id="run-123", agent_id="agent-1"):
with client.span(kind="agent_step", name="plan", step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```

If you need to set correlation fields without emitting a span event, use the
lower-level `span_context()` directly:

```python
with client.trace(run_id="run-123", agent_id="agent-1"):
with wildedge.span_context(step_index=1):
with Timer() as t:
result = my_model(prompt)
handle.track_inference(duration_ms=t.elapsed_ms, input_modality="text", output_modality="generation")
```
181 changes: 181 additions & 0 deletions examples/agentic_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# /// script
# requires-python = ">=3.10"
# dependencies = ["wildedge-sdk", "openai"]
#
# [tool.uv.sources]
# wildedge-sdk = { path = "..", editable = true }
# ///
"""Agentic workflow example with tool use.

Demonstrates WildEdge tracing for a simple agent that:
- Runs within a trace (one per agent session)
- Wraps each reasoning step in an agent_step span
- Wraps each tool call in a tool span
- Tracks LLM inference automatically via the OpenAI integration

Run with: uv run agentic_example.py
Requires: OPENROUTER_API_KEY environment variable. Set WILDEDGE_DSN to send events.
"""

import ast
import json
import operator
import os
import time
import uuid

from openai import OpenAI

import wildedge

# WildEdge client for this script; the "openai" integration auto-tracks
# chat-completion inference events (see module docstring above).
we = wildedge.init(
    app_version="1.0.0",
    integrations="openai",
)

# OpenRouter exposes an OpenAI-compatible endpoint, so the stock OpenAI
# client is pointed at it. Requires OPENROUTER_API_KEY in the environment.
openai_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)

# --- Tools -------------------------------------------------------------------

# Tool schemas advertised to the model (OpenAI function-calling format).
# Names here must match the keys of TOOL_HANDLERS below.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Return current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                },
                "required": ["city"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Evaluate a simple arithmetic expression.",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {"type": "string"},
                },
                "required": ["expression"],
            },
        },
    },
]


def get_weather(city: str) -> str:
    """Return a canned weather report for *city* as a JSON string."""
    # Simulate the latency of a real weather API call (~150ms).
    time.sleep(0.15)
    report = {
        "city": city,
        "temperature_c": 18,
        "condition": "partly cloudy",
    }
    return json.dumps(report)


def calculator(expression: str) -> str:
# ~60ms to simulate a remote computation call.
time.sleep(0.06)
try:
result = eval(expression, {"__builtins__": {}}) # noqa: S307
return json.dumps({"expression": expression, "result": result})
except Exception as e:
return json.dumps({"error": str(e)})


# Dispatch table: tool name as advertised in TOOLS -> local handler function.
TOOL_HANDLERS = {
    "get_weather": get_weather,
    "calculator": calculator,
}


# --- Agent loop --------------------------------------------------------------


def call_tool(name: str, arguments: dict) -> str:
    """Execute the tool *name* inside a WildEdge "tool" span.

    The span records a truncated JSON dump of the arguments as its input
    summary and the first 200 characters of the tool output as its output
    summary.
    """
    args_summary = json.dumps(arguments)[:200]
    with we.span(kind="tool", name=name, input_summary=args_summary) as span:
        output = TOOL_HANDLERS[name](**arguments)
        span.output_summary = output[:200]
        return output


def retrieve_context(query: str) -> str:
    """Simulate a vector-store lookup (~120ms) inside a retrieval span."""
    query_summary = query[:200]
    with we.span(kind="retrieval", name="vector_search", input_summary=query_summary) as span:
        # Simulated vector-store latency.
        time.sleep(0.12)
        context = f"[context: background knowledge relevant to '{query[:40]}']"
        span.output_summary = context
        return context


def run_agent(task: str, step_index: int, messages: list[dict]) -> str:
    """Run one agent task to completion and return the final assistant reply.

    Each loop iteration makes one chat-completion call inside an
    `agent_step` span; if the model requests tools, each call is executed
    via call_tool() (emitting a "tool" span) and the results are appended
    to the shared history, then the loop continues.

    Args:
        task: Natural-language task for the agent.
        step_index: Starting index for agent_step spans; incremented once
            per round of tool calls.
        messages: Shared chat history; mutated in place.

    Returns:
        The assistant's final text content ("" if the model returned none).
    """
    # Fetch context before the first reasoning step, include it in the user turn.
    context = retrieve_context(task)
    messages.append({"role": "user", "content": f"{task}\n\nContext: {context}"})

    while True:
        # The span covers only the LLM call; tool execution below has its
        # own spans, so they nest cleanly in the trace view.
        with we.span(
            kind="agent_step",
            name="reason",
            step_index=step_index,
            input_summary=task[:200],
        ) as span:
            response = openai_client.chat.completions.create(
                model="qwen/qwen3.5-flash-02-23",
                messages=messages,
                tools=TOOLS,
                tool_choice="auto",
                max_tokens=512,
            )
            choice = response.choices[0]
            span.output_summary = choice.finish_reason

        # Record the assistant turn (including any tool_calls) in history.
        messages.append(choice.message.model_dump(exclude_none=True))

        if choice.finish_reason == "tool_calls":
            step_index += 1
            for tool_call in choice.message.tool_calls:
                arguments = json.loads(tool_call.function.arguments)
                result = call_tool(tool_call.function.name, arguments)
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": result,
                    }
                )
            # Not instrumented: context window update between tool calls (~80ms).
            # Shows up as a gap stripe in the trace view.
            time.sleep(0.08)
        else:
            return choice.message.content or ""


# --- Main --------------------------------------------------------------------

# Demo tasks that exercise the tools defined above.
TASKS = [
    "What's the weather like in Tokyo, and what is 42 * 18?",
    "Is it warmer in Paris or Berlin right now?",
]

system_prompt = "You are a helpful assistant. Use tools when needed."
# Chat history is shared across tasks, seeded with the system prompt.
messages = [{"role": "system", "content": system_prompt}]

# One trace per agent session: every span and inference event emitted
# inside the block carries this run_id/agent_id for correlation.
with we.trace(agent_id="demo-agent", run_id=str(uuid.uuid4())):
    for i, task in enumerate(TASKS, start=1):
        print(f"\nTask {i}: {task}")
        reply = run_agent(task, step_index=i, messages=messages)
        print(f"Reply: {reply}")

# Deliver any queued events before the script exits.
we.flush()
9 changes: 6 additions & 3 deletions examples/gguf_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
]

for prompt in prompts:
result = llm(prompt, max_tokens=128, temperature=0.7)
text = result["choices"][0]["text"].strip()
print(f"Q: {prompt}\nA: {text}\n")
stream = llm(prompt, max_tokens=128, temperature=0.7, stream=True)
print(f"Q: {prompt}\nA: ", end="", flush=True)
for chunk in stream:
token = chunk["choices"][0].get("text", "")
print(token, end="", flush=True)
print("\n")
10 changes: 8 additions & 2 deletions examples/openai_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,19 @@
]

for prompt in prompts:
response = openai_client.chat.completions.create(
stream = openai_client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": prompt}],
temperature=0.7,
max_tokens=256,
stream=True,
stream_options={"include_usage": True},
)
print(f"Q: {prompt}\nA: {response.choices[0].message.content}\n")
print(f"Q: {prompt}\nA: ", end="", flush=True)
for chunk in stream:
if chunk.choices and chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
print("\n")

client.flush()
print("Done. Events flushed to WildEdge.")
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "wildedge-sdk"
version = "0.1.2"
version = "0.1.3"
description = "On-device ML inference monitoring for Python"
readme = "README.md"
requires-python = ">=3.10"
Expand Down Expand Up @@ -42,6 +42,7 @@ build-backend = "hatchling.build"
[tool.hatch.build]
exclude = [
"/scripts",
"/examples",
]

[tool.hatch.build.targets.wheel]
Expand Down
42 changes: 42 additions & 0 deletions tests/test_event_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from wildedge.events.inference import InferenceEvent, TextInputMeta
from wildedge.events.model_download import AdapterDownload, ModelDownloadEvent
from wildedge.events.model_load import AdapterLoad, ModelLoadEvent
from wildedge.events.span import SpanEvent


def test_inference_event_to_dict_omits_none_fields():
Expand Down Expand Up @@ -72,3 +73,44 @@ def test_feedback_event_enum_and_string_forms():
)
assert enum_event.to_dict()["feedback"]["feedback_type"] == "accept"
assert string_event.to_dict()["feedback"]["feedback_type"] == "reject"


def test_span_event_to_dict_includes_required_fields():
    """A serialized span event carries event_type, kind, and attributes."""
    evt = SpanEvent(
        kind="tool",
        name="search",
        duration_ms=250,
        status="ok",
        attributes={"provider": "custom"},
    )
    serialized = evt.to_dict()
    assert serialized["event_type"] == "span"
    assert serialized["span"]["kind"] == "tool"
    assert serialized["span"]["attributes"]["provider"] == "custom"


def test_span_event_context_serializes_under_context_key():
    """Correlation context lands under the top-level "context" key only."""
    evt = SpanEvent(
        kind="agent_step",
        name="plan",
        duration_ms=10,
        status="ok",
        context={"user_id": "u1"},
    )
    serialized = evt.to_dict()
    assert serialized["context"] == {"user_id": "u1"}
    assert "attributes" not in serialized


def test_span_event_attributes_and_context_are_independent():
    """Span attributes and correlation context serialize to separate keys."""
    evt = SpanEvent(
        kind="tool",
        name="search",
        duration_ms=50,
        status="ok",
        attributes={"provider": "custom"},
        context={"user_id": "u1"},
    )
    serialized = evt.to_dict()
    assert serialized["span"]["attributes"] == {"provider": "custom"}
    assert serialized["context"] == {"user_id": "u1"}
Loading
Loading