diff --git a/inference-platforms/agent.py b/inference-platforms/agent.py
new file mode 100644
index 0000000..5c97999
--- /dev/null
+++ b/inference-platforms/agent.py
@@ -0,0 +1,68 @@
+# run like this: uv run --exact -q --env-file .env agent.py
+# /// script
+# dependencies = [
+#   "openai-agents",
+#   "httpx",
+#   "mcp",
+#   "elastic-opentelemetry",
+#   "openinference-instrumentation-openai-agents",
+#   "opentelemetry-instrumentation-httpx",
+#   "openinference-instrumentation-mcp",
+# ]
+# ///
+# ruff: noqa: E402
+from opentelemetry.instrumentation import auto_instrumentation
+
+# This must precede any other imports you want to instrument!
+auto_instrumentation.initialize()
+
+import asyncio
+import os
+from datetime import datetime, timedelta
+
+from agents import (
+    Agent,
+    OpenAIProvider,
+    RunConfig,
+    Runner,
+    Tool,
+)
+from agents.mcp import MCPServerStreamableHttp, MCPUtil
+
+
+async def run_agent(tools: list[Tool]):
+    model_name = os.getenv("AGENT_MODEL", "gpt-5-nano")
+    model = OpenAIProvider(use_responses=False).get_model(model_name)
+    agent = Agent(
+        name="flight-search-agent",
+        model=model,
+        tools=tools,
+    )
+
+    next_week = (datetime.now() + timedelta(weeks=1)).strftime("%Y-%m-%d")
+    result = await Runner.run(
+        starting_agent=agent,
+        input=f"Give me the best flight from New York to Kota Kinabalu on {next_week}",
+        run_config=RunConfig(workflow_name="flight search"),
+    )
+    print(result.final_output)
+
+
+async def main():
+    mcp_url = os.getenv("MCP_URL", "https://mcp.kiwi.com")
+    async with MCPServerStreamableHttp(
+        {
+            "url": mcp_url,
+            "timeout": 30.0,
+        },
+        cache_tools_list=True,
+        client_session_timeout_seconds=60.0,
+    ) as server:
+        tools = await server.list_tools()
+        util = MCPUtil()
+        tools = [util.to_function_tool(tool, server, False) for tool in tools]
+        await run_agent(tools)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/inference-platforms/aigw/README.md b/inference-platforms/aigw/README.md
index b6e27d2..3428c67 100644
--- a/inference-platforms/aigw/README.md
+++ b/inference-platforms/aigw/README.md
@@ -1,10 +1,23 @@
 # Envoy AI Gateway
 
-This shows how to use [Envoy AI Gateway][docs] to proxy Ollama, accessible via an
-OpenAI compatible API.
-
-Envoy AI Gateway is automatically configured by OpenAI and OpenTelemetry
-environment variables read by `aigw run`, such as `OPENAI_API_KEY`.
+This shows how to use [Envoy AI Gateway][docs] to proxy LLM and MCP servers,
+specifically Ollama and Kiwi flight search.
+
+Envoy AI Gateway exposes OpenAI- and MCP-compatible endpoints with configurable
+backends. It is automatically configured by OpenAI and OpenTelemetry
+environment variables read by `aigw run`, such as `OPENAI_API_KEY`. In the case
+of MCP, it uses the canonical `mcpServers` JSON format:
+
+```json
+{
+  "mcpServers": {
+    "kiwi": {
+      "type": "http",
+      "url": "https://mcp.kiwi.com"
+    }
+  }
+}
+```
 
 `aigw run` launches an Envoy proxy to handle requests. OpenTelemetry support
 for GenAI metrics and traces is handled directly in the `aigw` (go) binary.
@@ -33,10 +46,18 @@ docker compose down
 Once Envoy AI Gateway is running, use [uv][uv] to make an OpenAI request via
 [chat.py](../chat.py):
 
+### Chat Completion
+
 ```bash
 OPENAI_BASE_URL=http://localhost:1975/v1 uv run --exact -q --env-file env.local ../chat.py
 ```
 
+### MCP Agent
+
+```bash
+OPENAI_BASE_URL=http://localhost:1975/v1 MCP_URL=http://localhost:1975/mcp uv run --exact -q --env-file env.local ../agent.py
+```
+
 ## Notes
 
 Here are some constraints about the Envoy AI Gateway implementation:
diff --git a/inference-platforms/aigw/docker-compose.yml b/inference-platforms/aigw/docker-compose.yml
index 250781c..33ba790 100644
--- a/inference-platforms/aigw/docker-compose.yml
+++ b/inference-platforms/aigw/docker-compose.yml
@@ -1,3 +1,16 @@
+configs:
+  # MCP servers configuration for aigw
+  mcp-config:
+    content: |
+      {
+        "mcpServers": {
+          "kiwi": {
+            "type": "http",
+            "url": "https://mcp.kiwi.com"
+          }
+        }
+      }
+
 services:
   ollama-pull:
     image: alpine/ollama
@@ -24,7 +37,10 @@ services:
       - OPENAI_BASE_URL=http://host.docker.internal:11434/v1
       - OTEL_EXPORTER_OTLP_ENDPOINT=http://host.docker.internal:4318
     ports:
-      - "1975:1975"  # OpenAI compatible endpoint at /v1
-    extra_hosts:  # localhost:host-gateway trick doesn't work with aigw
+      - "1975:1975"  # OpenAI compatible endpoint at /v1, MCP server at /mcp
+    configs:
+      - source: mcp-config
+        target: /etc/aigw/mcp-servers.json
+    extra_hosts:  # localhost:host-gateway trick doesn't work with aigw
       - "host.docker.internal:host-gateway"
-    command: ["run", "/config.yaml"]
+    command: ["run", "--mcp-config", "/etc/aigw/mcp-servers.json"]
diff --git a/inference-platforms/aigw/env.local b/inference-platforms/aigw/env.local
index 699753e..5e58ab2 100644
--- a/inference-platforms/aigw/env.local
+++ b/inference-platforms/aigw/env.local
@@ -1,7 +1,8 @@
 # Override default ENV variables for Ollama
 OPENAI_BASE_URL=http://localhost:11434/v1
 OPENAI_API_KEY=unused
-CHAT_MODEL=qwen3:0.6B
+CHAT_MODEL=qwen3:0.6b
+AGENT_MODEL=qwen3:1.7b
 
 # OpenTelemetry configuration
 OTEL_SERVICE_NAME=aigw
diff --git a/inference-platforms/chat.py b/inference-platforms/chat.py
index b7365e2..bcabd27 100644
--- a/inference-platforms/chat.py
+++ b/inference-platforms/chat.py
@@ -1,3 +1,4 @@
+# run like this: uv run --exact -q --env-file .env chat.py
 # /// script
 # dependencies = [
 #   "openai",
@@ -6,13 +7,16 @@
 #   "opentelemetry-instrumentation-httpx"
 # ]
 # ///
+# ruff: noqa: E402
+from opentelemetry.instrumentation import auto_instrumentation
+
+# This must precede any other imports you want to instrument!
+auto_instrumentation.initialize()
+
 import argparse
 import os
 
 import openai
-from opentelemetry.instrumentation import auto_instrumentation
-
-auto_instrumentation.initialize()
 
 model = os.getenv("CHAT_MODEL", "gpt-4o-mini")
 
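Outside the diff itself, a quick way to sanity-check the gateway's OpenAI-compatible endpoint is a minimal script along these lines. This is a sketch, not part of the change: the base URL, placeholder API key, and `qwen3:0.6b` model come from `env.local` and the README above, the prompt text is purely illustrative, and it assumes the compose stack is up and Ollama has already pulled the model.

```python
# Minimal smoke test for aigw's OpenAI-compatible endpoint (sketch, not in the diff).
# Run after `docker compose up` in inference-platforms/aigw.
import openai

client = openai.OpenAI(
    base_url="http://localhost:1975/v1",  # aigw proxies this to Ollama
    api_key="unused",  # Ollama ignores the key, but the client requires one
)

response = client.chat.completions.create(
    model="qwen3:0.6b",  # CHAT_MODEL default from env.local
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```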