containers · nathan-weinberg · Jul 1, 2025 · Jun 30, 2025 · Jun 30, 2025 · Jun 30, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "datasets>=3.6.0",
     "fastapi>=0.115.12",
     "httpx>=0.28.1",
-    "llama-stack==0.2.11",
+    "llama-stack==0.2.13",
     "mcp>=1.9.2",
     "numpy>=2.2.6",
     "openai>=1.82.0",
@@ -31,7 +31,7 @@ dependencies = [
     "psutil>=7.0.0",
     "pydantic>=2.11.5",
     "pymilvus>=2.5.10",
-    "ramalama==0.9.3",
+    "ramalama==0.10.0",
     "requests>=2.32.3",
     "sentence-transformers>=3.0.0",
     "six>=1.17.0",

diff --git a/requirements.txt b/requirements.txt
@@ -14,7 +14,9 @@ aiohttp==3.12.7
 aiosignal==1.3.2
     # via aiohttp
 aiosqlite==0.21.0
-    # via ramalama-stack
+    # via
+    #   llama-stack
+    #   ramalama-stack
 annotated-types==0.7.0
     # via pydantic
 anyio==4.9.0
@@ -27,6 +29,8 @@ anyio==4.9.0
     #   starlette
 argcomplete==3.6.2
     # via ramalama
+asyncpg==0.30.0
+    # via llama-stack
 attrs==25.3.0
     # via
     #   aiohttp
@@ -88,7 +92,9 @@ filelock==3.18.0
     #   torch
     #   transformers
 fire==0.7.0
-    # via llama-stack
+    # via
+    #   llama-stack
+    #   llama-stack-client
 frozenlist==1.6.0
     # via
     #   aiohttp
@@ -153,9 +159,9 @@ jsonschema==4.24.0
     #   llama-stack
 jsonschema-specifications==2025.4.1
     # via jsonschema
-llama-stack==0.2.11
+llama-stack==0.2.13
     # via ramalama-stack
-llama-stack-client==0.2.12
+llama-stack-client==0.2.13
     # via llama-stack
 lxml==5.4.0
     # via blobfile
@@ -238,13 +244,16 @@ opentelemetry-api==1.33.1
 opentelemetry-exporter-otlp-proto-common==1.33.1
     # via opentelemetry-exporter-otlp-proto-http
 opentelemetry-exporter-otlp-proto-http==1.33.1
-    # via ramalama-stack
+    # via
+    #   llama-stack
+    #   ramalama-stack
 opentelemetry-proto==1.33.1
     # via
     #   opentelemetry-exporter-otlp-proto-common
     #   opentelemetry-exporter-otlp-proto-http
 opentelemetry-sdk==1.33.1
     # via
+    #   llama-stack
     #   opentelemetry-exporter-otlp-proto-http
     #   ramalama-stack
 opentelemetry-semantic-conventions==0.54b1
@@ -338,7 +347,7 @@ pyyaml==6.0.2
     #   peft
     #   pyaml
     #   transformers
-ramalama==0.9.3
+ramalama==0.10.0
     # via ramalama-stack
 referencing==0.36.2
     # via
@@ -352,7 +361,7 @@ requests==2.32.3
     # via
     #   datasets
     #   huggingface-hub
-    #   llama-stack
+    #   llama-stack-client
     #   opentelemetry-exporter-otlp-proto-http
     #   ramalama-stack
     #   tiktoken
@@ -382,7 +391,6 @@ sentence-transformers==4.1.0
     # via ramalama-stack
 setuptools==80.9.0
     # via
-    #   llama-stack
     #   pymilvus
     #   torch
     #   triton
@@ -474,6 +482,7 @@ urllib3==2.4.0
     #   requests
 uvicorn==0.34.3
     # via
+    #   llama-stack
     #   mcp
     #   ramalama-stack
 wcwidth==0.2.13

diff --git a/src/ramalama_stack/ramalama-run.yaml b/src/ramalama_stack/ramalama-run.yaml
@@ -12,65 +12,61 @@ apis:
 - tool_runtime
 - vector_io
 providers:
-  inference:
-  - provider_id: ramalama
-    provider_type: remote::ramalama
-    config:
-      url: ${env.RAMALAMA_URL:http://localhost:8080}
-  - provider_id: sentence-transformers
-    provider_type: inline::sentence-transformers
-    config: {}
-  vector_io:
-  - provider_id: milvus
-    provider_type: inline::milvus
-    config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/milvus_store.db
-  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config:
-      excluded_categories: []
+  # all available providers can be found here: https://llama-stack.readthedocs.io/en/latest/providers/index.html
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/agents_store.db
       responses_store:
         type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/responses_store.db
-  telemetry:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llamastack}
-      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ramalama}/trace_store.db
-  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/responses_store.db
   datasetio:
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/localfs_datasetio.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/meta_reference_eval.db
+  inference:
+  - provider_id: ramalama
+    provider_type: remote::ramalama
+    config:
+      url: ${env.RAMALAMA_URL:=http://localhost:8080}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  post_training:
+  - provider_id: huggingface
+    provider_type: inline::huggingface
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -81,24 +77,24 @@ providers:
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
-  post_training:
-  - provider_id: huggingface
-    provider_type: inline::huggingface
+      openai_api_key: ${env.OPENAI_API_KEY:=}  # optional; defaults to empty
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
     config:
-      checkpoint_format: huggingface
-      distributed_backend: null
-      device: cpu
+      service_name: ${env.OTEL_SERVICE_NAME:=llamastack}
+      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:=~/.llama/distributions/ramalama}/trace_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}  # optional; defaults to empty
       max_results: 3
   - provider_id: tavily-search
     provider_type: remote::tavily-search
     config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}  # optional; defaults to empty
       max_results: 3
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
@@ -109,13 +105,22 @@ providers:
   - provider_id: wolfram-alpha
     provider_type: remote::wolfram-alpha
     config:
-      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:=}  # optional; defaults to empty
+  vector_io:  # both sqlite files below are placed inside SQLITE_STORE_DIR
+  - provider_id: milvus
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/milvus.db
+      kvstore:  # sqlite kvstore file, kept alongside milvus.db
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/milvus_registry.db
 metadata_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/registry.db
 inference_store:
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ramalama}/inference_store.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ramalama}/inference_store.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
@@ -140,4 +145,4 @@ tool_groups:
   provider_id: wolfram-alpha
 server:
   port: 8321
-external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:~/.llama/providers.d}
+external_providers_dir: ${env.EXTERNAL_PROVIDERS_DIR:=~/.llama/providers.d}
diff --git a/tests/utils.sh b/tests/utils.sh
@@ -157,7 +157,8 @@ function test_llama_stack_openai_models {
 function test_llama_stack_chat_completion {
   echo "===> test_llama_stack_chat_completion: start"
   nohup uv run llama-stack-client configure --endpoint http://localhost:8321 --api-key none
-  if nohup uv run llama-stack-client inference chat-completion --message "tell me a joke" | grep -q "completion_message"; then
+  resp=$(nohup uv run llama-stack-client inference chat-completion --message "tell me a joke")
+  if echo "$resp" | grep -q "OpenAIChatCompletion"; then
     echo "===> test_llama_stack_chat_completion: pass"
     return
   else