llama-stack upstream reached version 0.5.1, while ramalama / ramalama-stack is using version 0.2.14.
It would be nice to keep things more in sync with the upstream project. One big change in 0.5.0 is that the OpenAI API endpoint is now /v1 instead of /v1/openai/v1.
{
"Accelerator": "cuda",
"Config": {
"benchmarks": {},
"provider": {
"openai": {}
},
"settings": {
"config_files": [
"/home/christian/.local/share/uv/tools/ramalama/share/ramalama/ramalama.conf"
]
},
"user": {}
},
"Engine": {
"Info": {
"host": {
"arch": "amd64",
"buildahVersion": "1.43.0",
"cgroupControllers": [
"memory",
"pids"
],
"cgroupManager": "systemd",
"cgroupVersion": "v2",
"conmon": {
"package": "conmon-1:2.2.1-1.1",
"path": "/usr/bin/conmon",
"version": "conmon version 2.2.1, commit: c8cc2c4db27531bd4e084ce7857f73cd21ee639d"
},
"cpuUtilization": {
"idlePercent": 89.83,
"systemPercent": 3.23,
"userPercent": 6.94
},
"cpus": 8,
"databaseBackend": "sqlite",
"distribution": {
"distribution": "cachyos",
"version": "unknown"
},
"eventLogger": "journald",
"freeLocks": 2029,
"hostname": "kafka",
"idMappings": {
"gidmap": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
],
"uidmap": [
{
"container_id": 0,
"host_id": 1000,
"size": 1
},
{
"container_id": 1,
"host_id": 100000,
"size": 65536
}
]
},
"kernel": "6.18.13-2-cachyos-lts",
"linkmode": "dynamic",
"logDriver": "journald",
"memFree": 5633376256,
"memTotal": 16624513024,
"networkBackend": "netavark",
"networkBackendInfo": {
"backend": "netavark",
"defaultNetwork": "podman",
"dns": {
"package": "aardvark-dns-1.17.0-1.1",
"path": "/usr/lib/podman/aardvark-dns",
"version": "aardvark-dns 1.17.0"
},
"package": "netavark-1.17.2-1.1",
"path": "/usr/lib/podman/netavark",
"version": "netavark 1.17.2"
},
"ociRuntime": {
"name": "crun",
"package": "crun-1.26-3.1",
"path": "/usr/bin/crun",
"version": "crun version 1.26\ncommit: 3241e671f92c33b0c003cd7de319e4f32add6231\nrundir: /run/user/1000/crun\nspec: 1.0.0\n+SYSTEMD +SELINUX +APPARMOR +CAP +SECCOMP +EBPF +CRIU +LIBKRUN +YAJL"
},
"os": "linux",
"pasta": {
"executable": "/usr/bin/pasta",
"package": "passt-2026_01_20.386b5f5-1.1",
"version": "pasta 2026_01_20.386b5f5\nCopyright Red Hat\nGNU General Public License, version 2 or later\n <https://www.gnu.org/licenses/old-licenses/gpl-2.0.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"
},
"remoteSocket": {
"exists": true,
"path": "/run/user/1000/podman/podman.sock"
},
"rootlessNetworkCmd": "pasta",
"security": {
"apparmorEnabled": false,
"capabilities": "CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_NET_BIND_SERVICE,CAP_SETFCAP,CAP_SETGID,CAP_SETPCAP,CAP_SETUID,CAP_SYS_CHROOT",
"rootless": true,
"seccompEnabled": true,
"seccompProfilePath": "/etc/containers/seccomp.json",
"selinuxEnabled": false
},
"serviceIsRemote": false,
"slirp4netns": {
"executable": "",
"package": "",
"version": ""
},
"swapFree": 9191038976,
"swapTotal": 16624119808,
"uptime": "161h 50m 56.00s (Approximately 6.71 days)",
"variant": ""
},
"plugins": {
"authorization": null,
"log": [
"k8s-file",
"none",
"passthrough",
"journald"
],
"network": [
"bridge",
"macvlan",
"ipvlan"
],
"volume": [
"local"
]
},
"registries": {},
"store": {
"configFile": "/home/christian/.config/containers/storage.conf",
"containerStore": {
"number": 16,
"paused": 0,
"running": 7,
"stopped": 9
},
"graphDriverName": "overlay",
"graphOptions": {},
"graphRoot": "/home/christian/.local/share/containers/storage",
"graphRootAllocated": 838860800000,
"graphRootUsed": 88629252096,
"graphStatus": {
"Backing Filesystem": "btrfs",
"Native Overlay Diff": "true",
"Supports d_type": "true",
"Supports shifting": "false",
"Supports volatile": "true",
"Using metacopy": "false"
},
"imageCopyTmpDir": "/var/tmp",
"imageStore": {
"number": 42
},
"runRoot": "/run/user/1000/containers",
"transientStore": false,
"volumePath": "/home/christian/.local/share/containers/storage/volumes"
},
"version": {
"APIVersion": "5.8.0",
"Built": 1771872866,
"BuiltTime": "Mon Feb 23 19:54:26 2026",
"GitCommit": "07efc23e05c3d9aa15a0f30d57194737bfc4b6b1",
"GoVersion": "go1.26.0-X:nodwarf5",
"Os": "linux",
"OsArch": "linux/amd64",
"Version": "5.8.0"
}
},
"Name": "podman"
},
"Image": "quay.io/ramalama/cuda:latest",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/llama.cpp.yaml",
"mlx": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/mlx.yaml",
"schema.1-0-0": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/schema.1-0-0.json",
"vllm": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/vllm.yaml"
},
"Schema": {
"1-0-0": "/home/christian/.local/share/uv/tools/ramalama/share/ramalama/inference/schema.1-0-0.json"
}
},
"RagImage": "quay.io/ramalama/cuda-rag:latest",
"Selinux": false,
"Shortnames": {
"Files": [
"/home/christian/.local/share/uv/tools/ramalama/share/ramalama/shortnames.conf"
],
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
"deepseek": "ollama://deepseek-r1",
"dragon": "huggingface://llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf",
"gemma3": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3:12b": "hf://ggml-org/gemma-3-12b-it-GGUF",
"gemma3:1b": "hf://ggml-org/gemma-3-1b-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf",
"gemma3:27b": "hf://ggml-org/gemma-3-27b-it-GGUF",
"gemma3:4b": "hf://ggml-org/gemma-3-4b-it-GGUF",
"gemma3n": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e2b": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e2b-it-f16": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-f16.gguf",
"gemma3n:e2b-it-q8_0": "hf://ggml-org/gemma-3n-E2B-it-GGUF/gemma-3n-E2B-it-Q8_0.gguf",
"gemma3n:e4b": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gemma3n:e4b-it-f16": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-f16.gguf",
"gemma3n:e4b-it-q8_0": "hf://ggml-org/gemma-3n-E4B-it-GGUF/gemma-3n-E4B-it-Q8_0.gguf",
"gpt-5.1": "openai://gpt-5.1-2025-11-13",
"gpt-oss": "hf://ggml-org/gpt-oss-20b-GGUF",
"gpt-oss:120b": "hf://ggml-org/gpt-oss-120b-GGUF",
"gpt-oss:20b": "hf://ggml-org/gpt-oss-20b-GGUF",
"granite": "ollama://granite3.1-dense",
"granite-be-3.0:1b": "hf://taronaeo/Granite-3.0-1B-A400M-Instruct-BE-GGUF/granite-3.0-1b-a400m-instruct-be.Q2_K.gguf",
"granite-be-3.3:2b": "hf://taronaeo/Granite-3.3-2B-Instruct-BE-GGUF/granite-3.3-2b-instruct-be.Q4_K_M.gguf",
"granite-lab-7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite-lab-8b": "huggingface://ibm-granite/granite-3.3-8b-instruct-GGUF/granite-3.3-8b-instruct-Q4_K_M.gguf",
"granite-lab:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:2b": "ollama://granite3.1-dense:2b",
"granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:8b": "ollama://granite3.1-dense:8b",
"hermes": "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"ibm/granite": "ollama://granite3.1-dense:8b",
"ibm/granite:2b": "ollama://granite3.1-dense:2b",
"ibm/granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"ibm/granite:8b": "ollama://granite3.1-dense:8b",
"merlinite": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab-7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"mistral": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral-small3.1": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral-small3.1:24b": "hf://bartowski/mistralai_Mistral-Small-3.1-24B-Instruct-2503-GGUF/mistralai_Mistral-Small-3.1-24B-Instruct-2503-IQ2_M.gguf",
"mistral:7b": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral:7b-v1": "huggingface://TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
"mistral:7b-v2": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v3": "hf://lmstudio-community/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
"mistral_code_16k": "huggingface://TheBloke/Mistral-7B-Code-16K-qlora-GGUF/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"mistral_codealpaca": "huggingface://TheBloke/Mistral-7B-codealpaca-lora-GGUF/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"mixtao": "huggingface://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"openchat": "huggingface://TheBloke/openchat-3.5-0106-GGUF/openchat-3.5-0106.Q4_K_M.gguf",
"openorca": "huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
"phi2": "huggingface://MaziyarPanahi/phi-2-GGUF/phi-2.Q4_K_M.gguf",
"qwen2.5vl": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:2b": "hf://ggml-org/Qwen2.5-VL-2B-Instruct-GGUF",
"qwen2.5vl:32b": "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF",
"qwen2.5vl:3b": "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF",
"qwen2.5vl:7b": "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF",
"smollm:135m": "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF",
"smolvlm": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"smolvlm:256m": "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
"smolvlm:2b": "hf://ggml-org/SmolVLM-Instruct-GGUF",
"smolvlm:500m": "hf://ggml-org/SmolVLM-500M-Instruct-GGUF",
"stories-be:260k": "hf://taronaeo/tinyllamas-BE/stories260K-be.gguf",
"tiny": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"tinyllama": "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
}
},
"Store": "/home/christian/.local/share/ramalama",
"UseContainer": true,
"Version": "0.17.1"
}
Issue Description
llama-stack upstream reached version 0.5.1, while ramalama / ramalama-stack is using version 0.2.14.
Steps to reproduce the issue
follow the getting started https://llamastack.github.io/docs/getting_started/detailed_tutorial#step-3-run-client-cli
(Note that I am running the patched ramalama-stack from containers/ramalama-stack#148.) It is nice that the client tells me to "upgrade" the server. The official llama-stack pinned by ramalama-stack is version 0.2.14 (https://github.com/containers/ramalama-stack/blob/main/pyproject.toml#L24).
Describe the results you received
I cannot use the latest llama-stack-client or its client-side libraries.
Describe the results you expected
It would be nice to keep things more in sync with the upstream project. One big change in 0.5.0 is that the OpenAI API endpoint is now /v1 instead of /v1/openai/v1.
ramalama info output
Upstream Latest Release
Yes
Additional environment details
No response
Additional information
I started an integration for Home Assistant (https://gitlab.com/homeassistant2081608/ramalama_conversation), which is based on ramalama and its llama-stack, and it would be nice to stay up to date until things stabilize a bit.