Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ dependencies = [
"datasets>=3.6.0",
"fastapi>=0.115.12",
"httpx>=0.28.1",
"llama-stack==0.2.14",
"llama-stack==0.2.23",
"mcp>=1.9.2",
"milvus-lite>=2.5.1",
"numpy>=2.2.6",
"openai>=1.82.0",
"opentelemetry-exporter-otlp-proto-http>=1.33.1",
Expand All @@ -31,7 +32,7 @@ dependencies = [
"psutil>=7.0.0",
"pydantic>=2.11.5",
"pymilvus>=2.5.10",
"ramalama==0.10.1",
"ramalama==0.17.1",
"requests>=2.32.3",
"sentence-transformers>=3.0.0",
"six>=1.17.0",
Expand All @@ -40,6 +41,7 @@ dependencies = [
"trl>=0.18.1",
"urllib3>=2.4.0",
"uvicorn>=0.34.2",
"setuptools<70",
]

[dependency-groups]
Expand Down
70 changes: 48 additions & 22 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@ accelerate==1.7.0
# trl
aiohappyeyeballs==2.6.1
# via aiohttp
aiohttp==3.12.7
aiohttp==3.13.3
# via
# fsspec
# llama-stack
# ramalama-stack
aiosignal==1.3.2
aiosignal==1.4.0
# via aiohttp
aiosqlite==0.21.0
# via
# llama-stack
# ramalama-stack
annotated-doc==0.0.4
# via fastapi
annotated-types==0.7.0
# via pydantic
anyio==4.9.0
Expand Down Expand Up @@ -47,6 +49,8 @@ certifi==2025.4.26
# httpcore
# httpx
# requests
cffi==2.0.0 ; platform_python_implementation != 'PyPy'
# via cryptography
chardet==5.2.0
# via ramalama-stack
charset-normalizer==3.4.2
Expand All @@ -61,6 +65,10 @@ colorama==0.4.6 ; sys_platform == 'win32'
# via
# click
# tqdm
cryptography==45.0.7
# via
# pyjwt
# python-jose
datasets==3.6.0
# via
# ramalama-stack
Expand All @@ -80,7 +88,7 @@ distro==1.9.0
# openai
ecdsa==0.19.1
# via python-jose
fastapi==0.115.12
fastapi==0.129.0
# via
# llama-stack
# ramalama-stack
Expand Down Expand Up @@ -115,7 +123,7 @@ h11==0.16.0
# httpcore
# llama-stack
# uvicorn
hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
Expand All @@ -128,7 +136,7 @@ httpx==0.28.1
# ramalama-stack
httpx-sse==0.4.0
# via mcp
huggingface-hub==0.32.4
huggingface-hub==0.36.2
# via
# accelerate
# datasets
Expand All @@ -148,6 +156,7 @@ importlib-metadata==8.6.1
jinja2==3.1.6
# via
# llama-stack
# ramalama
# torch
jiter==0.10.0
# via openai
Expand All @@ -157,24 +166,28 @@ jsonschema==4.24.0
# via
# autoevals
# llama-stack
# mcp
# ramalama
jsonschema-specifications==2025.4.1
# via jsonschema
llama-stack==0.2.14
llama-stack==0.2.23
# via ramalama-stack
llama-stack-client==0.2.14
llama-stack-client==0.2.23
# via llama-stack
lxml==5.4.0
# via blobfile
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
mcp==1.9.2
mcp==1.26.0
# via ramalama-stack
mdurl==0.1.2
# via markdown-it-py
milvus-lite==2.4.12 ; sys_platform != 'win32'
# via pymilvus
milvus-lite==2.5.1
# via
# pymilvus
# ramalama-stack
mpmath==1.3.0
# via sympy
multidict==6.4.4
Expand All @@ -185,7 +198,7 @@ multiprocess==0.70.16
# via datasets
networkx==3.5
# via torch
numpy==2.2.6
numpy==2.4.2
# via
# accelerate
# datasets
Expand Down Expand Up @@ -232,7 +245,7 @@ nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform =
# torch
nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
openai==1.84.0
openai==2.21.0
# via
# llama-stack
# ramalama-stack
Expand Down Expand Up @@ -296,17 +309,19 @@ psutil==7.0.0
# accelerate
# peft
# ramalama-stack
pyaml==25.5.0
pyaml==26.2.1
# via llama-stack-client
pyarrow==20.0.0
# via datasets
pyasn1==0.6.1
pyasn1==0.6.2
# via
# python-jose
# rsa
pycparser==3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy'
# via cffi
pycryptodomex==3.23.0
# via blobfile
pydantic==2.11.5
pydantic==2.12.5
# via
# fastapi
# llama-stack
Expand All @@ -315,12 +330,14 @@ pydantic==2.11.5
# openai
# pydantic-settings
# ramalama-stack
pydantic-core==2.33.2
pydantic-core==2.41.5
# via pydantic
pydantic-settings==2.9.1
# via mcp
pygments==2.19.1
# via rich
pyjwt==2.11.0
# via mcp
pymilvus==2.5.10
# via ramalama-stack
python-dateutil==2.9.0.post0
Expand All @@ -338,6 +355,8 @@ python-multipart==0.0.20
# mcp
pytz==2025.2
# via pandas
pywin32==311 ; sys_platform == 'win32'
# via mcp
pyyaml==6.0.2
# via
# accelerate
Expand All @@ -346,8 +365,9 @@ pyyaml==6.0.2
# huggingface-hub
# peft
# pyaml
# ramalama
# transformers
ramalama==0.10.1
ramalama==0.17.1
# via ramalama-stack
referencing==0.36.2
# via
Expand Down Expand Up @@ -389,9 +409,10 @@ scipy==1.15.3
# sentence-transformers
sentence-transformers==4.1.0
# via ramalama-stack
setuptools==80.9.0
setuptools==69.5.1
# via
# pymilvus
# ramalama-stack
# torch
# triton
six==1.17.0
Expand All @@ -408,7 +429,7 @@ sqlalchemy==2.0.41
# via ramalama-stack
sse-starlette==2.3.6
# via mcp
starlette==0.46.2
starlette==0.52.1
# via
# fastapi
# llama-stack
Expand Down Expand Up @@ -451,31 +472,36 @@ triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
trl==0.18.1
# via ramalama-stack
typing-extensions==4.14.0
typing-extensions==4.15.0
# via
# aiosignal
# aiosqlite
# anyio
# fastapi
# huggingface-hub
# llama-stack-client
# mcp
# openai
# opentelemetry-sdk
# pydantic
# pydantic-core
# referencing
# sentence-transformers
# sqlalchemy
# starlette
# torch
# typing-inspection
typing-inspection==0.4.1
typing-inspection==0.4.2
# via
# fastapi
# mcp
# pydantic
# pydantic-settings
tzdata==2025.2
# via pandas
ujson==5.10.0
# via pymilvus
urllib3==2.4.0
urllib3==2.6.3
# via
# blobfile
# ramalama-stack
Expand Down
22 changes: 5 additions & 17 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,16 @@ def run(self):
# Run the standard install
super().run()

# Write 'providers.d' to '~/.llama/providers.d'
# This allows users to see the remote providers
providers_dir = os.path.join(self.install_lib, "ramalama_stack", "providers.d")
target_dir_1 = os.path.expanduser("~/.llama/providers.d")
try:
os.makedirs(target_dir_1, exist_ok=True)
shutil.copytree(providers_dir, target_dir_1, dirs_exist_ok=True)
print(f"Copied {providers_dir} to {target_dir_1}")
except Exception as error:
print(f"Failed to copy {providers_dir} to {target_dir_1}. Error: {error}")
raise

# Write `ramalama-run.yaml` to '~/.llama/distributions/ramalama'
# This allows users to run the stack
run_yaml = os.path.join(self.install_lib, "ramalama_stack", "ramalama-run.yaml")
target_dir_2 = os.path.expanduser("~/.llama/distributions/ramalama")
target_dir = os.path.expanduser("~/.llama/distributions/ramalama")
try:
os.makedirs(target_dir_2, exist_ok=True)
shutil.copy(run_yaml, target_dir_2)
print(f"Copied {run_yaml} to {target_dir_2}")
os.makedirs(target_dir, exist_ok=True)
shutil.copy(run_yaml, target_dir)
print(f"Copied {run_yaml} to {target_dir}")
except Exception as error:
print(f"Failed to copy {providers_dir} to {target_dir_1}. Error: {error}")
print(f"Failed to copy {run_yaml} to {target_dir}. Error: {error}")
raise


Expand Down
29 changes: 0 additions & 29 deletions src/ramalama_stack/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,66 +9,37 @@
from llama_stack.providers.utils.inference.model_registry import (
ProviderModelEntry,
build_hf_repo_model_entry,
build_model_entry,
)

model_entries = [
build_hf_repo_model_entry(
"llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_entry(
"llama3.1:8b",
CoreModelId.llama3_1_8b_instruct.value,
),
build_hf_repo_model_entry(
"llama3.1:70b-instruct-fp16",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_entry(
"llama3.1:70b",
CoreModelId.llama3_1_70b_instruct.value,
),
build_hf_repo_model_entry(
"llama3.1:405b-instruct-fp16",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_entry(
"llama3.1:405b",
CoreModelId.llama3_1_405b_instruct.value,
),
build_hf_repo_model_entry(
"llama3.2:1b-instruct-fp16",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_entry(
"llama3.2:1b",
CoreModelId.llama3_2_1b_instruct.value,
),
build_hf_repo_model_entry(
"llama3.2:3b-instruct-fp16",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_entry(
"llama3.2:3b",
CoreModelId.llama3_2_3b_instruct.value,
),
build_hf_repo_model_entry(
"llama3.2-vision:11b-instruct-fp16",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_entry(
"llama3.2-vision:latest",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_hf_repo_model_entry(
"llama3.2-vision:90b-instruct-fp16",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_entry(
"llama3.2-vision:90b",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_hf_repo_model_entry(
"llama3.3:70b",
CoreModelId.llama3_3_70b_instruct.value,
Expand Down
Loading