-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
97 lines (94 loc) · 3.91 KB
/
docker-compose.yml
File metadata and controls
97 lines (94 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# HermesClaw — Hermes Agent + local llama.cpp inference
#
# QUICK START (Docker — no NVIDIA hardware required)
# --------------------------------------------------
# 1. Copy and fill in your settings:
# cp .env.example .env
# # Edit .env: set MODEL_FILE=your-model-name.gguf
#
# 2. Download a model into models/:
# curl -L -o models/your-model.gguf <huggingface-url>
#
# 3. Start llama-server on your HOST machine FIRST:
# (Hermes connects to it via host.docker.internal:8080)
#
# macOS:
# brew install llama.cpp
# llama-server -m models/your-model.gguf --port 8080 --ctx-size 32768 -ngl 99 --log-disable
#
# Linux (build: https://github.com/ggerganov/llama.cpp#build):
# llama-server -m models/your-model.gguf --port 8080 --ctx-size 32768 -ngl 99
#
# NOTE: --ctx-size 32768 is required. Hermes system prompt alone is ~11k tokens;
# values below 32768 cause "context length exceeded" on every query.
#
# 4. Start the Hermes container:
# docker compose up
#
# 5. Test Hermes:
# docker exec -it hermesclaw hermes chat -q "hello"
# docker exec -it hermesclaw hermes status
#
# For OpenShell sandbox mode (NVIDIA hardware):
# ./scripts/setup.sh
# hermesclaw start
services:
  # ── Hermes Agent ────────────────────────────────────────────────────────────
  # llama-server runs on the HOST via Homebrew (llama-server binary).
  # On macOS, running llama.cpp natively is faster (Apple Metal GPU) than
  # running it inside Docker (emulated ARM). The container reaches it via
  # host.docker.internal which resolves to the host on Docker Desktop for Mac.
  #
  # Start llama-server before docker compose up:
  #   llama-server -m models/<model>.gguf --port 8080 --host 127.0.0.1
  hermesclaw:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: hermesclaw
    env_file:
      # path/required attributes need Docker Compose v2.24+
      - path: .env
        required: false  # Start even without a .env file
    environment:
      # Inference: host llama-server reachable from Docker via host.docker.internal
      # OpenShell: OPENAI_BASE_URL is overridden to http://inference.local/v1
      # Local llama.cpp server running on the host (via Homebrew)
      OPENAI_BASE_URL: "http://host.docker.internal:${LLAMA_PORT:-8080}/v1"
      # Do NOT set OPENAI_API_KEY here — hermes treats it as an OpenRouter key
      # and routes to cloud. Local inference uses the config.yaml provider=llamacpp.
      OPENAI_API_KEY: ""
      OPENROUTER_API_KEY: ""
      HERMES_HOME: "/root/.hermes"
      # Gateway tokens (override in .env)
      TELEGRAM_BOT_TOKEN: "${TELEGRAM_BOT_TOKEN:-}"
      DISCORD_BOT_TOKEN: "${DISCORD_BOT_TOKEN:-}"
      SLACK_BOT_TOKEN: "${SLACK_BOT_TOKEN:-}"
      # Privacy routing — sensitivity threshold for local vs cloud inference
      # 0.0 = always local (the default shipped here), 1.0 = always cloud;
      # intermediate values (e.g. 0.7) route only low-sensitivity queries to cloud
      HERMES_PRIVACY_THRESHOLD: "${HERMES_PRIVACY_THRESHOLD:-0.0}"
      # Approval mode: manual | smart | off
      HERMES_APPROVAL_MODE: "${HERMES_APPROVAL_MODE:-smart}"
    volumes:
      # Persist Hermes memories across container restarts
      - hermesclaw-memories:/root/.hermes/memories
      # Persist skills across container restarts (self-improvement survives restarts)
      - hermesclaw-skills:/root/.hermes/skills
      # Mount user knowledge docs read-only for RAG context
      - ./knowledge:/sandbox/knowledge:ro
      # Mount persona + config files read-only
      - ./configs:/sandbox/configs:ro
    ports:
      # Expose gateway webhook port (for Telegram webhook mode)
      - "${HERMESCLAW_PORT:-8090}:8090"
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "hermes", "status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
# Named volumes so agent state (memories, learned skills) survives
# `docker compose down` — only `docker compose down -v` removes them.
volumes:
  hermesclaw-memories:
    driver: local
  hermesclaw-skills:
    driver: local