-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
97 lines (94 loc) · 3.91 KB
/
docker-compose.yml
File metadata and controls
97 lines (94 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# HermesClaw — Hermes Agent + local llama.cpp inference
#
# QUICK START (Docker — no NVIDIA hardware required)
# --------------------------------------------------
# 1. Copy and fill in your settings:
# cp .env.example .env
# # Edit .env: set MODEL_FILE=your-model-name.gguf
#
# 2. Download a model into models/:
# curl -L -o models/your-model.gguf <huggingface-url>
#
# 3. Start llama-server on your HOST machine FIRST:
# (Hermes connects to it via host.docker.internal:8080)
#
# macOS:
# brew install llama.cpp
# llama-server -m models/your-model.gguf --port 8080 --ctx-size 32768 -ngl 99 --log-disable
#
# Linux (build: https://github.com/ggerganov/llama.cpp#build):
# llama-server -m models/your-model.gguf --port 8080 --ctx-size 32768 -ngl 99
#
# NOTE: --ctx-size 32768 is required. Hermes system prompt alone is ~11k tokens;
# values below 32768 cause "context length exceeded" on every query.
#
# 4. Start the Hermes container:
# docker compose up
#
# 5. Test Hermes:
# docker exec -it hermesclaw hermes chat -q "hello"
# docker exec -it hermesclaw hermes status
#
# For OpenShell sandbox mode (NVIDIA hardware):
# ./scripts/setup.sh
# hermesclaw start
services:
  # ── Hermes Agent ────────────────────────────────────────────────────────────
  # llama-server runs on the HOST via Homebrew (llama-server binary).
  # On macOS, running llama.cpp natively is faster (Apple Metal GPU) than
  # running it inside Docker (emulated ARM). The container reaches it via
  # host.docker.internal which resolves to the host on Docker Desktop for Mac.
  #
  # Start llama-server before docker compose up:
  #   llama-server -m models/<model>.gguf --port 8080 --host 127.0.0.1
  hermesclaw:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: hermesclaw
    env_file:
      # path/required attributes need Docker Compose v2.24+
      - path: .env
        required: false  # Start even without a .env file
    environment:
      # Inference: host llama-server reachable from Docker via host.docker.internal
      # OpenShell: OPENAI_BASE_URL is overridden to http://inference.local/v1
      # Local llama.cpp server running on the host (via Homebrew)
      OPENAI_BASE_URL: "http://host.docker.internal:${LLAMA_PORT:-8080}/v1"
      # Do NOT set OPENAI_API_KEY here — hermes treats it as an OpenRouter key
      # and routes to cloud. Local inference uses the config.yaml provider=llamacpp.
      OPENAI_API_KEY: ""
      OPENROUTER_API_KEY: ""
      HERMES_HOME: "/root/.hermes"
      # Gateway tokens (override in .env)
      TELEGRAM_BOT_TOKEN: "${TELEGRAM_BOT_TOKEN:-}"
      DISCORD_BOT_TOKEN: "${DISCORD_BOT_TOKEN:-}"
      SLACK_BOT_TOKEN: "${SLACK_BOT_TOKEN:-}"
      # Privacy routing — sensitivity threshold for local vs cloud inference
      # 0.0 = always local (the default shipped here), 1.0 = always cloud;
      # intermediate values (e.g. 0.7) route only low-sensitivity queries to cloud
      HERMES_PRIVACY_THRESHOLD: "${HERMES_PRIVACY_THRESHOLD:-0.0}"
      # Approval mode: manual | smart | off
      HERMES_APPROVAL_MODE: "${HERMES_APPROVAL_MODE:-smart}"
    volumes:
      # Persist Hermes memories across container restarts
      - hermesclaw-memories:/root/.hermes/memories
      # Persist skills across container restarts (self-improvement survives restarts)
      - hermesclaw-skills:/root/.hermes/skills
      # Mount user knowledge docs read-only for RAG context
      - ./knowledge:/sandbox/knowledge:ro
      # Mount persona + config files read-only
      - ./configs:/sandbox/configs:ro
    ports:
      # Expose gateway webhook port (for Telegram webhook mode)
      - "${HERMESCLAW_PORT:-8090}:8090"
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "hermes", "status"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
# Named volumes so agent state (memories, learned skills) survives
# `docker compose down` — only `docker compose down -v` removes them.
volumes:
  hermesclaw-memories:
    driver: local
  hermesclaw-skills:
    driver: local