# docker-compose.yml
services:
  # ────────────────────────────────────────────────────────────────────────
  # Kafka-compatible broker (Redpanda, no Zookeeper)
  # DLQ topics auto-created on first connect.
  # NOTE(review): nothing in this file enables broker-side topic
  # auto-creation; presumably the client creates the DLQ topics — confirm.
  # ────────────────────────────────────────────────────────────────────────
  kafka:
    image: docker.redpanda.com/redpandadata/redpanda:v24.1.9
    container_name: telemetry_kafka
    restart: unless-stopped
    command:
      - redpanda
      - start
      - --overprovisioned
      - --smp
      - "1"
      - --memory
      - 1G
      - --reserve-memory
      - 0M
      - --node-id
      - "0"
      - --check=false
      # Two listeners: INTERNAL (29092) is advertised as kafka:29092 for other
      # compose services; EXTERNAL (9092) is advertised as localhost:9092 and
      # matches the port published to the host below.
      - --kafka-addr
      - INTERNAL://0.0.0.0:29092,EXTERNAL://0.0.0.0:9092
      - --advertise-kafka-addr
      - INTERNAL://kafka:29092,EXTERNAL://localhost:9092
    ports:
      - "9092:9092"
    volumes:
      - telemetry_kafka_data:/var/lib/redpanda/data
    # The telemetry service waits on this check (depends_on: service_healthy).
    healthcheck:
      test: ["CMD", "rpk", "cluster", "info", "--brokers", "localhost:9092"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ────────────────────────────────────────────────────────────────────────
  # Telemetry workload (built from ./Dockerfile) with AMD GPU passthrough.
  # Starts an interactive shell by default.
  # ────────────────────────────────────────────────────────────────────────
  telemetry:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: telemetry_rocm
    restart: unless-stopped
    shm_size: '8gb'
    # Do not start until the broker's healthcheck passes.
    depends_on:
      kafka:
        condition: service_healthy
    # ────────────────────────────────────────────────────────────────────────
    # ENVIRONMENT: ROCm + GPU Configuration
    # ────────────────────────────────────────────────────────────────────────
    environment:
      # Kafka DLQ routing — use the internal compose listener
      - KAFKA_BOOTSTRAP_SERVERS=kafka:29092
      # ROCm runtime configuration for AMD GPU (7900 XT / RDNA3)
      - ROCM_HOME=/opt/rocm
      - LD_LIBRARY_PATH=/opt/rocm/lib:/opt/rocm-6.2.0/lib:/usr/local/lib
      # NOTE(review): Compose interpolates $PATH from the environment of the
      # `docker compose` invocation (the host), not from the image, so the
      # container PATH becomes the ROCm dirs + the host's PATH. Confirm this
      # is intended, or set PATH in the Dockerfile instead.
      - PATH=/opt/rocm/bin:/opt/rocm/sbin:$PATH
      # GPU-specific overrides (gfx1100 = 7900 XT)
      - HSA_OVERRIDE_GFX_VERSION=11.0.0
      - HSA_ENABLE_SDMA=0
    # ────────────────────────────────────────────────────────────────────────
    # GPU DEVICE PASSTHROUGH
    # ────────────────────────────────────────────────────────────────────────
    # For LINUX: Native ROCm GPU passthrough (recommended for production)
    devices:
      - /dev/kfd:/dev/kfd  # AMD KFD (Kernel Fusion Driver)
      - /dev/dri:/dev/dri  # DRI render nodes (GPU access)
    # For WSL2: Uncomment below + comment out /dev/kfd and /dev/dri above
    # devices:
    #   - /dev/dxg:/dev/dxg  # WSL2 DirectX gateway (doesn't support ROCm)
    #   - /dev/dxg:/dev/kfd
    # ────────────────────────────────────────────────────────────────────────
    # VOLUMES: Code and Data Mounting
    # ────────────────────────────────────────────────────────────────────────
    volumes:
      # Mount repo code (allows live edits)
      - .:/app
      # Local docker volume for outputs (persistent between runs)
      - telemetry_outputs:/app/outputs
      - telemetry_results:/app/results
      # For WSL2: Map Windows host path to container
      # Uncomment if running under WSL2 with Windows host
      # - /mnt/g/Docker/resilient-rap-framework:/app
    # ────────────────────────────────────────────────────────────────────────
    # SECURITY & NETWORKING
    # ────────────────────────────────────────────────────────────────────────
    working_dir: /app
    security_opt:
      - seccomp:unconfined
    # NOTE(review): SYS_ADMIN grants very broad privileges; confirm it is
    # actually required before using this file outside a dev machine.
    cap_add:
      - SYS_ADMIN
    group_add:
      - video   # GPU access (required for /dev/dri)
      - render  # Additional render permissions
    # ────────────────────────────────────────────────────────────────────────
    # DEFAULT COMMAND
    # ────────────────────────────────────────────────────────────────────────
    # Keep stdin open and allocate a TTY: without them the interactive bash
    # below reads EOF and exits immediately under `docker compose up`, and
    # `restart: unless-stopped` then restarts the container in a loop.
    stdin_open: true
    tty: true
    command: ["/bin/bash", "-c", "exec /bin/bash"]
# ────────────────────────────────────────────────────────────────────────────
# PERSISTENT VOLUMES
# ────────────────────────────────────────────────────────────────────────────
# Named volumes referenced by the services above; each uses the default
# driver and options (explicit empty mapping rather than a bare null value).
volumes:
  telemetry_outputs: {}
  telemetry_results: {}
  telemetry_kafka_data: {}