Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# clean.sh - recreate the Tempo container to clear a port 4317 conflict.
#
# Sequence: graceful stop -> kill stray processes in any matching container
# that is still running -> remove the container -> prune unused networks ->
# start Tempo again -> show recent logs.
#
# Run from the directory containing the compose file that defines the
# "tempo" service.
echo "=== Fixing Tempo port 4317 conflict ==="

# 1. Graceful stop: allow up to 30s for shutdown before the container is killed.
docker-compose stop -t 30 tempo

# 2. Kill any remaining tempo/grpc processes inside containers whose name
#    contains "tempo". Only running containers are listed (no -a), because
#    `docker exec` cannot enter a stopped container. Failures (for example
#    pkill missing from the image, or no matching process) are deliberately
#    ignored: this step is best-effort.
for container in $(docker ps -q --filter "name=tempo"); do
    echo "Checking container $container..."
    docker exec "$container" pkill -9 -f "tempo|grpc" 2>/dev/null || true
done

# 3. Force remove the stopped service container
docker-compose rm -f tempo

# 4. Clean Docker networks.
#    WARNING: this removes every unused network on the host, not only the
#    networks created by this compose project.
docker network prune -f

# 5. Remove tempo volume (optional - will lose trace data)
# docker volume rm $(docker volume ls -q | grep tempo) 2>/dev/null || true

# 6. Start Tempo again. Healthcheck timings come from the compose file;
#    nothing in this script changes them.
echo "Starting Tempo..."
docker-compose up -d tempo

# 7. Give the container a few seconds to boot, then show its recent logs
sleep 5
echo "=== Checking Tempo logs ==="
docker-compose logs --tail=50 tempo
125 changes: 113 additions & 12 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,46 +1,137 @@
version: "3.8"
version: "3.9"

services:
# -----------------------------
# Prometheus - Metrics
# -----------------------------
prometheus:
image: prom/prometheus:latest
container_name: prometheus
ports:
- "9090:9090"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
- prometheus-data:/prometheus
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--web.enable-lifecycle"
ports:
- "9090:9090"
- --config.file=/etc/prometheus/prometheus.yml
- --web.enable-lifecycle
depends_on:
- alertmanager
alertmanager:
condition: service_healthy
restart: unless-stopped
healthcheck:
test:
[
"CMD-SHELL",
"wget --spider -q http://prometheus:9090/-/ready || exit 1",
]
interval: 10s
timeout: 5s
retries: 5

# -----------------------------
# Alertmanager - Alerts
# -----------------------------
alertmanager:
image: prom/alertmanager:latest
container_name: alertmanager
volumes:
- ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
command:
- "--config.file=/etc/alertmanager/alertmanager.yml"
ports:
- "9093:9093"
volumes:
- ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
- alertmanager-data:/alertmanager
command: ["--config.file=/etc/alertmanager/alertmanager.yml"]
restart: unless-stopped
healthcheck:
test:
[
"CMD-SHELL",
"wget --spider -q http://alertmanager:9093/-/ready || exit 1",
]
interval: 10s
timeout: 5s
retries: 5

# -----------------------------
# Grafana - Dashboards / Visualization
# -----------------------------
grafana:
image: grafana/grafana:latest
container_name: grafana
ports:
- "3000:3000"
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: admin
GF_LOG_LEVEL: info
depends_on:
- prometheus
volumes:
- grafana-storage:/var/lib/grafana
restart: unless-stopped
healthcheck:
test:
[
"CMD-SHELL",
"wget --spider -q http://grafana:3000/api/health || exit 1",
]
interval: 10s
timeout: 5s
retries: 5

# -----------------------------
# Loki - Logs aggregation
# -----------------------------
loki:
image: grafana/loki:2.8.2
container_name: loki
ports:
- "3100:3100"
user: "10001:10001"
command:
- -config.file=/etc/loki/loki-config.yaml
volumes:
- ./loki/loki-config.yaml:/etc/loki/loki-config.yaml:ro
- loki-index:/loki/index
- loki-cache:/loki/cache
- loki-chunks:/loki/chunks
- loki-wal:/loki/wal
- loki-compactor:/loki/compactor
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "wget --spider -q http://loki:3100/ready || exit 1"]
interval: 10s
timeout: 5s
retries: 5

# -----------------------------
# Tempo - Traces aggregation
# -----------------------------
tempo:
image: grafana/tempo:2.5.0
container_name: tempo
ports:
- "3200:3200" # HTTP API/Query
- "4320:4320" # Internal gRPC (server port)
- "4318:4318" # OTLP gRPC receiver (for receiving traces)
- "4319:4319" # OTLP HTTP receiver
user: "10001:10001"
volumes:
- ./tempo/tempo-config.yaml:/etc/tempo/tempo-config.yaml:ro
- tempo-data:/tempo-data
command:
- -config.file=/etc/tempo/tempo-config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "wget --spider -q http://tempo:3200/ready || exit 1"]
interval: 10s
timeout: 5s
retries: 5

# -----------------------------
# IRC relay - Alert notifications
# -----------------------------
irc-relay:
build:
context: ./irc-deamon
Expand All @@ -52,6 +143,16 @@ services:
- ./irc-deamon/config.yml:/etc/alertmanager-irc-relay/config.yml:ro
command: ["--config", "/etc/alertmanager-irc-relay/config.yml"]
restart: unless-stopped
depends_on:
- alertmanager

volumes:
prometheus-data:
alertmanager-data:
grafana-storage:
tempo-data:
loki-index:
loki-cache:
loki-chunks:
loki-wal:
loki-compactor:
158 changes: 158 additions & 0 deletions docker-compose.yml.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
version: "3.9"

# Observability stack: Prometheus + Alertmanager (metrics and alerts),
# Grafana (dashboards), Loki (logs), Tempo (traces) and an IRC relay for
# alert notifications. Every service that defines a healthcheck probes its
# own HTTP readiness endpoint with wget from inside the container.

services:
  # -----------------------------
  # Prometheus - Metrics
  # -----------------------------
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alert.rules.yml:/etc/prometheus/alert.rules.yml:ro
      - prometheus-data:/prometheus
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --web.enable-lifecycle
    # Wait until Alertmanager reports healthy before starting Prometheus.
    depends_on:
      alertmanager:
        condition: service_healthy
    restart: unless-stopped
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "wget --spider -q http://prometheus:9090/-/ready || exit 1",
        ]
      interval: 10s
      timeout: 5s
      retries: 5

  # -----------------------------
  # Alertmanager - Alerts
  # -----------------------------
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager-data:/alertmanager
    command:
      - --config.file=/etc/alertmanager/alertmanager.yml
    restart: unless-stopped
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "wget --spider -q http://alertmanager:9093/-/ready || exit 1",
        ]
      interval: 10s
      timeout: 5s
      retries: 5

  # -----------------------------
  # Grafana - Dashboards / Visualization
  # -----------------------------
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # Credentials can be overridden from the shell or an .env file; the
      # fallback after ":-" keeps the previous admin/admin default.
      GF_SECURITY_ADMIN_USER: "${GF_SECURITY_ADMIN_USER:-admin}"
      GF_SECURITY_ADMIN_PASSWORD: "${GF_SECURITY_ADMIN_PASSWORD:-admin}"
      GF_LOG_LEVEL: info
    depends_on:
      - prometheus
    volumes:
      - grafana-storage:/var/lib/grafana
    restart: unless-stopped
    healthcheck:
      test:
        [
          "CMD-SHELL",
          "wget --spider -q http://grafana:3000/api/health || exit 1",
        ]
      interval: 10s
      timeout: 5s
      retries: 5

  # -----------------------------
  # Loki - Logs aggregation
  # -----------------------------
  loki:
    image: grafana/loki:2.8.2
    container_name: loki
    ports:
      - "3100:3100"
    # Starts as root so the /loki-init.sh entrypoint is able to chown the
    # mounted volumes before it execs Loki.
    user: "0:0"
    # user: "10001:10001" # ensures proper permissions on volumes
    # The entrypoint wrapper receives the `command` items below as "$@".
    entrypoint: ["/loki-init.sh"]
    command:
      - -config.file=/etc/loki/loki-config.yaml
    volumes:
      - ./loki/loki-config.yaml:/etc/loki/loki-config.yaml:ro
      # The entrypoint script must exist inside the container. The host file
      # needs its executable bit set, since bind mounts keep the host mode.
      # NOTE(review): assumes loki-init.sh sits next to this compose file.
      - ./loki-init.sh:/loki-init.sh:ro
      - loki-index:/loki/index
      # - loki-cache:/loki/cache
      - loki-chunks:/loki/chunks
      # - ./wal:/wal
      - loki-wal:/loki/wal
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --spider -q http://loki:3100/ready || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  # -----------------------------
  # Tempo - Traces aggregation
  # -----------------------------
  tempo:
    image: grafana/tempo:2.6.0
    container_name: tempo
    ports:
      - "3200:3200" # HTTP API
      - "4317:4317" # OTLP gRPC
    user: "10001:10001" # ensures proper permissions
    volumes:
      - ./tempo/tempo-config.yaml:/etc/tempo/tempo-config.yaml:ro
      - tempo-data:/tempo-data
    command:
      - -config.file=/etc/tempo/tempo-config.yaml
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --spider -q http://tempo:3200/ready || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  # -----------------------------
  # IRC relay - Alert notifications
  # -----------------------------
  irc-relay:
    build:
      context: ./irc-deamon
      dockerfile: Dockerfile.irc
    container_name: irc-relay
    ports:
      - "8010:8010"
    volumes:
      - ./irc-deamon/config.yml:/etc/alertmanager-irc-relay/config.yml:ro
    command: ["--config", "/etc/alertmanager-irc-relay/config.yml"]
    restart: unless-stopped
    depends_on:
      - alertmanager

# Named volumes. loki-cache stays declared although its mount in the loki
# service is currently commented out.
volumes:
  prometheus-data:
  alertmanager-data:
  grafana-storage:
  tempo-data:
  loki-index:
  loki-cache:
  loki-chunks:
  loki-wal:
14 changes: 14 additions & 0 deletions loki-init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh
# loki-init.sh
# Entrypoint wrapper: hand ownership of the Loki data directories to the
# Loki UID/GID, then replace this shell with the Loki binary.
#
# Arguments: everything passed to this script is forwarded unchanged to
# /usr/bin/loki.
#
# Note: chown normally requires root. This script does not switch users, so
# Loki runs as whichever user started the container.

LOKI_UID=10001
LOKI_GID=10001

echo "Fixing permissions for Loki volumes..."
# Recursively chown each data directory that is actually present. Missing
# directories are skipped silently so an unmounted optional volume is not an
# error; a chown that fails on an existing directory is reported on stderr
# but does not prevent Loki from starting.
for dir in /loki/index /loki/chunks /loki/cache /loki/wal /loki/compactor; do
    if [ -d "$dir" ]; then
        chown -R "$LOKI_UID:$LOKI_GID" "$dir" \
            || echo "WARNING: could not change ownership of $dir" >&2
    fi
done

echo "Starting Loki..."
# exec so Loki replaces this shell and receives container signals directly
exec /usr/bin/loki "$@"
1 change: 1 addition & 0 deletions loki/data/index/uploader/name
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0cb302e1b50b-1764570257388144636
Loading