Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 70 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ env:
jobs:
go:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

Expand All @@ -29,12 +30,49 @@ jobs:
with:
version: latest

- name: Unit tests
run: go test -race -count=1 ./...
- name: Unit tests with coverage
run: go test -race -count=1 -coverprofile=coverage.out ./...

- name: Upload coverage artifact
uses: actions/upload-artifact@v4
with:
name: coverage
path: coverage.out

- name: Display coverage summary
run: go tool cover -func=coverage.out | tail -1

security:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.24'

- name: Run govulncheck
run: |
go install golang.org/x/vuln/cmd/govulncheck@latest
govulncheck ./...

- name: Build image for scanning
run: docker build -t docker-guardian:scan .

- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: docker-guardian:scan
format: table
exit-code: 0
severity: CRITICAL,HIGH

test:
needs: go
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@v4

Expand All @@ -59,8 +97,37 @@ jobs:
- name: Run notifications test
run: GUARDIAN_IMAGE=docker-guardian bash tests/test-notifications.sh

- name: Run opt-out test
run: GUARDIAN_IMAGE=docker-guardian bash tests/test-opt-out.sh

- name: Run circuit breaker test
run: GUARDIAN_IMAGE=docker-guardian bash tests/test-circuit-breaker.sh

- name: Run custom label test
run: GUARDIAN_IMAGE=docker-guardian bash tests/test-custom-label.sh

- name: Capture docker logs on failure
if: failure()
run: |
echo "=== Docker containers ==="
docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}" || true
echo ""
for c in $(docker ps -a --filter "name=dg-test" --format "{{.Names}}"); do
echo "=== Logs: $c ==="
docker logs "$c" 2>&1 | tail -50
echo ""
done

- name: Upload failure logs
if: failure()
uses: actions/upload-artifact@v4
with:
name: test-failure-logs
path: /tmp/dg-test-logs/
if-no-files-found: ignore

build:
needs: test
needs: [test, security]
runs-on: ubuntu-latest
if: github.event_name == 'push'
permissions:
Expand Down
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,20 @@ ENV AUTOHEAL_CONTAINER_LABEL=autoheal \
AUTOHEAL_WATCHTOWER_COOLDOWN=300 \
AUTOHEAL_WATCHTOWER_SCOPE=all \
AUTOHEAL_WATCHTOWER_EVENTS=orchestration \
AUTOHEAL_BACKOFF_MULTIPLIER=2 \
AUTOHEAL_BACKOFF_MAX=300 \
AUTOHEAL_BACKOFF_RESET_AFTER=600 \
AUTOHEAL_RESTART_BUDGET=5 \
AUTOHEAL_RESTART_WINDOW=300 \
METRICS_PORT=0 \
DOCKER_SOCK=/var/run/docker.sock \
CURL_TIMEOUT=30 \
WEBHOOK_URL="" \
WEBHOOK_JSON_KEY="content" \
APPRISE_URL="" \
POST_RESTART_SCRIPT="" \
NOTIFY_EVENTS="actions" \
NOTIFY_RATE_LIMIT=60 \
NOTIFY_GOTIFY_URL="" \
NOTIFY_GOTIFY_TOKEN="" \
NOTIFY_DISCORD_WEBHOOK="" \
Expand Down
9 changes: 9 additions & 0 deletions cmd/guardian/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/Will-Luck/Docker-Guardian/internal/docker"
"github.com/Will-Luck/Docker-Guardian/internal/guardian"
"github.com/Will-Luck/Docker-Guardian/internal/logging"
"github.com/Will-Luck/Docker-Guardian/internal/metrics"
"github.com/Will-Luck/Docker-Guardian/internal/notify"
)

Expand All @@ -24,6 +25,10 @@ func main() {
}

cfg := config.Load()
if err := cfg.Validate(); err != nil {
fmt.Fprintf(os.Stderr, "configuration error: %v\n", err)
os.Exit(1)
}
log := logging.New(cfg.LogJSON)

// Banner: plain stdout for acceptance test compatibility
Expand All @@ -48,6 +53,8 @@ func main() {
resolved := cfg.ResolvedNotifyEvents()
fmt.Printf("NOTIFY_EVENTS=%s (resolved: %s)\n", cfg.NotifyEvents, strings.Join(resolved, ","))

metrics.Serve(cfg.MetricsPort)

g := guardian.New(cfg, client, dispatcher, log)

if cfg.StartPeriod > 0 {
Expand All @@ -66,4 +73,6 @@ func main() {
log.Error("guardian exited with error", "error", err)
os.Exit(1)
}

dispatcher.Close()
}
11 changes: 10 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ require (

require (
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/distribution/reference v0.6.0 // indirect
Expand All @@ -20,12 +22,19 @@ require (
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/moby/docker-image-spec v1.3.1 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
go.opentelemetry.io/otel v1.35.0 // indirect
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
golang.org/x/sys v0.33.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sys v0.35.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
)
21 changes: 21 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
Expand Down Expand Up @@ -29,12 +33,22 @@ github.com/moby/moby/api v1.53.0 h1:PihqG1ncw4W+8mZs69jlwGXdaYBeb5brF6BL7mPIS/w=
github.com/moby/moby/api v1.53.0/go.mod h1:8mb+ReTlisw4pS6BRzCMts5M49W5M7bKt1cJy/YbAqc=
github.com/moby/moby/client v0.2.2 h1:Pt4hRMCAIlyjL3cr8M5TrXCwKzguebPAc2do2ur7dEM=
github.com/moby/moby/client v0.2.2/go.mod h1:2EkIPVNCqR05CMIzL1mfA07t0HvVUUOl85pasRz/GmQ=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
Expand All @@ -51,8 +65,15 @@ go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5J
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
Expand Down
17 changes: 17 additions & 0 deletions internal/clock/clock.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package clock

import "time"

// Clock is the set of time operations callers depend on, expressed as an
// interface so that tests can substitute their own time source.
type Clock interface {
	// Now reports the current time.
	Now() time.Time
	// Since reports how much time has elapsed since t.
	Since(t time.Time) time.Duration
	// After returns a receive-only channel of time values for the duration d;
	// the Real implementation delegates to time.After.
	After(d time.Duration) <-chan time.Time
}

// Real provides the clock operations by calling straight through to the
// standard library time package. It carries no state.
type Real struct{}

// Now returns the result of time.Now.
func (Real) Now() time.Time {
	return time.Now()
}

// After returns the channel produced by time.After for the duration d.
func (Real) After(d time.Duration) <-chan time.Time {
	return time.After(d)
}

// Since returns the result of time.Since for the instant t.
func (Real) Since(t time.Time) time.Duration {
	return time.Since(t)
}
78 changes: 77 additions & 1 deletion internal/config/config.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package config

import (
"errors"
"fmt"
"net/url"
"os"
"strconv"
"strings"
Expand Down Expand Up @@ -31,11 +33,19 @@ type Config struct {
WatchtowerScope string // "all" or "affected"
WatchtowerEvents string // "orchestration" or "all"

// Circuit breaker / backoff
BackoffMultiplier float64
BackoffMax int // seconds
BackoffResetAfter int // seconds
RestartBudget int
RestartWindow int // seconds

// Post-restart script
PostRestartScript string

// Notification events
NotifyEvents string
NotifyEvents string
NotifyRateLimit int // seconds (0 = unlimited)

// Notification services
WebhookURL string
Expand Down Expand Up @@ -63,6 +73,9 @@ type Config struct {
EmailUser string
EmailPass string

// Metrics
MetricsPort int

// Logging
LogJSON bool
}
Expand All @@ -89,8 +102,15 @@ func Load() *Config {
WatchtowerScope: envStr("AUTOHEAL_WATCHTOWER_SCOPE", "all"),
WatchtowerEvents: envStr("AUTOHEAL_WATCHTOWER_EVENTS", "orchestration"),

BackoffMultiplier: envFloat("AUTOHEAL_BACKOFF_MULTIPLIER", 2),
BackoffMax: envInt("AUTOHEAL_BACKOFF_MAX", 300),
BackoffResetAfter: envInt("AUTOHEAL_BACKOFF_RESET_AFTER", 600),
RestartBudget: envInt("AUTOHEAL_RESTART_BUDGET", 5),
RestartWindow: envInt("AUTOHEAL_RESTART_WINDOW", 300),

PostRestartScript: envStr("POST_RESTART_SCRIPT", ""),
NotifyEvents: envStr("NOTIFY_EVENTS", "actions"),
NotifyRateLimit: envInt("NOTIFY_RATE_LIMIT", 60),

WebhookURL: envStr("WEBHOOK_URL", ""),
WebhookJSONKey: envStr("WEBHOOK_JSON_KEY", "text"),
Expand All @@ -117,6 +137,8 @@ func Load() *Config {
EmailUser: envStr("NOTIFY_EMAIL_USER", ""),
EmailPass: envStr("NOTIFY_EMAIL_PASS", ""),

MetricsPort: envInt("METRICS_PORT", 0),

LogJSON: envBool("LOG_JSON", false),
}
}
Expand All @@ -137,6 +159,11 @@ func (c *Config) PrintBanner() {
fmt.Println("AUTOHEAL_WATCHTOWER_COOLDOWN=" + strconv.Itoa(c.WatchtowerCooldown))
fmt.Println("AUTOHEAL_WATCHTOWER_SCOPE=" + c.WatchtowerScope)
fmt.Println("AUTOHEAL_WATCHTOWER_EVENTS=" + c.WatchtowerEvents)
fmt.Printf("AUTOHEAL_BACKOFF_MULTIPLIER=%g\n", c.BackoffMultiplier)
fmt.Println("AUTOHEAL_BACKOFF_MAX=" + strconv.Itoa(c.BackoffMax))
fmt.Println("AUTOHEAL_BACKOFF_RESET_AFTER=" + strconv.Itoa(c.BackoffResetAfter))
fmt.Println("AUTOHEAL_RESTART_BUDGET=" + strconv.Itoa(c.RestartBudget))
fmt.Println("AUTOHEAL_RESTART_WINDOW=" + strconv.Itoa(c.RestartWindow))
}

// ResolvedNotifyEvents returns the normalised event categories.
Expand Down Expand Up @@ -170,6 +197,43 @@ func (c *Config) ResolvedNotifyEvents() []string {
return result
}

// Validate checks the configuration for invalid values and reports every
// problem it finds in a single error.
//
// The checks are:
//   - Interval must be greater than zero;
//   - GracePeriod and DefaultStopTimeout must not be negative;
//   - WatchtowerScope must be "all" or "affected";
//   - WatchtowerEvents must be "orchestration" or "all";
//   - each non-empty notification endpoint (WEBHOOK_URL, APPRISE_URL,
//     NOTIFY_GOTIFY_URL, NOTIFY_DISCORD_WEBHOOK, NOTIFY_SLACK_WEBHOOK,
//     NOTIFY_LUNASEA_WEBHOOK) must parse as an absolute URL that has both a
//     scheme and a host.
//
// All violations are combined with errors.Join, so the result is nil only
// when every check passes.
func (c *Config) Validate() error {
	var errs []error

	if c.Interval <= 0 {
		errs = append(errs, fmt.Errorf("AUTOHEAL_INTERVAL must be > 0, got %d", c.Interval))
	}
	if c.GracePeriod < 0 {
		errs = append(errs, fmt.Errorf("AUTOHEAL_GRACE_PERIOD must be >= 0, got %d", c.GracePeriod))
	}
	if c.DefaultStopTimeout < 0 {
		errs = append(errs, fmt.Errorf("AUTOHEAL_DEFAULT_STOP_TIMEOUT must be >= 0, got %d", c.DefaultStopTimeout))
	}
	if c.WatchtowerScope != "all" && c.WatchtowerScope != "affected" {
		errs = append(errs, fmt.Errorf("AUTOHEAL_WATCHTOWER_SCOPE must be \"all\" or \"affected\", got %q", c.WatchtowerScope))
	}
	if c.WatchtowerEvents != "orchestration" && c.WatchtowerEvents != "all" {
		errs = append(errs, fmt.Errorf("AUTOHEAL_WATCHTOWER_EVENTS must be \"orchestration\" or \"all\", got %q", c.WatchtowerEvents))
	}

	endpoints := []struct {
		name, val string
	}{
		{"WEBHOOK_URL", c.WebhookURL},
		{"APPRISE_URL", c.AppriseURL},
		{"NOTIFY_GOTIFY_URL", c.GotifyURL},
		{"NOTIFY_DISCORD_WEBHOOK", c.DiscordWebhook},
		{"NOTIFY_SLACK_WEBHOOK", c.SlackWebhook},
		{"NOTIFY_LUNASEA_WEBHOOK", c.LunaSeaWebhook},
	}
	for _, e := range endpoints {
		if e.val == "" {
			continue // an unset endpoint is not an error
		}
		parsed, err := url.Parse(e.val)
		if err != nil {
			errs = append(errs, fmt.Errorf("%s is not a valid URL: %w", e.name, err))
			continue
		}
		// url.Parse alone is too lenient: it accepts nearly any string as a
		// relative reference (e.g. "not a url" or "example.com/hook"), so
		// insist on a scheme and a host. The value itself is deliberately not
		// echoed here because webhook URLs may embed tokens.
		if parsed.Scheme == "" || parsed.Host == "" {
			errs = append(errs, fmt.Errorf("%s is not a valid URL: must be absolute with a scheme and host", e.name))
		}
	}

	return errors.Join(errs...)
}

func envStr(key, def string) string {
if v := os.Getenv(key); v != "" {
return v
Expand All @@ -189,6 +253,18 @@ func envInt(key string, def int) int {
return n
}

// envFloat reads the environment variable key as a 64-bit float.
// It returns def when the variable is empty or unset, or when its value
// cannot be parsed by strconv.ParseFloat.
func envFloat(key string, def float64) float64 {
	if raw := os.Getenv(key); raw != "" {
		if parsed, err := strconv.ParseFloat(raw, 64); err == nil {
			return parsed
		}
	}
	return def
}

func envBool(key string, def bool) bool {
v := os.Getenv(key)
if v == "" {
Expand Down
Loading
Loading