diff --git a/main.go b/main.go index 571e459..5cee252 100644 --- a/main.go +++ b/main.go @@ -45,6 +45,7 @@ func cli() *cobra.Command { var mappingsFile string var updateFlag bool var noBuiltInFlag bool + var multistageFlag bool var strictFlag bool var warnMissingPackagesFlag bool @@ -112,7 +113,8 @@ func cli() *cobra.Command { Registry: registry, Update: updateFlag, NoBuiltIn: noBuiltInFlag, - Strict: strictFlag, + ConvertToMultistage: multistageFlag, + Strict: strictFlag, WarnMissingPackages: warnMissingPackagesFlag, } @@ -200,6 +202,7 @@ func cli() *cobra.Command { cmd.Flags().StringVarP(&mappingsFile, "mappings", "m", "", "path to a custom package mappings YAML file (instead of the default)") cmd.Flags().BoolVar(&updateFlag, "update", false, "check for and apply available updates") cmd.Flags().BoolVar(&noBuiltInFlag, "no-builtin", false, "skip built-in package/image mappings, still apply default conversion logic") + cmd.Flags().BoolVar(&multistageFlag, "multistage", false, "convert single-stage Dockerfiles to secure multistage builds") cmd.Flags().Var(&level, "log-level", "log level (e.g. 
debug, info, warn, error)") cmd.Flags().BoolVar(&strictFlag, "strict", false, "when true, fail if any package is unknown") cmd.Flags().BoolVar(&warnMissingPackagesFlag, "warn-missing-packages", false, "when true, warn about missing package mappings") diff --git a/pkg/dfc/dfc.go b/pkg/dfc/dfc.go index 1b29b8d..a854726 100644 --- a/pkg/dfc/dfc.go +++ b/pkg/dfc/dfc.go @@ -499,6 +499,7 @@ type Options struct { NoBuiltIn bool // When true, don't use built-in mappings, only ExtraMappings FromLineConverter FromLineConverter // Optional custom converter for FROM lines RunLineConverter RunLineConverter // Optional custom converter for RUN lines + ConvertToMultistage bool // When true, convert single-stage builds to multistage for security Strict bool // When true, fail if any package is unknown WarnMissingPackages bool // When true, warn about missing package mappings instead of using the original package name } @@ -523,6 +524,20 @@ func parseImageReference(imageRef string) (base, tag string) { // Convert applies the conversion to the Dockerfile and returns a new converted Dockerfile func (d *Dockerfile) Convert(ctx context.Context, opts Options) (*Dockerfile, error) { + dockerfileToConvert := d + if opts.ConvertToMultistage && shouldConvertToMultistage(d.Lines) { + converted, err := convertSingleStageToMultistageGeneric(d, MultistageOptions{ + BuildAlias: "builder", + RuntimeAlias: "", + PreserveAliases: true, + CopyStrategy: DefaultCopyStrategy, + }) + if err != nil { + return nil, fmt.Errorf("converting to multistage: %w", err) + } + dockerfileToConvert = converted + } + // Initialize mappings var mappings MappingsConfig @@ -556,7 +571,7 @@ func (d *Dockerfile) Convert(ctx context.Context, opts Options) (*Dockerfile, er // Create a new Dockerfile for the converted content converted := &Dockerfile{ - Lines: make([]*DockerfileLine, len(d.Lines)), + Lines: make([]*DockerfileLine, len(dockerfileToConvert.Lines)), } // Track packages installed per stage @@ -567,13 +582,13 @@ 
func (d *Dockerfile) Convert(ctx context.Context, opts Options) (*Dockerfile, er
 	argsUsedAsBase := make(map[string]bool)
 
 	// Track stages with RUN commands for determining if we need -dev suffix
-	stagesWithRunCommands := detectStagesWithRunCommands(d.Lines)
+	stagesWithRunCommands := detectStagesWithRunCommands(dockerfileToConvert.Lines)
 
 	// First pass: collect all ARG definitions and identify which ones are used as base images
-	identifyArgsUsedAsBaseImages(d.Lines, argNameToDockerfileLine, argsUsedAsBase)
+	identifyArgsUsedAsBaseImages(dockerfileToConvert.Lines, argNameToDockerfileLine, argsUsedAsBase)
 
 	// Convert each line
-	for i, line := range d.Lines {
+	for i, line := range dockerfileToConvert.Lines {
 		// Create a deep copy of the line
 		newLine := &DockerfileLine{
 			Raw: line.Raw,
@@ -608,7 +623,7 @@ func (d *Dockerfile) Convert(ctx context.Context, opts Options) (*Dockerfile, er
 				FromLineConverter: opts.FromLineConverter,
 				RunLineConverter: opts.RunLineConverter,
 			}
-			argLine, argDetails := convertArgLine(line.Arg, d.Lines, stagesWithRunCommands, optsWithMappings)
+			argLine, argDetails := convertArgLine(line.Arg, dockerfileToConvert.Lines, stagesWithRunCommands, optsWithMappings)
 			newLine.Converted = argLine
 			newLine.Arg = argDetails
 		}
@@ -1630,3 +1645,222 @@ func createApkPackageSpec(name string, spec PackageSpec) string {
 
 	return pkg
 }
+
+// shouldConvertToMultistage reports whether lines describe a single-stage
+// Dockerfile that installs packages, i.e. one that Convert should split into
+// a build stage and a runtime stage.
+//
+// Every FROM line is counted before deciding. Stopping the scan at the first
+// package install would miss later FROM lines and misclassify a Dockerfile
+// that is already multistage as single-stage.
+func shouldConvertToMultistage(lines []*DockerfileLine) bool {
+	stageCount := 0
+	hasPackageInstallCommands := false
+
+	for _, line := range lines {
+		if line.From != nil {
+			stageCount++
+			if stageCount > 1 {
+				// Already multistage: nothing to convert.
+				return false
+			}
+		}
+		if !hasPackageInstallCommands && lineInstallsPackages(line) {
+			hasPackageInstallCommands = true
+		}
+	}
+
+	return stageCount == 1 && hasPackageInstallCommands
+}
+
+// lineInstallsPackages reports whether line is a RUN instruction that invokes
+// a known package manager with an "install" or "add" argument.
+func lineInstallsPackages(line *DockerfileLine) bool {
+	if line.Run == nil || line.Run.Shell == nil || line.Run.Shell.Before == nil {
+		return false
+	}
+	for _, part := range line.Run.Shell.Before.Parts {
+		switch part.Command {
+		case "apt-get", "apt", "yum", "dnf", "microdnf", "apk", "pip", "pip3":
+			for _, arg := range part.Args {
+				if arg == "install" || arg == "add" {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
+// MultistageOptions allows customization of multistage conversion.
+type MultistageOptions struct {
+	// BuildAlias names the build stage; "builder" is used when it is empty.
+	BuildAlias string
+	// RuntimeAlias is the alias set on the runtime stage's FROM line.
+	RuntimeAlias string
+	// PreserveAliases keeps an alias already present on the original FROM
+	// line as the build stage name instead of BuildAlias.
+	PreserveAliases bool
+	// CopyStrategy rewrites the raw text of the COPY that opens the runtime
+	// stage; it receives the build stage alias. Nil leaves the COPY as-is.
+	CopyStrategy func(line string, buildAlias string) string
+}
+
+// DefaultCopyStrategy adds --from=buildAlias to a COPY instruction.
+//
+// The line is returned unchanged when it already contains "--from=", when
+// it is not a COPY instruction, or when buildAlias is empty (which would
+// otherwise yield an empty --from= value). The COPY keyword is matched
+// case-insensitively and may be followed by a space or a tab.
+func DefaultCopyStrategy(line string, buildAlias string) string {
+	if buildAlias == "" || strings.Contains(line, "--from=") {
+		return line
+	}
+
+	const keyword = "COPY"
+	trimmed := strings.TrimLeft(line, " \t\r\n")
+	if len(trimmed) <= len(keyword) || !strings.EqualFold(trimmed[:len(keyword)], keyword) {
+		return line
+	}
+	if sep := trimmed[len(keyword)]; sep != ' ' && sep != '\t' {
+		return line
+	}
+
+	// Insert the flag right after the keyword, keeping the original
+	// indentation and the rest of the instruction untouched.
+	indent := line[:len(line)-len(trimmed)]
+	return indent + trimmed[:len(keyword)] + " --from=" + buildAlias + trimmed[len(keyword):]
+}
+
+// isContextCopy reports whether raw is a COPY instruction that reads from
+// the build context rather than from another stage.
+//
+// Only the leading keyword is inspected, so lines that merely mention
+// "copy" (for example a RUN or LABEL) are not mistaken for a COPY. This
+// assumes raw starts with the instruction keyword, which
+// DefaultCopyStrategy relies on as well.
+func isContextCopy(raw string) bool {
+	fields := strings.Fields(raw)
+	if len(fields) == 0 || !strings.EqualFold(fields[0], "COPY") {
+		return false
+	}
+	return !strings.Contains(strings.ToUpper(raw), "--FROM=")
+}
+
+// convertSingleStageToMultistageGeneric splits a single-stage Dockerfile into
+// a build stage followed by a runtime stage on the same base image.
+//
+// Lines before the first COPY from the build context stay in the build
+// stage. That COPY is rewritten by opts.CopyStrategy and becomes the first
+// instruction of the runtime stage; every later line follows it with its
+// Stage set to 2.
+//
+// d itself is returned when it has no lines, does not contain exactly one
+// FROM, or has no context COPY to split on. Without a split point every
+// instruction would land in the build stage and the final stage would be a
+// bare FROM. The lines of d are never modified; lines whose stage changes
+// are copied first.
+//
+// NOTE(review): the rewritten COPY reads its sources from the build stage,
+// whose lines all precede the first context COPY. Confirm those sources
+// actually exist in that stage.
+func convertSingleStageToMultistageGeneric(
+	d *Dockerfile,
+	opts MultistageOptions,
+) (*Dockerfile, error) {
+	if len(d.Lines) == 0 {
+		return d, nil
+	}
+
+	// Locate the single FROM; anything else is not a single-stage build.
+	var originalFrom *DockerfileLine
+	fromCount := 0
+	for _, line := range d.Lines {
+		if line.From != nil {
+			originalFrom = line
+			fromCount++
+		}
+	}
+	if fromCount != 1 {
+		return d, nil
+	}
+
+	// Resolve the build alias once so the build stage's FROM and the
+	// rewritten COPY always refer to the same stage name.
+	buildAlias := opts.BuildAlias
+	if buildAlias == "" {
+		buildAlias = "builder"
+	}
+	if opts.PreserveAliases && originalFrom.From.Alias != "" {
+		buildAlias = originalFrom.From.Alias
+	}
+
+	var buildLines, runtimeLines []*DockerfileLine
+	var splitCopy *DockerfileLine
+
+	for _, line := range d.Lines {
+		switch {
+		case line == originalFrom:
+			buildLines = append(buildLines, &DockerfileLine{
+				Raw:   line.Raw,
+				Extra: line.Extra,
+				Stage: 1,
+				From: &FromDetails{
+					Base:        line.From.Base,
+					Tag:         line.From.Tag,
+					Digest:      line.From.Digest,
+					Alias:       buildAlias,
+					Parent:      line.From.Parent,
+					BaseDynamic: line.From.BaseDynamic,
+					TagDynamic:  line.From.TagDynamic,
+					Orig:        line.From.Orig,
+					Platform:    line.From.Platform,
+				},
+			})
+		case splitCopy != nil:
+			// Copy before restaging so the caller's line keeps its Stage.
+			restaged := *line
+			restaged.Stage = 2
+			runtimeLines = append(runtimeLines, &restaged)
+		case isContextCopy(line.Raw):
+			splitCopy = line
+		default:
+			buildLines = append(buildLines, line)
+		}
+	}
+
+	if splitCopy == nil {
+		return d, nil
+	}
+
+	copyRaw := splitCopy.Raw
+	if opts.CopyStrategy != nil {
+		copyRaw = opts.CopyStrategy(splitCopy.Raw, buildAlias)
+	}
+
+	newLines := make([]*DockerfileLine, 0, len(d.Lines)+1)
+	newLines = append(newLines, buildLines...)
+	newLines = append(newLines, &DockerfileLine{
+		Raw:   "",
+		Stage: 2,
+		From: &FromDetails{
+			Base:        originalFrom.From.Base,
+			Tag:         originalFrom.From.Tag,
+			Digest:      originalFrom.From.Digest,
+			Alias:       opts.RuntimeAlias,
+			Parent:      0,
+			BaseDynamic: originalFrom.From.BaseDynamic,
+			TagDynamic:  originalFrom.From.TagDynamic,
+			Orig:        originalFrom.From.Orig,
+			Platform:    originalFrom.From.Platform,
+		},
+	})
+	newLines = append(newLines, &DockerfileLine{
+		Raw:   copyRaw,
+		Extra: splitCopy.Extra,
+		Stage: 2,
+	})
+	newLines = append(newLines, runtimeLines...)
+
+	return &Dockerfile{Lines: newLines}, nil
+}
diff --git a/pkg/dfc/dfc_test.go b/pkg/dfc/dfc_test.go
index 539a509..1b68dd5 100644
--- a/pkg/dfc/dfc_test.go
+++ b/pkg/dfc/dfc_test.go
@@ -1197,8 +1197,15 @@ func TestFullFileConversion(t *testing.T) {
t.Fatalf("Failed to find test files: %v", err) } + var filteredFiles []string + for _, file := range beforeFiles { + if !strings.Contains(filepath.Base(file), "multistage-") { + filteredFiles = append(filteredFiles, file) + } + } + // Test each file - for _, beforeFile := range beforeFiles { + for _, beforeFile := range filteredFiles { name := strings.Split(filepath.Base(beforeFile), ".")[0] t.Run(name, func(t *testing.T) { ctx := context.Background() @@ -2381,3 +2388,162 @@ func TestPlatformFlagPreservedInConversion(t *testing.T) { }) } } + +func TestConvertToMultistage(t *testing.T) { + tests := []struct { + name string + raw string + convertToMultistage bool + expectedStages int + expectBuilderAlias bool + expectCopyFromBuilder bool + }{ + { + name: "single-stage with package installation converts to multistage", + raw: `FROM python:3.9 +WORKDIR /app +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY . . +CMD ["python", "app.py"]`, + convertToMultistage: true, + expectedStages: 2, + expectBuilderAlias: true, + expectCopyFromBuilder: true, + }, + { + name: "single-stage without package installation remains single-stage", + raw: `FROM python:3.9 +WORKDIR /app +COPY . . +CMD ["python", "app.py"]`, + convertToMultistage: true, + expectedStages: 1, + expectBuilderAlias: false, + expectCopyFromBuilder: false, + }, + { + name: "multistage conversion disabled keeps original structure", + raw: `FROM python:3.9 +WORKDIR /app +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY . . 
+CMD ["python", "app.py"]`, + convertToMultistage: false, + expectedStages: 1, + expectBuilderAlias: false, + expectCopyFromBuilder: false, + }, + { + name: "dockerfile with apt-get converts to multistage", + raw: `FROM ubuntu:20.04 +RUN apt-get update && apt-get install -y python3 +COPY app.py /app/ +CMD ["python3", "/app/app.py"]`, + convertToMultistage: true, + expectedStages: 2, + expectBuilderAlias: true, + expectCopyFromBuilder: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + + parsed, err := ParseDockerfile(ctx, []byte(tt.raw)) + if err != nil { + t.Fatalf("Failed to parse Dockerfile: %v", err) + } + + converted, err := parsed.Convert(ctx, Options{ + ConvertToMultistage: tt.convertToMultistage, + ExtraMappings: MappingsConfig{ + Images: map[string]string{ + "python": "python", + "ubuntu": "chainguard-base", + }, + Packages: PackageMap{}, + }, + NoBuiltIn: true, + }) + if err != nil { + t.Fatalf("Failed to convert Dockerfile: %v", err) + } + + // Count stages and check for builder alias and COPY --from=builder + stageCount := 0 + hasBuilderAlias := false + hasCopyFromBuilder := false + + for _, line := range converted.Lines { + if line.From != nil { + stageCount++ + if line.From.Alias == "builder" { + hasBuilderAlias = true + } + } + if strings.Contains(line.Raw, "COPY --from=builder") || strings.Contains(line.Converted, "COPY --from=builder") { + hasCopyFromBuilder = true + } + } + + if stageCount != tt.expectedStages { + t.Errorf("Expected %d stages, got %d", tt.expectedStages, stageCount) + } + + if hasBuilderAlias != tt.expectBuilderAlias { + t.Errorf("Expected builder alias: %v, got: %v", tt.expectBuilderAlias, hasBuilderAlias) + } + + if hasCopyFromBuilder != tt.expectCopyFromBuilder { + t.Errorf("Expected COPY --from=builder: %v, got: %v", tt.expectCopyFromBuilder, hasCopyFromBuilder) + } + }) + } +} + +// TestMultistageFileConversion tests full file conversion with multistage option 
enabled +func TestMultistageFileConversion(t *testing.T) { + beforeFiles, err := filepath.Glob("../../testdata/multistage-*.before.Dockerfile") + if err != nil { + t.Fatalf("Failed to find multistage test files: %v", err) + } + + for _, beforeFile := range beforeFiles { + name := strings.Split(filepath.Base(beforeFile), ".")[0] + t.Run(name, func(t *testing.T) { + ctx := context.Background() + + before, err := os.ReadFile(beforeFile) + if err != nil { + t.Fatalf("Failed to read input file: %v", err) + } + + afterFile := strings.Replace(beforeFile, ".before.", ".after.", 1) + after, err := os.ReadFile(afterFile) + if err != nil { + t.Fatalf("Failed to read expected output file: %v", err) + } + + orig, err := ParseDockerfile(ctx, before) + if err != nil { + t.Fatalf("Failed to parse Dockerfile: %v", err) + } + converted, err := orig.Convert(ctx, Options{ + ConvertToMultistage: true, + }) + if err != nil { + t.Fatalf("Failed to convert Dockerfile: %v", err) + } + + got := converted.String() + want := string(after) + + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("multistage conversion not as expected (-want, +got):\n%s", diff) + } + }) + } +} diff --git a/testdata/multistage-alpine.after.Dockerfile b/testdata/multistage-alpine.after.Dockerfile new file mode 100644 index 0000000..9517030 --- /dev/null +++ b/testdata/multistage-alpine.after.Dockerfile @@ -0,0 +1,19 @@ +FROM cgr.dev/ORG/chainguard-base:latest AS builder +USER root + +RUN apk add --no-cache git nodejs npm + +FROM cgr.dev/ORG/chainguard-base:latest AS builder + +RUN apk add --no-cache nodejs npm + +COPY --from=builder package.json package-lock.json ./ +RUN npm ci --only=production +FROM cgr.dev/ORG/chainguard-base:latest +USER root + +COPY --from=builder src/ ./src/ +COPY public/ ./public/ + +EXPOSE 3000 +CMD ["node", "src/index.js"] \ No newline at end of file diff --git a/testdata/multistage-alpine.before.Dockerfile b/testdata/multistage-alpine.before.Dockerfile new file mode 100644 index 
0000000..18658df --- /dev/null +++ b/testdata/multistage-alpine.before.Dockerfile @@ -0,0 +1,17 @@ +FROM cgr.dev/ORG/chainguard-base:latest AS builder +USER root + +RUN apk add --no-cache git nodejs npm + +FROM cgr.dev/ORG/chainguard-base:latest + +RUN apk add --no-cache nodejs npm + +COPY --from=builder package.json package-lock.json ./ +RUN npm ci --only=production + +COPY src/ ./src/ +COPY public/ ./public/ + +EXPOSE 3000 +CMD ["node", "src/index.js"] \ No newline at end of file diff --git a/testdata/multistage-gcc-static.after.Dockerfile b/testdata/multistage-gcc-static.after.Dockerfile new file mode 100644 index 0000000..02c496d --- /dev/null +++ b/testdata/multistage-gcc-static.after.Dockerfile @@ -0,0 +1,14 @@ +FROM cgr.dev/ORG/chainguard-base:latest AS builder +USER root + +RUN apk add --no-cache curl gcc git glibc-dev make +FROM cgr.dev/ORG/chainguard-base:latest + +COPY --from=builder hello.c /app/hello.c +COPY Makefile /app/Makefile + +WORKDIR /app +RUN gcc -static -o hello hello.c + +EXPOSE 8080 +CMD ["./hello"] \ No newline at end of file diff --git a/testdata/multistage-gcc-static.before.Dockerfile b/testdata/multistage-gcc-static.before.Dockerfile new file mode 100644 index 0000000..68f309a --- /dev/null +++ b/testdata/multistage-gcc-static.before.Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:20.04 + +RUN apt-get update && apt-get install -y \ + gcc \ + libc6-dev \ + make \ + curl \ + git + +COPY hello.c /app/hello.c +COPY Makefile /app/Makefile + +WORKDIR /app +RUN gcc -static -o hello hello.c + +EXPOSE 8080 +CMD ["./hello"] \ No newline at end of file diff --git a/testdata/multistage-go-static.after.Dockerfile b/testdata/multistage-go-static.after.Dockerfile new file mode 100644 index 0000000..a23dd04 --- /dev/null +++ b/testdata/multistage-go-static.after.Dockerfile @@ -0,0 +1,14 @@ +FROM cgr.dev/ORG/go:1.21-dev AS builder +USER root + +RUN apk add --no-cache ca-certificates curl git +FROM cgr.dev/ORG/go:1.21-dev + +COPY --from=builder go.mod go.sum ./ 
+RUN go mod download + +COPY . . +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o main . + +EXPOSE 8080 +CMD ["./main"] \ No newline at end of file diff --git a/testdata/multistage-go-static.before.Dockerfile b/testdata/multistage-go-static.before.Dockerfile new file mode 100644 index 0000000..359aaaf --- /dev/null +++ b/testdata/multistage-go-static.before.Dockerfile @@ -0,0 +1,15 @@ +FROM golang:1.21 + +RUN apt-get update && apt-get install -y \ + git \ + ca-certificates \ + curl + +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o main . + +EXPOSE 8080 +CMD ["./main"] \ No newline at end of file diff --git a/testdata/multistage-node-distroless.after.Dockerfile b/testdata/multistage-node-distroless.after.Dockerfile new file mode 100644 index 0000000..0034f2c --- /dev/null +++ b/testdata/multistage-node-distroless.after.Dockerfile @@ -0,0 +1,14 @@ +FROM cgr.dev/ORG/node:18-dev AS builder +USER root + +RUN apk add --no-cache curl gcc git make python-3 +FROM cgr.dev/ORG/node:18-dev + +COPY --from=builder package*.json ./ +RUN npm ci --only=production + +COPY src/ ./src/ +COPY public/ ./public/ + +EXPOSE 3000 +CMD ["node", "src/index.js"] \ No newline at end of file diff --git a/testdata/multistage-node-distroless.before.Dockerfile b/testdata/multistage-node-distroless.before.Dockerfile new file mode 100644 index 0000000..d88a8b6 --- /dev/null +++ b/testdata/multistage-node-distroless.before.Dockerfile @@ -0,0 +1,17 @@ +FROM node:18 + +RUN apt-get update && apt-get install -y \ + python3 \ + make \ + g++ \ + git \ + curl + +COPY package*.json ./ +RUN npm ci --only=production + +COPY src/ ./src/ +COPY public/ ./public/ + +EXPOSE 3000 +CMD ["node", "src/index.js"] \ No newline at end of file diff --git a/testdata/multistage-single.after.Dockerfile b/testdata/multistage-single.after.Dockerfile new file mode 100644 index 0000000..4d539e7 --- /dev/null +++ 
b/testdata/multistage-single.after.Dockerfile @@ -0,0 +1,17 @@ +FROM cgr.dev/ORG/chainguard-base:latest AS builder +USER root + +RUN apk add --no-cache curl git py3-pip python-3 python3-venv +FROM cgr.dev/ORG/chainguard-base:latest + +COPY --from=builder requirements.txt /app/requirements.txt +COPY app.py /app/app.py +COPY static/ /app/static/ + +WORKDIR /app +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" +RUN pip3 install -r requirements.txt + +EXPOSE 8000 +CMD ["python3", "app.py"] diff --git a/testdata/multistage-single.before.Dockerfile b/testdata/multistage-single.before.Dockerfile new file mode 100644 index 0000000..523f96f --- /dev/null +++ b/testdata/multistage-single.before.Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:20.04 + +RUN apt-get update && apt-get install -y \ + python3 \ + python3-pip \ + python3-venv \ + curl \ + git + +COPY requirements.txt /app/requirements.txt +COPY app.py /app/app.py +COPY static/ /app/static/ + +WORKDIR /app +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" +RUN pip3 install -r requirements.txt + +EXPOSE 8000 +CMD ["python3", "app.py"]