diff --git a/Dockerfile b/Dockerfile index f48b90b437..0945680299 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.22 as builder +FROM alpine:3.22@sha256:55ae5d250caebc548793f321534bc6a8ef1d116f334f18f4ada1b2daad3251b2 as builder RUN ["apk", "add", "jq"] RUN ["apk", "add", "bash"] @@ -13,7 +13,7 @@ ARG ARCH RUN /build/docker/setup.sh # Start from a fresh base image, to remove any build artifacts and scripts. -FROM alpine:3.22 +FROM alpine:3.22@sha256:55ae5d250caebc548793f321534bc6a8ef1d116f334f18f4ada1b2daad3251b2 ENV DATABRICKS_TF_EXEC_PATH "/app/bin/terraform" ENV DATABRICKS_TF_CLI_CONFIG_FILE "/app/config/config.tfrc" diff --git a/bundle/deploy/terraform/pkg.go b/bundle/deploy/terraform/pkg.go index 6c8b108337..83ea796024 100644 --- a/bundle/deploy/terraform/pkg.go +++ b/bundle/deploy/terraform/pkg.go @@ -77,11 +77,12 @@ type Checksum struct { } type TerraformMetadata struct { - Version string `json:"version"` - Checksum Checksum `json:"checksum"` - ProviderHost string `json:"providerHost"` - ProviderSource string `json:"providerSource"` - ProviderVersion string `json:"providerVersion"` + Version string `json:"version"` + Checksum Checksum `json:"checksum"` + ProviderHost string `json:"providerHost"` + ProviderSource string `json:"providerSource"` + ProviderVersion string `json:"providerVersion"` + ProviderChecksum Checksum `json:"providerChecksum"` } func NewTerraformMetadata(ctx context.Context) (*TerraformMetadata, error) { @@ -98,6 +99,10 @@ func NewTerraformMetadata(ctx context.Context) (*TerraformMetadata, error) { ProviderHost: schema.ProviderHost, ProviderSource: schema.ProviderSource, ProviderVersion: schema.ProviderVersion, + ProviderChecksum: Checksum{ + LinuxAmd64: schema.ProviderChecksumLinuxAmd64, + LinuxArm64: schema.ProviderChecksumLinuxArm64, + }, }, nil } diff --git a/bundle/internal/tf/codegen/README.md b/bundle/internal/tf/codegen/README.md index b1f8a33a8b..f309785a21 100644 --- a/bundle/internal/tf/codegen/README.md 
+++ b/bundle/internal/tf/codegen/README.md @@ -7,6 +7,7 @@ The entry point for this tool is `.`. It uses `./tmp` a temporary data directory and `../schema` as output directory. It automatically installs the Terraform binary as well as the Databricks Terraform provider. +It also fetches SHA256 checksums for the provider archive from GitHub releases. Run with: diff --git a/bundle/internal/tf/codegen/generator/generator.go b/bundle/internal/tf/codegen/generator/generator.go index e135d13f9e..47af677c00 100644 --- a/bundle/internal/tf/codegen/generator/generator.go +++ b/bundle/internal/tf/codegen/generator/generator.go @@ -35,8 +35,10 @@ func (c *collection) Generate(path string) error { } type root struct { - OutputFile string - ProviderVersion string + OutputFile string + ProviderVersion string + ProviderChecksumLinuxAmd64 string + ProviderChecksumLinuxArm64 string } func (r *root) Generate(path string) error { @@ -51,7 +53,7 @@ func (r *root) Generate(path string) error { return tmpl.Execute(f, r) } -func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error { +func Run(ctx context.Context, schema *tfjson.ProviderSchema, checksums *schemapkg.ProviderChecksums, path string) error { // Generate types for resources var resources []*namedBlock for _, k := range sortKeys(schema.ResourceSchemas) { @@ -147,8 +149,10 @@ func Run(ctx context.Context, schema *tfjson.ProviderSchema, path string) error // Generate root.go { r := &root{ - OutputFile: "root.go", - ProviderVersion: schemapkg.ProviderVersion, + OutputFile: "root.go", + ProviderVersion: schemapkg.ProviderVersion, + ProviderChecksumLinuxAmd64: checksums.LinuxAmd64, + ProviderChecksumLinuxArm64: checksums.LinuxArm64, } err := r.Generate(path) if err != nil { diff --git a/bundle/internal/tf/codegen/main.go b/bundle/internal/tf/codegen/main.go index e4982c2bc8..bc7ce6663a 100644 --- a/bundle/internal/tf/codegen/main.go +++ b/bundle/internal/tf/codegen/main.go @@ -11,12 +11,20 @@ import ( func 
main() { ctx := context.Background() - schema, err := schema.Load(ctx) + s, err := schema.Load(ctx) if err != nil { log.Fatal(err) } - err = generator.Run(ctx, schema, "../schema") + log.Printf("fetching provider checksums for v%s", schema.ProviderVersion) + checksums, err := schema.FetchProviderChecksums(schema.ProviderVersion) + if err != nil { + log.Fatal(err) + } + log.Printf(" linux_amd64: %s", checksums.LinuxAmd64) + log.Printf(" linux_arm64: %s", checksums.LinuxArm64) + + err = generator.Run(ctx, s, checksums, "../schema") if err != nil { log.Fatal(err) } diff --git a/bundle/internal/tf/codegen/schema/checksum.go b/bundle/internal/tf/codegen/schema/checksum.go new file mode 100644 index 0000000000..7cc0678eae --- /dev/null +++ b/bundle/internal/tf/codegen/schema/checksum.go @@ -0,0 +1,115 @@ +package schema + +import ( + "bufio" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "log" + "net/http" + "strings" +) + +// ProviderChecksums holds the SHA256 checksums for the Databricks Terraform +// provider archive for supported Linux architectures. +type ProviderChecksums struct { + LinuxAmd64 string + LinuxArm64 string +} + +// FetchProviderChecksums downloads the SHA256SUMS file from the GitHub release +// for the given provider version and extracts checksums for the linux_amd64 and +// linux_arm64 archives. It also downloads both zips to verify that the parsed +// checksums are correct. 
+// https://github.com/databricks/terraform-provider-databricks/releases +func FetchProviderChecksums(version string) (*ProviderChecksums, error) { + url := fmt.Sprintf( + "https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_SHA256SUMS", + version, version, + ) + + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("downloading SHA256SUMS for provider v%s: %w", version, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("downloading SHA256SUMS for provider v%s: HTTP %s", version, resp.Status) + } + + checksums := &ProviderChecksums{} + amd64Suffix := fmt.Sprintf("terraform-provider-databricks_%s_linux_amd64.zip", version) + arm64Suffix := fmt.Sprintf("terraform-provider-databricks_%s_linux_arm64.zip", version) + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + line := scanner.Text() + parts := strings.Fields(line) + if len(parts) != 2 { + continue + } + switch parts[1] { + case amd64Suffix: + checksums.LinuxAmd64 = parts[0] + case arm64Suffix: + checksums.LinuxArm64 = parts[0] + } + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("reading SHA256SUMS for provider v%s: %w", version, err) + } + + if checksums.LinuxAmd64 == "" { + return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", amd64Suffix) + } + if checksums.LinuxArm64 == "" { + return nil, fmt.Errorf("checksum not found for %s in SHA256SUMS", arm64Suffix) + } + + // Sanity check: download both zips and verify the checksums match. + err = verifyProviderChecksum(version, "linux_amd64", checksums.LinuxAmd64) + if err != nil { + return nil, err + } + err = verifyProviderChecksum(version, "linux_arm64", checksums.LinuxArm64) + if err != nil { + return nil, err + } + + return checksums, nil +} + +// verifyProviderChecksum downloads the provider zip for the given platform and +// verifies it matches the expected SHA256 checksum. 
//
// The archive is streamed through the hash with io.Copy rather than read into
// memory first. expectedChecksum is a hex string and is compared
// case-insensitively. A failed request, a non-200 response, a read error, or
// a digest mismatch is returned as an error that names the platform.
func verifyProviderChecksum(version, platform, expectedChecksum string) error {
	url := fmt.Sprintf(
		"https://github.com/databricks/terraform-provider-databricks/releases/download/v%s/terraform-provider-databricks_%s_%s.zip",
		version, version, platform,
	)

	log.Printf("verifying checksum for %s provider archive", platform)
	resp, err := http.Get(url)
	if err != nil {
		return fmt.Errorf("downloading %s provider archive for checksum verification: %w", platform, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Include the URL: unlike the transport error above, a bare HTTP
		// status does not say which archive was requested.
		return fmt.Errorf("downloading %s provider archive for checksum verification from %s: HTTP %s", platform, url, resp.Status)
	}

	hash := sha256.New()
	if _, err := io.Copy(hash, resp.Body); err != nil {
		return fmt.Errorf("computing checksum for %s provider archive: %w", platform, err)
	}

	// hex.EncodeToString always yields lowercase; fold case so an uppercase
	// digest is not reported as a mismatch.
	actualChecksum := hex.EncodeToString(hash.Sum(nil))
	if !strings.EqualFold(actualChecksum, expectedChecksum) {
		return fmt.Errorf("checksum mismatch for %s provider archive: expected %s, got %s", platform, expectedChecksum, actualChecksum)
	}

	log.Printf("checksum verified for %s provider archive", platform)
	return nil
}
"registry.terraform.io" const ProviderSource = "databricks/databricks" const ProviderVersion = "1.111.0" +const ProviderChecksumLinuxAmd64 = "c1b46bbaf5c4a0b253309dad072e05025e24731536719d4408bacd48dc0ccfd9" +const ProviderChecksumLinuxArm64 = "ce379c424009b01ec4762dee4d0db27cfc554d921b55a0af8e4203b3652259e9" func NewRoot() *Root { return &Root{ diff --git a/docker/setup.sh b/docker/setup.sh index 0dc06ce1e2..d6e6e3b4ad 100755 --- a/docker/setup.sh +++ b/docker/setup.sh @@ -30,3 +30,11 @@ mv zip/terraform/terraform /app/bin/terraform TF_PROVIDER_NAME=terraform-provider-databricks_${DATABRICKS_TF_PROVIDER_VERSION}_linux_${ARCH}.zip mkdir -p /app/providers/registry.terraform.io/databricks/databricks wget https://github.com/databricks/terraform-provider-databricks/releases/download/v${DATABRICKS_TF_PROVIDER_VERSION}/${TF_PROVIDER_NAME} -O /app/providers/registry.terraform.io/databricks/databricks/${TF_PROVIDER_NAME} + +# Verify the provider checksum. +EXPECTED_PROVIDER_CHECKSUM="$(/app/databricks bundle debug terraform --output json | jq -r .terraform.providerChecksum.linux_$ARCH)" +COMPUTED_PROVIDER_CHECKSUM=$(sha256sum /app/providers/registry.terraform.io/databricks/databricks/${TF_PROVIDER_NAME} | awk '{ print $1 }') +if [ "$COMPUTED_PROVIDER_CHECKSUM" != "$EXPECTED_PROVIDER_CHECKSUM" ]; then + echo "Checksum mismatch for Terraform provider. Version: $DATABRICKS_TF_PROVIDER_VERSION, Arch: $ARCH, Expected checksum: $EXPECTED_PROVIDER_CHECKSUM, Computed checksum: $COMPUTED_PROVIDER_CHECKSUM." + exit 1 +fi