diff --git a/certmagic.go b/certmagic.go index 322a0f1b..2ff53235 100644 --- a/certmagic.go +++ b/certmagic.go @@ -499,3 +499,27 @@ var ( // Maximum size for the stack trace when recovering from panics. const stackTraceBufferSize = 1024 * 128 + +const ( + // Storage mode controls the format in which certificates are stored in `Storage`. + // + // Formats: + // - legacy: Store cert, privkey and meta as three separate storage items (.cert, .key, .json). + // - bundle: Store cert, privkey and meta as a single, bundled storage item (.bundle). + // + // Modes: + // - legacy: Store and load certificates in legacy format. + // - transition: Store in legacy and bundle format, load as bundle with fallback to legacy format. + // - bundle: Store and load certificates in bundle format. + // + // In the transition mode, failures around reads and writes of the bundle are soft. + // They should only log errors and try to work with the legacy format as fallback. + // Operations on the legacy format are hard-failures, implying that errors should be propagated up. 
+ // + // The storage mode is controlled via the CERTMAGIC_STORAGE_MODE environment variable; an unset or unrecognized value selects legacy mode. + StorageModeEnv = "CERTMAGIC_STORAGE_MODE" + + StorageModeLegacy = "legacy" + StorageModeTransition = "transition" + StorageModeBundle = "bundle" +) diff --git a/config.go b/config.go index 419b3dcb..8d9c5d65 100644 --- a/config.go +++ b/config.go @@ -32,6 +32,7 @@ import ( "net" "net/http" "net/url" + "os" "strings" "time" @@ -752,8 +753,7 @@ func (cfg *Config) reusePrivateKey(ctx context.Context, domain string) (privKey for i, issuer := range issuers { // see if this issuer location in storage has a private key for the domain - privateKeyStorageKey := StorageKeys.SitePrivateKey(issuer.IssuerKey(), domain) - privKeyPEM, err = cfg.Storage.Load(ctx, privateKeyStorageKey) + certRes, err := cfg.loadCertResource(ctx, issuer, domain) if errors.Is(err, fs.ErrNotExist) { err = nil // obviously, it's OK to not have a private key; so don't prevent obtaining a cert continue @@ -761,6 +761,7 @@ func (cfg *Config) reusePrivateKey(ctx context.Context, domain string) (privKey if err != nil { return nil, nil, nil, fmt.Errorf("loading existing private key for reuse with issuer %s: %v", issuer.IssuerKey(), err) } + privKeyPEM = certRes.PrivateKeyPEM // we loaded a private key; try decoding it so we can use it privKey, err = PEMDecodePrivateKey(privKeyPEM) @@ -1101,7 +1102,8 @@ func (cfg *Config) RevokeCert(ctx context.Context, domain string, reason int, in return err } - if !cfg.Storage.Exists(ctx, StorageKeys.SitePrivateKey(issuerKey, domain)) { + // loadCertResource should already fail if private key is missing. 
+ if len(certRes.PrivateKeyPEM) == 0 { return fmt.Errorf("private key not found for %s", certRes.SANs) } @@ -1266,11 +1268,28 @@ func (cfg *Config) checkStorage(ctx context.Context) error { return nil } -// storageHasCertResources returns true if the storage +// storageHasCertResources returns true if the storage associated with cfg's certificate cache has all the +// resources related to the certificate for domain. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. +func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + if cfg.storageHasCertResourcesBundle(ctx, issuer, domain) { + return true + } + return cfg.storageHasCertResourcesLegacy(ctx, issuer, domain) + case StorageModeBundle: + return cfg.storageHasCertResourcesBundle(ctx, issuer, domain) + default: + return cfg.storageHasCertResourcesLegacy(ctx, issuer, domain) + } +} + +// storageHasCertResourcesLegacy returns true if the storage // associated with cfg's certificate cache has all the // resources related to the certificate for domain: the // certificate, the private key, and the metadata. -func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, domain string) bool { +func (cfg *Config) storageHasCertResourcesLegacy(ctx context.Context, issuer Issuer, domain string) bool { issuerKey := issuer.IssuerKey() certKey := StorageKeys.SiteCert(issuerKey, domain) keyKey := StorageKeys.SitePrivateKey(issuerKey, domain) @@ -1280,10 +1299,40 @@ func (cfg *Config) storageHasCertResources(ctx context.Context, issuer Issuer, d cfg.Storage.Exists(ctx, metaKey) } +// storageHasCertResourcesBundle returns true if the storage +// associated with cfg's certificate cache has the +// certificate resource bundle for domain. 
+func (cfg *Config) storageHasCertResourcesBundle(ctx context.Context, issuer Issuer, domain string) bool { + issuerKey := issuer.IssuerKey() + certBundle := StorageKeys.SiteBundle(issuerKey, domain) + return cfg.Storage.Exists(ctx, certBundle) +} + // deleteSiteAssets deletes the folder in storage containing the // certificate, private key, and metadata file for domain from the // issuer with the given issuer key. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain string) error { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + if err := cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain); err != nil { + cfg.Logger.Warn("unable to delete certificate resource bundle", + zap.String("issuer", issuerKey), + zap.String("domain", domain), + zap.Error(err)) + } + return cfg.deleteSiteAssetsLegacy(ctx, issuerKey, domain) + case StorageModeBundle: + return cfg.deleteSiteAssetsBundle(ctx, issuerKey, domain) + default: + return cfg.deleteSiteAssetsLegacy(ctx, issuerKey, domain) + } +} + +// deleteSiteAssetsLegacy deletes the folder in storage containing the +// certificate, private key, and metadata file for domain from the +// issuer with the given issuer key. +func (cfg *Config) deleteSiteAssetsLegacy(ctx context.Context, issuerKey, domain string) error { err := cfg.Storage.Delete(ctx, StorageKeys.SiteCert(issuerKey, domain)) if err != nil { return fmt.Errorf("deleting certificate file: %v", err) @@ -1303,6 +1352,16 @@ func (cfg *Config) deleteSiteAssets(ctx context.Context, issuerKey, domain strin return nil } +// deleteSiteAssetsBundle deletes the certificate resource bundle stored for +// domain under the issuer with the given issuer key; it removes only the bundle item, not its folder. 
+func (cfg *Config) deleteSiteAssetsBundle(ctx context.Context, issuerKey, domain string) error { + err := cfg.Storage.Delete(ctx, StorageKeys.SiteBundle(issuerKey, domain)) + if err != nil { + return fmt.Errorf("deleting certificate bundle: %v", err) + } + return nil +} + // lockKey returns a key for a lock that is specific to the operation // named op being performed related to domainName and this config's CA. func (cfg *Config) lockKey(op, domainName string) string { diff --git a/config_test.go b/config_test.go index 89a63224..0db73428 100644 --- a/config_test.go +++ b/config_test.go @@ -28,7 +28,7 @@ import ( ) func TestSaveCertResource(t *testing.T) { - ctx := context.Background() + ctx := t.Context() am := &ACMEIssuer{CA: "https://example.com/acme/directory"} testConfig := &Config{ @@ -154,3 +154,157 @@ func mustJSON(val any) []byte { } return result } + +// testStorageModeSetup creates a test config with the specified storage mode +func testStorageModeSetup(t *testing.T, mode, storagePath string) (*Config, *ACMEIssuer) { + t.Helper() + t.Setenv(StorageModeEnv, mode) + + am := &ACMEIssuer{CA: "https://example.com/acme/directory"} + cfg := &Config{ + Issuers: []Issuer{am}, + Storage: &FileStorage{Path: storagePath}, + Logger: defaultTestLogger, + certCache: new(Cache), + } + am.config = cfg + + t.Cleanup(func() { + os.RemoveAll(storagePath) + }) + + return cfg, am +} + +func makeCertResource(am *ACMEIssuer, domain string, useLegacyContent bool) CertificateResource { + return CertificateResource{ + SANs: []string{domain}, + PrivateKeyPEM: []byte("private key"), + CertificatePEM: []byte("certificate"), + IssuerData: mustJSON(acme.Certificate{URL: "https://example.com/cert"}), + issuerKey: am.IssuerKey(), + } +} + +func assertFileExists(t *testing.T, ctx context.Context, storage Storage, path string) { + t.Helper() + if !storage.Exists(ctx, path) { + t.Errorf("Expected file to exist at %s", path) + } +} + +func assertFileNotExists(t *testing.T, ctx 
context.Context, storage Storage, path string) { + t.Helper() + if storage.Exists(ctx, path) { + t.Errorf("Expected file NOT to exist at %s", path) + } +} + +func assertCertResourceContent(t *testing.T, loaded CertificateResource, expectedKey, expectedCert string) { + t.Helper() + if string(loaded.PrivateKeyPEM) != expectedKey { + t.Errorf("Private key mismatch: expected %q, got %q", expectedKey, string(loaded.PrivateKeyPEM)) + } + if string(loaded.CertificatePEM) != expectedCert { + t.Errorf("Certificate mismatch: expected %q, got %q", expectedCert, string(loaded.CertificatePEM)) + } +} + +func TestStorageModeLegacy(t *testing.T) { + ctx := t.Context() + cfg, am := testStorageModeSetup(t, StorageModeLegacy, "./_testdata_tmp_legacy") + + domain := "example.com" + cert := makeCertResource(am, domain, true) + + if err := cfg.saveCertResource(ctx, am, cert); err != nil { + t.Fatalf("Failed to save cert resource: %v", err) + } + + issuerKey := am.IssuerKey() + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SitePrivateKey(issuerKey, domain)) + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteCert(issuerKey, domain)) + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteMeta(issuerKey, domain)) + assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain)) + + loaded, err := cfg.loadCertResource(ctx, am, domain) + if err != nil { + t.Fatalf("Failed to load cert resource: %v", err) + } + assertCertResourceContent(t, loaded, "private key", "certificate") +} + +func TestStorageModeBundle(t *testing.T) { + ctx := t.Context() + cfg, am := testStorageModeSetup(t, StorageModeBundle, "./_testdata_tmp_bundle") + + domain := "example.com" + cert := makeCertResource(am, domain, false) + + if err := cfg.saveCertResource(ctx, am, cert); err != nil { + t.Fatalf("Failed to save cert resource: %v", err) + } + + issuerKey := am.IssuerKey() + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain)) + assertFileNotExists(t, ctx, 
cfg.Storage, StorageKeys.SitePrivateKey(issuerKey, domain)) + assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteCert(issuerKey, domain)) + assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteMeta(issuerKey, domain)) + + loaded, err := cfg.loadCertResource(ctx, am, domain) + if err != nil { + t.Fatalf("Failed to load cert resource: %v", err) + } + assertCertResourceContent(t, loaded, "private key", "certificate") +} + +func TestStorageModeTransition(t *testing.T) { + ctx := t.Context() + cfg, am := testStorageModeSetup(t, StorageModeTransition, "./_testdata_tmp_transition") + + domain := "example.com" + cert := makeCertResource(am, domain, false) + + if err := cfg.saveCertResource(ctx, am, cert); err != nil { + t.Fatalf("Failed to save cert resource: %v", err) + } + + // Verify BOTH legacy and bundle files exist + issuerKey := am.IssuerKey() + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SitePrivateKey(issuerKey, domain)) + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteCert(issuerKey, domain)) + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteMeta(issuerKey, domain)) + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain)) + + loaded, err := cfg.loadCertResource(ctx, am, domain) + if err != nil { + t.Fatalf("Failed to load cert resource: %v", err) + } + assertCertResourceContent(t, loaded, "private key", "certificate") +} + +func TestStorageModeTransitionFallback(t *testing.T) { + ctx := t.Context() + cfg, am := testStorageModeSetup(t, StorageModeTransition, "./_testdata_tmp_transition_fallback") + + domain := "example.com" + cert := makeCertResource(am, domain, true) + + // Save in legacy mode to simulate existing data + os.Setenv(StorageModeEnv, StorageModeLegacy) + if err := cfg.saveCertResource(ctx, am, cert); err != nil { + t.Fatalf("Failed to save cert in legacy mode: %v", err) + } + + issuerKey := am.IssuerKey() + assertFileExists(t, ctx, cfg.Storage, StorageKeys.SitePrivateKey(issuerKey, domain)) + 
assertFileNotExists(t, ctx, cfg.Storage, StorageKeys.SiteBundle(issuerKey, domain)) + + // Switch to transition mode and verify fallback to legacy works + os.Setenv(StorageModeEnv, StorageModeTransition) + loaded, err := cfg.loadCertResource(ctx, am, domain) + if err != nil { + t.Fatalf("Failed to load cert in transition mode with fallback: %v", err) + } + assertCertResourceContent(t, loaded, "private key", "certificate") +} diff --git a/crypto.go b/crypto.go index 9cbbb213..73cbc632 100644 --- a/crypto.go +++ b/crypto.go @@ -30,6 +30,7 @@ import ( "fmt" "hash/fnv" "io/fs" + "os" "sort" "strings" @@ -140,10 +141,29 @@ func fastHash(input []byte) string { return fmt.Sprintf("%x", h.Sum32()) } -// saveCertResource saves the certificate resource to disk. This +// saveCertResource saves the certificate resource to disk. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. +func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + if err := cfg.saveCertResourceBundle(ctx, issuer, cert); err != nil { + cfg.Logger.Warn("unable to store certificate resource bundle", + zap.String("issuer", issuer.IssuerKey()), + zap.String("domain", cert.NamesKey()), + zap.Error(err)) + } + return cfg.saveCertResourceLegacy(ctx, issuer, cert) + case StorageModeBundle: + return cfg.saveCertResourceBundle(ctx, issuer, cert) + default: + return cfg.saveCertResourceLegacy(ctx, issuer, cert) + } +} + +// saveCertResourceLegacy saves the certificate resource to disk. This // includes the certificate file itself, the private key, and the // metadata file. 
-func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert CertificateResource) error { +func (cfg *Config) saveCertResourceLegacy(ctx context.Context, issuer Issuer, cert CertificateResource) error { metaBytes, err := json.MarshalIndent(cert, "", "\t") if err != nil { return fmt.Errorf("encoding certificate metadata: %v", err) @@ -170,6 +190,21 @@ func (cfg *Config) saveCertResource(ctx context.Context, issuer Issuer, cert Cer return storeTx(ctx, cfg.Storage, all) } +// saveCertResourceBundle saves the certificate resource as a bundle to disk. This +// includes the certificate, the private key, and the metadata. +func (cfg *Config) saveCertResourceBundle(ctx context.Context, issuer Issuer, cert CertificateResource) error { + encoded, err := encodeCertResource(cert) + if err != nil { + return fmt.Errorf("encoding certificate resource: %v", err) + } + + issuerKey := issuer.IssuerKey() + certKey := cert.NamesKey() + + key := StorageKeys.SiteBundle(issuerKey, certKey) + return cfg.Storage.Store(ctx, key, encoded) +} + // loadCertResourceAnyIssuer loads and returns the certificate resource from any // of the configured issuers. If multiple are found (e.g. if there are 3 issuers // configured, and all 3 have a resource matching certNamesKey), then the newest @@ -237,7 +272,24 @@ func (cfg *Config) loadCertResourceAnyIssuer(ctx context.Context, certNamesKey s } // loadCertResource loads a certificate resource from the given issuer's storage location. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. 
func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + certRes, err := cfg.loadCertResourceBundle(ctx, issuer, certNamesKey) + if err == nil { + return certRes, nil + } + return cfg.loadCertResourceLegacy(ctx, issuer, certNamesKey) + case StorageModeBundle: + return cfg.loadCertResourceBundle(ctx, issuer, certNamesKey) + default: + return cfg.loadCertResourceLegacy(ctx, issuer, certNamesKey) + } +} + +// loadCertResourceLegacy loads a certificate resource from the given issuer's storage location. +func (cfg *Config) loadCertResourceLegacy(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) { certRes := CertificateResource{issuerKey: issuer.IssuerKey()} // don't use the Lookup profile because we might be loading a wildcard cert which is rejected by the Lookup profile @@ -268,6 +320,63 @@ func (cfg *Config) loadCertResource(ctx context.Context, issuer Issuer, certName return certRes, nil } +// loadCertResourceBundle loads a certificate resource from the given issuer's storage location as bundle. 
+func (cfg *Config) loadCertResourceBundle(ctx context.Context, issuer Issuer, certNamesKey string) (CertificateResource, error) { + // don't use the Lookup profile because we might be loading a wildcard cert which is rejected by the Lookup profile + normalizedName, err := idna.ToASCII(certNamesKey) + if err != nil { + return CertificateResource{}, fmt.Errorf("converting '%s' to ASCII: %v", certNamesKey, err) + } + + key := StorageKeys.SiteBundle(issuer.IssuerKey(), normalizedName) + encoded, err := cfg.Storage.Load(ctx, key) + if err != nil { + return CertificateResource{}, err + } + + certRes, err := decodeCertResource(encoded) + if err != nil { + return CertificateResource{}, fmt.Errorf("decoding certificate metadata: %v", err) + } + certRes.issuerKey = issuer.IssuerKey() + + return certRes, nil +} + +type storedCertificate struct { + SANs []string `json:"sans,omitempty"` + CertificatePEM []byte `json:"certificate_pem,omitempty"` + PrivateKeyPEM []byte `json:"private_key_pem,omitempty"` + IssuerData json.RawMessage `json:"issuer_data,omitempty"` +} + +func encodeCertResource(cert CertificateResource) ([]byte, error) { + storedCert := storedCertificate{ + SANs: cert.SANs, + CertificatePEM: cert.CertificatePEM, + PrivateKeyPEM: cert.PrivateKeyPEM, + IssuerData: cert.IssuerData, + } + encoded, err := json.Marshal(storedCert) + if err != nil { + return nil, err + } + return encoded, nil +} + +func decodeCertResource(b []byte) (CertificateResource, error) { + var storedCert storedCertificate + if err := json.Unmarshal(b, &storedCert); err != nil { + return CertificateResource{}, err + } + return CertificateResource{ + SANs: storedCert.SANs, + CertificatePEM: storedCert.CertificatePEM, + PrivateKeyPEM: storedCert.PrivateKeyPEM, + IssuerData: storedCert.IssuerData, + }, nil +} + // hashCertificateChain computes the unique hash of certChain, // which is the chain of DER-encoded bytes. It returns the // hex encoding of the hash. 
diff --git a/maintain.go b/maintain.go index bda4a93f..0bd4c6f1 100644 --- a/maintain.go +++ b/maintain.go @@ -22,6 +22,7 @@ import ( "errors" "fmt" "io/fs" + "os" "path" "runtime" "strings" @@ -427,9 +428,26 @@ func (cfg *Config) storageHasNewerARI(ctx context.Context, cert Certificate) (bo return false, acme.RenewalInfo{}, nil } -// loadStoredACMECertificateMetadata loads the stored ACME certificate data -// from the cert's sidecar JSON file. +// loadStoredACMECertificateMetadata loads the stored ACME certificate data. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert Certificate) (acme.Certificate, error) { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + acmecert, err := cfg.loadStoredACMECertificateMetadataBundle(ctx, cert) + if err == nil { + return acmecert, nil + } + return cfg.loadStoredACMECertificateMetadataLegacy(ctx, cert) + case StorageModeBundle: + return cfg.loadStoredACMECertificateMetadataBundle(ctx, cert) + default: + return cfg.loadStoredACMECertificateMetadataLegacy(ctx, cert) + } +} + +// loadStoredACMECertificateMetadataLegacy loads the stored ACME certificate data +// from the cert's sidecar JSON file. +func (cfg *Config) loadStoredACMECertificateMetadataLegacy(ctx context.Context, cert Certificate) (acme.Certificate, error) { metaBytes, err := cfg.Storage.Load(ctx, StorageKeys.SiteMeta(cert.issuerKey, cert.Names[0])) if err != nil { return acme.Certificate{}, fmt.Errorf("loading cert metadata: %w", err) @@ -448,6 +466,26 @@ func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert C return acmeCert, nil } +// loadStoredACMECertificateMetadataBundle loads the stored ACME certificate data from the cert bundle. 
+func (cfg *Config) loadStoredACMECertificateMetadataBundle(ctx context.Context, cert Certificate) (acme.Certificate, error) { + bundleBytes, err := cfg.Storage.Load(ctx, StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0])) + if err != nil { + return acme.Certificate{}, fmt.Errorf("loading cert metadata: %w", err) + } + + certRes, err := decodeCertResource(bundleBytes) + if err != nil { + return acme.Certificate{}, fmt.Errorf("unmarshaling cert metadata: %w", err) + } + + var acmeCert acme.Certificate + if err = json.Unmarshal(certRes.IssuerData, &acmeCert); err != nil { + return acme.Certificate{}, fmt.Errorf("unmarshaling potential ACME issuer metadata: %v", err) + } + + return acmeCert, nil +} + // updateARI updates the cert's ACME renewal info, first by checking storage for a newer // one, or getting it from the CA if needed. The updated info is stored in storage and // updated in the cache. The certificate with the updated ARI is returned. If true is @@ -456,7 +494,63 @@ func (cfg *Config) loadStoredACMECertificateMetadata(ctx context.Context, cert C // // This will always try to ARI without checking if it needs to be refreshed. Call // NeedsRefresh() on the RenewalInfo first, and only call this if that returns true. +// It switches storage modes between legacy and bundle mode based on the CERTMAGIC_STORAGE_MODE env. 
func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + updatedCert, changed, err = cfg.updateARILegacy(ctx, cert, logger) + if err == nil { + // Also update bundle storage with the new ARI + if bundleErr := cfg.storeARIToBundle(ctx, updatedCert); bundleErr != nil { + cfg.Logger.Warn("unable to update ARI in bundle", + zap.Strings("identifiers", cert.Names), + zap.String("issuer", cert.issuerKey), + zap.Error(bundleErr)) + } + } + return updatedCert, changed, err + case StorageModeBundle: + return cfg.updateARIBundle(ctx, cert, logger) + default: + return cfg.updateARILegacy(ctx, cert, logger) + } +} + +// storeARIToBundle updates the ARI in the bundle storage without fetching from CA. +// Note: This function only exists for transition mode to minimize CA requests. +// In transition mode, we use updateARILegacy as the source of truth (which fetches +// from CA if needed), then call this function to also update the bundle storage. 
+func (cfg *Config) storeARIToBundle(ctx context.Context, cert Certificate) error { + bundleBytes, err := cfg.Storage.Load(ctx, StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0])) + if err != nil { + return fmt.Errorf("loading certificate bundle: %v", err) + } + certRes, err := decodeCertResource(bundleBytes) + if err != nil { + return fmt.Errorf("decoding certificate bundle: %v", err) + } + var certData acme.Certificate + if err = json.Unmarshal(certRes.IssuerData, &certData); err != nil { + return fmt.Errorf("unmarshaling ACME issuer metadata: %v", err) + } + certData.RenewalInfo = &cert.ari + certDataBytes, err := json.Marshal(certData) + if err != nil { + return fmt.Errorf("marshaling certificate ACME metadata: %v", err) + } + certRes.IssuerData = certDataBytes + encoded, err := encodeCertResource(certRes) + if err != nil { + return fmt.Errorf("encoding certificate bundle: %v", err) + } + if err = cfg.Storage.Store(ctx, StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0]), encoded); err != nil { + return fmt.Errorf("storing certificate bundle: %v", err) + } + return nil +} + +// updateARILegacy updates the cert's ACME renewal info using the legacy storage format. +func (cfg *Config) updateARILegacy(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) { logger = logger.With( zap.Strings("identifiers", cert.Names), zap.String("cert_hash", cert.hash), @@ -585,7 +679,7 @@ func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap. // update the ARI value in storage var certData acme.Certificate - certData, err = cfg.loadStoredACMECertificateMetadata(ctx, cert) + certData, err = cfg.loadStoredACMECertificateMetadataLegacy(ctx, cert) if err != nil { err = fmt.Errorf("got new ARI from %s, but failed loading stored certificate metadata: %v", iss.IssuerKey(), err) return @@ -623,6 +717,184 @@ func (cfg *Config) updateARI(ctx context.Context, cert Certificate, logger *zap. 
return } +// updateARIBundle updates the cert's ACME renewal info using the bundle storage format. +func (cfg *Config) updateARIBundle(ctx context.Context, cert Certificate, logger *zap.Logger) (updatedCert Certificate, changed bool, err error) { + logger = logger.With( + zap.Strings("identifiers", cert.Names), + zap.String("cert_hash", cert.hash), + zap.String("ari_unique_id", cert.ari.UniqueIdentifier), + zap.Time("cert_expiry", cert.Leaf.NotAfter)) + + updatedCert = cert + oldARI := cert.ari + + // synchronize ARI fetching; see #297 + lockName := "ari_" + cert.ari.UniqueIdentifier + if _, ok := cfg.Storage.(TryLocker); ok { + ok, err := tryAcquireLock(ctx, cfg.Storage, lockName) + if err != nil { + return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err) + } + if !ok { + logger.Debug("attempted to obtain ARI lock but it was already taken") + return cert, false, nil + } + } else if err := acquireLock(ctx, cfg.Storage, lockName); err != nil { + return cert, false, fmt.Errorf("unable to obtain ARI lock: %v", err) + } + defer func() { + if err := releaseLock(ctx, cfg.Storage, lockName); err != nil { + logger.Error("unable to release ARI lock", zap.Error(err)) + } + }() + + // see if the stored value has been refreshed already by another instance + gotNewARI, newARI, err := cfg.storageHasNewerARI(ctx, cert) + + // when we're all done, log if something about the schedule is different + // ("WARN" level because ARI window changing may be a sign of external trouble + // and we want to draw their attention to a potential explanation URL) + defer func() { + changed = !newARI.SameWindow(oldARI) + + if changed { + logger.Warn("ARI window or selected renewal time changed", + zap.Time("prev_start", oldARI.SuggestedWindow.Start), + zap.Time("next_start", newARI.SuggestedWindow.Start), + zap.Time("prev_end", oldARI.SuggestedWindow.End), + zap.Time("next_end", newARI.SuggestedWindow.End), + zap.Time("prev_selected_time", oldARI.SelectedTime), + 
zap.Time("next_selected_time", newARI.SelectedTime), + zap.String("explanation_url", newARI.ExplanationURL)) + } + }() + + if err == nil && gotNewARI { + // great, storage has a newer one we can use + cfg.certCache.mu.Lock() + var ok bool + updatedCert, ok = cfg.certCache.cache[cert.hash] + if !ok { + // cert is no longer in the cache... why? what's the right thing to do here? + cfg.certCache.mu.Unlock() + updatedCert = cert // return input cert, not an empty one + updatedCert.ari = newARI // might as well give it the new ARI for the benefit of our caller, but it won't be updated in the cache or in storage + logger.Warn("loaded newer ARI from storage, but certificate is no longer in cache; newer ARI will be returned to caller, but not persisted in the cache", + zap.Time("selected_time", newARI.SelectedTime), + zap.Timep("next_update", newARI.RetryAfter), + zap.String("explanation_url", newARI.ExplanationURL)) + return + } + updatedCert.ari = newARI + cfg.certCache.cache[cert.hash] = updatedCert + cfg.certCache.mu.Unlock() + logger.Info("reloaded ARI with newer one in storage", + zap.Timep("next_refresh", newARI.RetryAfter), + zap.Time("renewal_time", newARI.SelectedTime)) + return + } + + if err != nil { + logger.Error("error while checking storage for updated ARI; updating ARI now", zap.Error(err)) + } + + // of the issuers configured, hopefully one of them is the ACME CA we got the cert from + for _, iss := range cfg.Issuers { + if ariGetter, ok := iss.(RenewalInfoGetter); ok && iss.IssuerKey() == cert.issuerKey { + newARI, err = ariGetter.GetRenewalInfo(ctx, cert) // be sure to use existing newARI variable so we can compare against old value in the defer + if err != nil { + // could be anything, but a common error might simply be the "wrong" ACME CA + // (meaning, different from the one that issued the cert, thus the only one + // that would have any ARI for it) if multiple ACME CAs are configured + logger.Error("failed updating renewal info from ACME CA", + 
zap.String("issuer", iss.IssuerKey()), + zap.Error(err)) + continue + } + + // when we get the latest ARI, the acme package will select a time within the window + // for us; of course, since it's random, it's likely different from the previously- + // selected time; but if the window doesn't change, there's no need to change the + // selected time (the acme package doesn't know the previous window to know better) + // ... so if the window hasn't changed we'll just put back the selected time + if newARI.SameWindow(oldARI) && !oldARI.SelectedTime.IsZero() { + newARI.SelectedTime = oldARI.SelectedTime + } + + // then store the updated ARI (even if the window didn't change, the Retry-After + // likely did) in cache and storage + + // be sure we get the cert from the cache while inside a lock to avoid logical races + cfg.certCache.mu.Lock() + updatedCert, ok = cfg.certCache.cache[cert.hash] + if !ok { + // cert is no longer in the cache; this can happen for several reasons (past expiration, + // rejected by on-demand permission module, random eviction due to full cache, etc), but + // it probably means we don't have use of this ARI update now, so while we can return it + // to the caller, we don't persist it anywhere beyond that... 
+ cfg.certCache.mu.Unlock() + updatedCert = cert // return input cert, not an empty one + updatedCert.ari = newARI // might as well give it the new ARI for the benefit of our caller, but it won't be updated in the cache or in storage + logger.Warn("obtained ARI update, but certificate no longer in cache; ARI update will be returned to caller, but not stored", + zap.Time("selected_time", newARI.SelectedTime), + zap.Timep("next_update", newARI.RetryAfter), + zap.String("explanation_url", newARI.ExplanationURL)) + return + } + updatedCert.ari = newARI + cfg.certCache.cache[cert.hash] = updatedCert + cfg.certCache.mu.Unlock() + + // update the ARI value in storage + var bundleBytes []byte + bundleBytes, err = cfg.Storage.Load(ctx, StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0])) + if err != nil { + err = fmt.Errorf("got new ARI from %s, but failed loading certificate bundle: %v", iss.IssuerKey(), err) + return + } + var certRes CertificateResource + certRes, err = decodeCertResource(bundleBytes) + if err != nil { + err = fmt.Errorf("got new ARI from %s, but failed decoding certificate bundle: %v", iss.IssuerKey(), err) + return + } + var certData acme.Certificate + if err = json.Unmarshal(certRes.IssuerData, &certData); err != nil { + err = fmt.Errorf("got new ARI from %s, but failed unmarshaling ACME issuer metadata: %v", iss.IssuerKey(), err) + return + } + certData.RenewalInfo = &newARI + var certDataBytes []byte + certDataBytes, err = json.Marshal(certData) + if err != nil { + err = fmt.Errorf("got new ARI from %s, but failed marshaling certificate ACME metadata: %v", iss.IssuerKey(), err) + return + } + certRes.IssuerData = certDataBytes + var encoded []byte + encoded, err = encodeCertResource(certRes) + if err != nil { + err = fmt.Errorf("got new ARI from %s, but could not re-encode certificate bundle: %v", iss.IssuerKey(), err) + return + } + if err = cfg.Storage.Store(ctx, StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0]), encoded); err != nil { + err 
= fmt.Errorf("got new ARI from %s, but could not store it with certificate bundle: %v", iss.IssuerKey(), err) + return + } + + logger.Info("updated and stored ACME renewal information", + zap.Time("selected_time", newARI.SelectedTime), + zap.Timep("next_update", newARI.RetryAfter), + zap.String("explanation_url", newARI.ExplanationURL)) + + return + } + } + + err = fmt.Errorf("could not fully update ACME renewal info: either no issuer supporting ARI is configured for certificate, or all such failed (make sure the ACME CA that issued the certificate is configured)") + return +} + // CleanStorageOptions specifies how to clean up a storage unit. type CleanStorageOptions struct { // Optional custom logger. @@ -774,6 +1046,21 @@ func deleteOldOCSPStaples(ctx context.Context, storage Storage, logger *zap.Logg } func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error { + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + if err := deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod); err != nil { + logger.Warn("unable to delete expired certs from bundle", + zap.Error(err)) + } + return deleteExpiredCertsLegacy(ctx, storage, logger, gracePeriod) + case StorageModeBundle: + return deleteExpiredCertsBundle(ctx, storage, logger, gracePeriod) + default: + return deleteExpiredCertsLegacy(ctx, storage, logger, gracePeriod) + } +} + +func deleteExpiredCertsLegacy(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error { issuerKeys, err := storage.List(ctx, prefixCerts, false) if err != nil { // maybe just hasn't been created yet; no big deal @@ -859,6 +1146,88 @@ func deleteExpiredCerts(ctx context.Context, storage Storage, logger *zap.Logger return nil } +func deleteExpiredCertsBundle(ctx context.Context, storage Storage, logger *zap.Logger, gracePeriod time.Duration) error { + issuerKeys, err := storage.List(ctx, prefixCerts, false) + if err != nil { + // maybe 
just hasn't been created yet; no big deal + return nil + } + + for _, issuerKey := range issuerKeys { + siteKeys, err := storage.List(ctx, issuerKey, false) + if err != nil { + logger.Error("listing contents", zap.String("issuer_key", issuerKey), zap.Error(err)) + continue + } + + for _, siteKey := range siteKeys { + // if context was cancelled, quit early; otherwise proceed + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + siteAssets, err := storage.List(ctx, siteKey, false) + if err != nil { + logger.Error("listing site contents", zap.String("site_key", siteKey), zap.Error(err)) + continue + } + + for _, assetKey := range siteAssets { + if path.Ext(assetKey) != ".bundle" { + continue + } + + bundleFile, err := storage.Load(ctx, assetKey) + if err != nil { + return fmt.Errorf("loading certificate bundle %s: %v", assetKey, err) + } + certRes, err := decodeCertResource(bundleFile) + if err != nil { + return fmt.Errorf("decoding certificate bundle %s: %v", assetKey, err) + } + block, _ := pem.Decode(certRes.CertificatePEM) + if block == nil || block.Type != "CERTIFICATE" { + return fmt.Errorf("certificate bundle %s does not contain PEM-encoded certificate", assetKey) + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return fmt.Errorf("certificate bundle %s is malformed; error parsing PEM: %v", assetKey, err) + } + + if expiredTime := time.Since(expiresAt(cert)); expiredTime >= gracePeriod { + logger.Info("certificate expired beyond grace period; cleaning up", + zap.String("asset_key", assetKey), + zap.Duration("expired_for", expiredTime), + zap.Duration("grace_period", gracePeriod)) + logger.Info("deleting asset because resource expired", zap.String("asset_key", assetKey)) + err := storage.Delete(ctx, assetKey) + if err != nil { + logger.Error("could not clean up expired certificate bundle", + zap.String("asset_key", assetKey), + zap.Error(err)) + } + } + } + + // update listing; if folder is empty, delete it + siteAssets, 
err = storage.List(ctx, siteKey, false) + if err != nil { + continue + } + if len(siteAssets) == 0 { + logger.Info("deleting site folder because key is empty", zap.String("site_key", siteKey)) + err := storage.Delete(ctx, siteKey) + if err != nil { + return fmt.Errorf("deleting empty site folder %s: %v", siteKey, err) + } + } + } + } + return nil +} + // forceRenew forcefully renews cert and replaces it in the cache, and returns the new certificate. It is intended // for use primarily in the case of cert revocation. This MUST NOT be called within a lock on cfg.certCacheMu. func (cfg *Config) forceRenew(ctx context.Context, logger *zap.Logger, cert Certificate) (Certificate, error) { @@ -921,24 +1290,45 @@ func (cfg *Config) forceRenew(ctx context.Context, logger *zap.Logger, cert Cert // moveCompromisedPrivateKey moves the private key for cert to a ".compromised" file // by copying the data to the new file, then deleting the old one. func (cfg *Config) moveCompromisedPrivateKey(ctx context.Context, cert Certificate, logger *zap.Logger) error { - privKeyStorageKey := StorageKeys.SitePrivateKey(cert.issuerKey, cert.Names[0]) + // find the issuer that matches the cert's issuer key + var issuer Issuer + for _, iss := range cfg.Issuers { + if iss.IssuerKey() == cert.issuerKey { + issuer = iss + break + } + } + if issuer == nil { + return fmt.Errorf("no configured issuer matches certificate's issuer key: %s", cert.issuerKey) + } - privKeyPEM, err := cfg.Storage.Load(ctx, privKeyStorageKey) + // load cert resource to get private key (handles both legacy and bundle storage modes) + certRes, err := cfg.loadCertResource(ctx, issuer, cert.Names[0]) if err != nil { return err } - compromisedPrivKeyStorageKey := privKeyStorageKey + ".compromised" - err = cfg.Storage.Store(ctx, compromisedPrivKeyStorageKey, privKeyPEM) + // store the compromised key for audit purposes + compromisedPrivKeyStorageKey := StorageKeys.SitePrivateKey(cert.issuerKey, cert.Names[0]) + ".compromised" + 
err = cfg.Storage.Store(ctx, compromisedPrivKeyStorageKey, certRes.PrivateKeyPEM) if err != nil { - // better safe than sorry: as a last resort, try deleting the key so it won't be reused - cfg.Storage.Delete(ctx, privKeyStorageKey) return err } - err = cfg.Storage.Delete(ctx, privKeyStorageKey) - if err != nil { - return err + privKeyStorageKey := StorageKeys.SitePrivateKey(cert.issuerKey, cert.Names[0]) + bundleKey := StorageKeys.SiteBundle(cert.issuerKey, cert.Names[0]) + + // Delete the storage containing the compromised key based on storage mode. + // We intentionally ignore delete errors since the file might not exist, + // and we avoid calling .Exists() before .Delete() to minimize storage roundtrips. + switch os.Getenv(StorageModeEnv) { + case StorageModeTransition: + cfg.Storage.Delete(ctx, bundleKey) + cfg.Storage.Delete(ctx, privKeyStorageKey) + case StorageModeBundle: + cfg.Storage.Delete(ctx, bundleKey) + default: + cfg.Storage.Delete(ctx, privKeyStorageKey) } logger.Info("removed certificate's compromised private key from use", diff --git a/storage.go b/storage.go index a4ae1fc6..03400d0a 100644 --- a/storage.go +++ b/storage.go @@ -250,6 +250,14 @@ func (keys KeyBuilder) SiteMeta(issuerKey, domain string) string { return path.Join(keys.CertsSitePrefix(issuerKey, domain), safeDomain+".json") } +// SiteBundle returns the path to the resource file for domain that +// is associated with the certificate from the given issuer with +// the given issuerKey. +func (keys KeyBuilder) SiteBundle(issuerKey, domain string) string { + safeDomain := keys.Safe(domain) + return path.Join(keys.CertsSitePrefix(issuerKey, domain), safeDomain+".bundle") +} + // OCSPStaple returns a key for the OCSP staple associated // with the given certificate. If you have the PEM bundle // handy, pass that in to save an extra encoding step. 
@@ -342,8 +350,10 @@ func releaseLock(ctx context.Context, storage Storage, lockKey string) error { // locks stores a reference to all the current // locks obtained by this process. -var locks = make(map[string]Storage) -var locksMu sync.Mutex +var ( + locks = make(map[string]Storage) + locksMu sync.Mutex +) // StorageKeys provides methods for accessing // keys and key prefixes for items in a Storage.