From 10ff8754373689c4ae980039ba5b07c71c5c381b Mon Sep 17 00:00:00 2001 From: aswanidutt Date: Wed, 11 Feb 2026 15:49:25 -0600 Subject: [PATCH 01/14] Fix rotating secret with TTL=0 to use lease duration - When a rotating secret has rotation_period but ttl=0, it should not be treated as a rotating secret - Instead, it should fall back to using the original LeaseDuration with non-renewable lease logic (80-95% threshold) - This prevents the bug where sleep time would be 0 seconds Changes: - vault_common.go: Only set rotatingSecret=true when ttl > 0 - vault_common_test.go: Add test case for rotating secret with ttl=0 Fixes: VAULT-38829 --- dependency/vault_common.go | 11 +++++++---- dependency/vault_common_test.go | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index b28a98214..b5064be59 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -144,10 +144,13 @@ func leaseCheckWait(s *Secret) time.Duration { if _, ok := s.Data["rotation_period"]; ok && s.LeaseID == "" { if ttlInterface, ok := s.Data["ttl"]; ok { if ttlData, err := ttlInterface.(json.Number).Int64(); err == nil { - log.Printf("[DEBUG] Found rotation_period and set lease duration to %d seconds", ttlData) - // Add a second for cushion - base = int(ttlData) + 1 - rotatingSecret = true + // FIX: When TTL is 0, don't treat as rotating secret so it uses the lease duration + if ttlData > 0 { + log.Printf("[DEBUG] Found rotation_period and set lease duration to %d seconds", ttlData) + // Add a second for cushion + base = int(ttlData) + 1 + rotatingSecret = true + } } } } diff --git a/dependency/vault_common_test.go b/dependency/vault_common_test.go index 910fe9214..e9279c071 100644 --- a/dependency/vault_common_test.go +++ b/dependency/vault_common_test.go @@ -57,6 +57,22 @@ func TestVaultRenewDuration(t *testing.T) { t.Fatalf("renewable duration is not 6: %f", nonRenewableRotatedDur) } + // Test rotating secret with TTL=0 should not be treated as rotating + // and should use the original LeaseDuration with non-renewable lease logic + data = map[string]interface{}{ + "rotation_period": json.Number("60"), + "ttl": json.Number("0"), + } + + nonRenewableRotatedZeroTTL := Secret{LeaseDuration: 100, Data: data} + nonRenewableRotatedZeroTTLDur := leaseCheckWait(&nonRenewableRotatedZeroTTL).Seconds() + + // When TTL is 0, it should NOT be treated as a rotating secret, + // so it uses the LeaseDuration (100) with the non-renewable threshold (80-95%) + if nonRenewableRotatedZeroTTLDur < 80 || nonRenewableRotatedZeroTTLDur > 95 { + t.Fatalf("rotating secret with TTL=0 duration is not within 80%% to 95%% of lease duration: %f", nonRenewableRotatedZeroTTLDur) + } + rawExpiration := time.Now().Unix() + 100 expiration := strconv.FormatInt(rawExpiration, 10) From b0fc9494e407da122e031bb117162730c7d8faed Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 14:41:40 -0600 Subject: [PATCH 02/14] adding exponential back off retry for ttl 0 --- dependency/vault_common.go | 31 +++++++++++++++++++++++++---- dependency/vault_common_test.go | 35 +++++++++++++++++++-------------- dependency/vault_read.go | 11 +++++++++-- dependency/vault_write.go | 11 +++++++++-- 4 files changed, 65 insertions(+), 23 deletions(-) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index b5064be59..6b425440a 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -22,6 +22,8 @@ var ( onceVaultLeaseRenewalThreshold sync.Once ) +const maxJitter = 0.1 + // Secret is the structure returned for every secret within Vault. type Secret struct { // The request ID that generated this response @@ -109,9 +111,9 @@ func renewSecret(clients *ClientSet, d renewer) error { } } -// leaseCheckWait accepts a secret and returns the recommended amount of -// time to sleep. -func leaseCheckWait(s *Secret) time.Duration { +// leaseCheckWait accepts a secret and retry count, returns the recommended amount of +// time to sleep. For rotating secrets with ttl=0, implements exponential backoff. +func leaseCheckWait(s *Secret, retryCount int) time.Duration { // Handle whether this is an auth or a regular secret. base := s.LeaseDuration if s.Auth != nil && s.Auth.LeaseDuration > 0 { @@ -144,12 +146,26 @@ func leaseCheckWait(s *Secret) time.Duration { if _, ok := s.Data["rotation_period"]; ok && s.LeaseID == "" { if ttlInterface, ok := s.Data["ttl"]; ok { if ttlData, err := ttlInterface.(json.Number).Int64(); err == nil { - // FIX: When TTL is 0, don't treat as rotating secret so it uses the lease duration if ttlData > 0 { log.Printf("[DEBUG] Found rotation_period and set lease duration to %d seconds", ttlData) // Add a second for cushion base = int(ttlData) + 1 rotatingSecret = true + } else if ttlData == 0 { + // FIX: When TTL is 0, implement exponential backoff with jitter + // Start at 5s, double each time, max is VaultDefaultLeaseDuration or 300s + backoff := 5 * (1 << uint(retryCount)) // 5s, 10s, 20s, 40s, 80s... + maxBackoff := int(VaultDefaultLeaseDuration.Seconds()) + if maxBackoff <= 0 { + maxBackoff = 300 // Default max of 300 seconds if not configured + } + if backoff > maxBackoff { + backoff = maxBackoff + } + backoffDur := time.Duration(backoff) * time.Second + backoffWithJitter := jitter(backoffDur) + log.Printf("[DEBUG] Found rotation_period with ttl=0, using exponential backoff: %s with jitter %s (retry %d)", backoffDur, backoffWithJitter, retryCount) + return backoffWithJitter } } } @@ -192,6 +208,13 @@ func leaseCheckWait(s *Secret) time.Duration { return time.Duration(sleep) } +// jitter adds randomness to a duration to prevent thundering herd. +// It reduces the duration by up to maxJitter (10%) randomly. +func jitter(t time.Duration) time.Duration { + f := float64(t) * (1.0 - maxJitter*rand.Float64()) + return time.Duration(f) +} + // printVaultWarnings prints warnings for a given dependency. func printVaultWarnings(d Dependency, warnings []string) { for _, w := range warnings { diff --git a/dependency/vault_common_test.go b/dependency/vault_common_test.go index e9279c071..882e7007b 100644 --- a/dependency/vault_common_test.go +++ b/dependency/vault_common_test.go @@ -20,13 +20,13 @@ func init() { func TestVaultRenewDuration(t *testing.T) { renewable := Secret{LeaseDuration: 100, Renewable: true} - renewableDur := leaseCheckWait(&renewable).Seconds() + renewableDur := leaseCheckWait(&renewable, 0).Seconds() if renewableDur < 16 || renewableDur >= 34 { t.Fatalf("renewable duration is not within 1/6 to 1/3 of lease duration: %f", renewableDur) } nonRenewable := Secret{LeaseDuration: 100} - nonRenewableDur := leaseCheckWait(&nonRenewable).Seconds() + nonRenewableDur := leaseCheckWait(&nonRenewable, 0).Seconds() if nonRenewableDur < 80 || nonRenewableDur > 95 { t.Fatalf("renewable duration is not within 80%% to 95%% of lease duration: %f", nonRenewableDur) } @@ -37,7 +37,7 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableRotated := Secret{LeaseDuration: 100, Data: data} - nonRenewableRotatedDur := leaseCheckWait(&nonRenewableRotated).Seconds() + nonRenewableRotatedDur := leaseCheckWait(&nonRenewableRotated, 0).Seconds() // We expect a 1 second cushion if nonRenewableRotatedDur != 31 { @@ -50,27 +50,32 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableRotated = Secret{LeaseDuration: 100, Data: data} - nonRenewableRotatedDur = leaseCheckWait(&nonRenewableRotated).Seconds() + nonRenewableRotatedDur = leaseCheckWait(&nonRenewableRotated, 0).Seconds() // We expect a 1 second cushion if nonRenewableRotatedDur != 6 { t.Fatalf("renewable duration is not 6: %f", nonRenewableRotatedDur) } - // Test rotating secret with TTL=0 should not be treated as rotating - // and should use the original LeaseDuration with non-renewable lease logic + // Test rotating secret with TTL=0 uses exponential backoff data = map[string]interface{}{ "rotation_period": json.Number("60"), "ttl": json.Number("0"), } nonRenewableRotatedZeroTTL := Secret{LeaseDuration: 100, Data: data} - nonRenewableRotatedZeroTTLDur := leaseCheckWait(&nonRenewableRotatedZeroTTL).Seconds() - // When TTL is 0, it should NOT be treated as a rotating secret, - // so it uses the LeaseDuration (100) with the non-renewable threshold (80-95%) - if nonRenewableRotatedZeroTTLDur < 80 || nonRenewableRotatedZeroTTLDur > 95 { - t.Fatalf("rotating secret with TTL=0 duration is not within 80%% to 95%% of lease duration: %f", nonRenewableRotatedZeroTTLDur) + // Test exponential backoff with jitter: 5s, 10s, 20s, 40s, 80s, 160s (max 300s) + // Jitter reduces by up to 10%, so we check the range + expectedBackoffs := []float64{5, 10, 20, 40, 80, 160} + for i, expected := range expectedBackoffs { + dur := leaseCheckWait(&nonRenewableRotatedZeroTTL, i).Seconds() + // With jitter, duration should be between 90% and 100% of expected + minExpected := expected * 0.9 + maxExpected := expected + if dur < minExpected || dur > maxExpected { + t.Fatalf("retry %d: expected between %f and %f seconds, got %f", i, minExpected, maxExpected, dur) + } } rawExpiration := time.Now().Unix() + 100 @@ -82,7 +87,7 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableCert := Secret{LeaseDuration: 100, Data: data} - nonRenewableCertDur := leaseCheckWait(&nonRenewableCert).Seconds() + nonRenewableCertDur := leaseCheckWait(&nonRenewableCert, 0).Seconds() if nonRenewableCertDur < 80 || nonRenewableCertDur > 95 { t.Fatalf("non renewable certificate duration is not within 80%% to 95%%: %f", nonRenewableCertDur) } @@ -96,7 +101,7 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableSecretID := Secret{LeaseDuration: 100, Data: data} - nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID).Seconds() + nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID, 0).Seconds() if nonRenewableSecretIDDur < 0.80*(60+1) || nonRenewableSecretIDDur > 0.95*(60+1) { t.Fatalf("renewable duration is not within 80%% to 95%% of lease duration: %f", nonRenewableSecretIDDur) @@ -112,7 +117,7 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableSecretID := Secret{LeaseDuration: leaseDuration, Data: data} - nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID).Seconds() + nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID, 0).Seconds() if nonRenewableSecretIDDur < 0.80*(leaseDuration+1) || nonRenewableSecretIDDur > 0.95*(leaseDuration+1) { t.Fatalf("renewable duration is not within 80%% to 95%% of lease duration: %f", nonRenewableSecretIDDur) @@ -127,7 +132,7 @@ func TestVaultRenewDuration(t *testing.T) { } nonRenewableSecretID := Secret{LeaseDuration: leaseDuration, Data: data} - nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID).Seconds() + nonRenewableSecretIDDur := leaseCheckWait(&nonRenewableSecretID, 0).Seconds() if nonRenewableSecretIDDur < 0.80*(leaseDuration+1) || nonRenewableSecretIDDur > 0.95*(leaseDuration+1) { t.Fatalf("renewable duration is not within 80%% to 95%% of lease duration: %f", nonRenewableSecretIDDur) diff --git a/dependency/vault_read.go b/dependency/vault_read.go index a9b0a9d8f..84d1bf34e 100644 --- a/dependency/vault_read.go +++ b/dependency/vault_read.go @@ -31,6 +31,9 @@ type VaultReadQuery struct { // vaultSecret is the actual Vault secret which we are renewing vaultSecret *api.Secret + + // retryCount tracks the number of retries for exponential backoff + retryCount int } // NewVaultReadQuery creates a new datacenter dependency. @@ -88,9 +91,13 @@ func (d *VaultReadQuery) Fetch(clients *ClientSet, opts *QueryOptions, } if !vaultSecretRenewable(d.secret) { - dur := leaseCheckWait(d.secret) - log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s", d, dur) + dur := leaseCheckWait(d.secret, d.retryCount) + log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s (retry %d)", d, dur, d.retryCount) + d.retryCount++ d.sleepCh <- dur + } else { + // Reset retry count on successful renewable secret + d.retryCount = 0 } return respWithMetadata(d.secret) diff --git a/dependency/vault_write.go b/dependency/vault_write.go index 2232d1c79..1a872ac8c 100644 --- a/dependency/vault_write.go +++ b/dependency/vault_write.go @@ -32,6 +32,9 @@ type VaultWriteQuery struct { // vaultSecret is the actual Vault secret which we are renewing vaultSecret *api.Secret + + // retryCount tracks the number of retries for exponential backoff + retryCount int } // NewVaultWriteQuery creates a new datacenter dependency. @@ -91,9 +94,13 @@ func (d *VaultWriteQuery) Fetch(clients *ClientSet, opts *QueryOptions, d.secret = transformSecret(vaultSecret) if !vaultSecretRenewable(d.secret) { - dur := leaseCheckWait(d.secret) - log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s", d, dur) + dur := leaseCheckWait(d.secret, d.retryCount) + log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s (retry %d)", d, dur, d.retryCount) + d.retryCount++ d.sleepCh <- dur + } else { + // Reset retry count on successful renewable secret + d.retryCount = 0 } return respWithMetadata(d.secret) From 0b584b781a657073ebba363721b39636eac3c5c3 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 14:56:05 -0600 Subject: [PATCH 03/14] fixing CWE-338 Use of Cryptographically Secure Pseudo-Random Number --- dependency/vault_common.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 6b425440a..0dcc37258 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -4,10 +4,12 @@ package dependency import ( + cryptorand "crypto/rand" "encoding/json" "fmt" "log" - "math/rand" + "math/big" + mathrand "math/rand" "sync" "time" @@ -185,7 +187,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. - sleep = sleep * (rand.Float64() + 1) / 2.0 + sleep = sleep * (mathrand.Float64() + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased // secret. @@ -193,7 +195,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. - finalFraction := VaultLeaseRenewalThreshold + (rand.Float64()-0.5)*0.1 + finalFraction := VaultLeaseRenewalThreshold + (mathrand.Float64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp // back down to the VaultLeaseRenewalThreshold provided by the user, @@ -209,9 +211,19 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { } // jitter adds randomness to a duration to prevent thundering herd. -// It reduces the duration by up to maxJitter (10%) randomly. +// It reduces the duration by up to maxJitter (10%) randomly using crypto/rand. +// using this to fix CWE-338: Use of Cryptographically Secure Pseudo-Random Number Generator (CSPRNG) func jitter(t time.Duration) time.Duration { - f := float64(t) * (1.0 - maxJitter*rand.Float64()) + // Generate cryptographically secure random value between 0.0 and 1.0 + max := big.NewInt(1000000) + n, err := cryptorand.Int(cryptorand.Reader, max) + if err != nil { + // Fallback to no jitter if crypto/rand fails + log.Printf("[WARN] Failed to generate secure random jitter: %v", err) + return t + } + randomFloat := float64(n.Int64()) / 1000000.0 + f := float64(t) * (1.0 - maxJitter*randomFloat) return time.Duration(f) } From d482dc5dd93bd55dc252a962f70cc2b7038f5200 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 15:07:13 -0600 Subject: [PATCH 04/14] fixing cwe-338 on other places to address race test failure --- child/child.go | 14 +++++++++++--- dependency/vault_common.go | 28 ++++++++++++++++------------ watch/view.go | 13 +++++++++++-- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/child/child.go b/child/child.go index f5d64db3a..09fac488a 100644 --- a/child/child.go +++ b/child/child.go @@ -4,11 +4,12 @@ package child import ( + cryptorand "crypto/rand" "errors" "fmt" "io" "log" - "math/rand" + "math/big" "os" "os/exec" "strings" @@ -475,8 +476,15 @@ func (c *Child) randomSplay() <-chan time.Time { } ns := c.splay.Nanoseconds() - offset := rand.Int63n(ns) - t := time.Duration(offset) + // Use crypto/rand for secure random generation (CWE-338 fix) + max := big.NewInt(ns) + n, err := cryptorand.Int(cryptorand.Reader, max) + if err != nil { + // Fallback to no splay if crypto/rand fails + log.Printf("[WARN] (child) failed to generate secure random splay: %v", err) + return time.After(0) + } + t := time.Duration(n.Int64()) c.logger.Printf("[DEBUG] (child) waiting %.2fs for random splay", t.Seconds()) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 0dcc37258..3b4326968 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -9,7 +9,6 @@ import ( "fmt" "log" "math/big" - mathrand "math/rand" "sync" "time" @@ -187,7 +186,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. - sleep = sleep * (mathrand.Float64() + 1) / 2.0 + sleep = sleep * (secureRandomFloat64() + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased // secret. @@ -195,7 +194,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. - finalFraction := VaultLeaseRenewalThreshold + (mathrand.Float64()-0.5)*0.1 + finalFraction := VaultLeaseRenewalThreshold + (secureRandomFloat64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp // back down to the VaultLeaseRenewalThreshold provided by the user, @@ -210,19 +209,24 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { return time.Duration(sleep) } -// jitter adds randomness to a duration to prevent thundering herd. -// It reduces the duration by up to maxJitter (10%) randomly using crypto/rand. -// using this to fix CWE-338: Use of Cryptographically Secure Pseudo-Random Number Generator (CSPRNG) -func jitter(t time.Duration) time.Duration { - // Generate cryptographically secure random value between 0.0 and 1.0 +// secureRandomFloat64 generates a cryptographically secure random float64 in [0.0, 1.0). +// This is thread-safe and used to fix CWE-338. +func secureRandomFloat64() float64 { max := big.NewInt(1000000) n, err := cryptorand.Int(cryptorand.Reader, max) if err != nil { - // Fallback to no jitter if crypto/rand fails - log.Printf("[WARN] Failed to generate secure random jitter: %v", err) - return t + // Fallback to 0.5 if crypto/rand fails (rare but possible) + log.Printf("[WARN] Failed to generate secure random number: %v", err) + return 0.5 } - randomFloat := float64(n.Int64()) / 1000000.0 + return float64(n.Int64()) / 1000000.0 +} + +// jitter adds randomness to a duration to prevent thundering herd. +// It reduces the duration by up to maxJitter (10%) randomly using crypto/rand. +// This function is thread-safe. +func jitter(t time.Duration) time.Duration { + randomFloat := secureRandomFloat64() f := float64(t) * (1.0 - maxJitter*randomFloat) return time.Duration(f) } diff --git a/watch/view.go b/watch/view.go index f29e1531b..01478ec94 100644 --- a/watch/view.go +++ b/watch/view.go @@ -4,10 +4,11 @@ package watch import ( + cryptorand "crypto/rand" "errors" "fmt" "log" - "math/rand" + "math/big" "reflect" "sync" "time" @@ -311,7 +312,15 @@ const minDelayBetweenUpdates = time.Millisecond * 100 func rateLimiter(start time.Time) time.Duration { remaining := minDelayBetweenUpdates - time.Since(start) if remaining > 0 { - dither := time.Duration(rand.Int63n(20000000)) // 0-20ms + // Use crypto/rand for secure random generation (CWE-338 fix) + max := big.NewInt(20000000) // 0-20ms in nanoseconds + n, err := cryptorand.Int(cryptorand.Reader, max) + if err != nil { + // Fallback to no dither if crypto/rand fails + log.Printf("[WARN] (view) failed to generate secure random dither: %v", err) + return remaining + } + dither := time.Duration(n.Int64()) return remaining + dither } return 0 From 6532face34ba26db6170137c329cf82a30050926 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 15:40:29 -0600 Subject: [PATCH 05/14] reverting back to mathrand --- child/child.go | 13 ++----------- dependency/vault_common.go | 26 +++++--------------------- watch/view.go | 13 ++----------- 3 files changed, 9 insertions(+), 43 deletions(-) diff --git a/child/child.go b/child/child.go index 09fac488a..45b872d29 100644 --- a/child/child.go +++ b/child/child.go @@ -4,12 +4,11 @@ package child import ( - cryptorand "crypto/rand" "errors" "fmt" "io" "log" - "math/big" + "math/rand" "os" "os/exec" "strings" @@ -476,15 +475,7 @@ func (c *Child) randomSplay() <-chan time.Time { } ns := c.splay.Nanoseconds() - // Use crypto/rand for secure random generation (CWE-338 fix) - max := big.NewInt(ns) - n, err := cryptorand.Int(cryptorand.Reader, max) - if err != nil { - // Fallback to no splay if crypto/rand fails - log.Printf("[WARN] (child) failed to generate secure random splay: %v", err) - return time.After(0) - } - t := time.Duration(n.Int64()) + t := time.Duration(rand.Int63n(ns)) c.logger.Printf("[DEBUG] (child) waiting %.2fs for random splay", t.Seconds()) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 3b4326968..6b425440a 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -4,11 +4,10 @@ package dependency import ( - cryptorand "crypto/rand" "encoding/json" "fmt" "log" - "math/big" + "math/rand" "sync" "time" @@ -186,7 +185,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. - sleep = sleep * (secureRandomFloat64() + 1) / 2.0 + sleep = sleep * (rand.Float64() + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased // secret. @@ -194,7 +193,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. - finalFraction := VaultLeaseRenewalThreshold + (secureRandomFloat64()-0.5)*0.1 + finalFraction := VaultLeaseRenewalThreshold + (rand.Float64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp // back down to the VaultLeaseRenewalThreshold provided by the user, @@ -209,25 +208,10 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { return time.Duration(sleep) } -// secureRandomFloat64 generates a cryptographically secure random float64 in [0.0, 1.0). -// This is thread-safe and used to fix CWE-338. -func secureRandomFloat64() float64 { - max := big.NewInt(1000000) - n, err := cryptorand.Int(cryptorand.Reader, max) - if err != nil { - // Fallback to 0.5 if crypto/rand fails (rare but possible) - log.Printf("[WARN] Failed to generate secure random number: %v", err) - return 0.5 - } - return float64(n.Int64()) / 1000000.0 -} - // jitter adds randomness to a duration to prevent thundering herd. -// It reduces the duration by up to maxJitter (10%) randomly using crypto/rand. -// This function is thread-safe. +// It reduces the duration by up to maxJitter (10%) randomly. func jitter(t time.Duration) time.Duration { - randomFloat := secureRandomFloat64() - f := float64(t) * (1.0 - maxJitter*randomFloat) + f := float64(t) * (1.0 - maxJitter*rand.Float64()) return time.Duration(f) } diff --git a/watch/view.go b/watch/view.go index 01478ec94..4895e19c1 100644 --- a/watch/view.go +++ b/watch/view.go @@ -4,11 +4,10 @@ package watch import ( - cryptorand "crypto/rand" "errors" "fmt" "log" - "math/big" + "math/rand" "reflect" "sync" "time" @@ -312,15 +311,7 @@ const minDelayBetweenUpdates = time.Millisecond * 100 func rateLimiter(start time.Time) time.Duration { remaining := minDelayBetweenUpdates - time.Since(start) if remaining > 0 { - // Use crypto/rand for secure random generation (CWE-338 fix) - max := big.NewInt(20000000) // 0-20ms in nanoseconds - n, err := cryptorand.Int(cryptorand.Reader, max) - if err != nil { - // Fallback to no dither if crypto/rand fails - log.Printf("[WARN] (view) failed to generate secure random dither: %v", err) - return remaining - } - dither := time.Duration(n.Int64()) + dither := time.Duration(rand.Int63n(20000000)) // 0-20ms in nanoseconds return remaining + dither } return 0 From 7df06e82d56d36466bf9b5e2273ce13cbdf89fc3 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 16:20:32 -0600 Subject: [PATCH 06/14] fixing race condition --- dependency/file_test.go | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/dependency/file_test.go b/dependency/file_test.go index 5c9f64096..bdd7f7710 100644 --- a/dependency/file_test.go +++ b/dependency/file_test.go @@ -6,6 +6,7 @@ package dependency import ( "fmt" "os" + "path/filepath" "testing" "time" @@ -137,14 +138,38 @@ func TestFileQuery_Fetch(t *testing.T) { }) syncWriteFile := func(name string, data []byte, perm os.FileMode) error { - f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC|os.O_SYNC, perm) + // Write to a temporary file in the same directory to ensure atomic rename + dir := filepath.Dir(name) + tmpFile, err := os.CreateTemp(dir, ".syncwrite-*") + if err != nil { + return err + } + tmpName := tmpFile.Name() + defer os.Remove(tmpName) + + _, err = tmpFile.Write(data) if err == nil { - _, err = f.Write(data) - if err1 := f.Close(); err1 != nil && err == nil { - err = err1 - } + err = tmpFile.Sync() + } + if err1 := tmpFile.Close(); err1 != nil && err == nil { + err = err1 + } + if err != nil { + return err + } + + // Atomically rename the temp file to the target file + if err := os.Rename(tmpName, name); err != nil { + return err } - return err + + // Ensure the directory entry is synced (best effort) + if dirFile, err := os.Open(dir); err == nil { + dirFile.Sync() + dirFile.Close() + } + + return nil } t.Run("fires_changes", func(t *testing.T) { f, err := os.CreateTemp("", "") From d7246f005050c53a1896160bd85b17742bd9d187 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 16:25:53 -0600 Subject: [PATCH 07/14] fixing linting issue --- dependency/file_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/file_test.go b/dependency/file_test.go index bdd7f7710..fcbc44b7d 100644 --- a/dependency/file_test.go +++ b/dependency/file_test.go @@ -137,7 +137,7 @@ func TestFileQuery_Fetch(t *testing.T) { } }) - syncWriteFile := func(name string, data []byte, perm os.FileMode) error { + syncWriteFile := func(name string, data []byte, _ os.FileMode) error { // Write to a temporary file in the same directory to ensure atomic rename dir := filepath.Dir(name) tmpFile, err := os.CreateTemp(dir, ".syncwrite-*") From dbd6de2f8d9d94ab13a47de2d02543ab6e6a2fcc Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Thu, 12 Feb 2026 16:35:25 -0600 Subject: [PATCH 08/14] fixing cwe-338 --- child/child.go | 12 ++++++++++-- dependency/vault_common.go | 26 ++++++++++++++++++++++---- watch/view.go | 11 +++++++++-- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/child/child.go b/child/child.go index 45b872d29..bfcb1ac99 100644 --- a/child/child.go +++ b/child/child.go @@ -4,11 +4,12 @@ package child import ( + "crypto/rand" "errors" "fmt" "io" "log" - "math/rand" + "math/big" "os" "os/exec" "strings" @@ -475,7 +476,14 @@ func (c *Child) randomSplay() <-chan time.Time { } ns := c.splay.Nanoseconds() - t := time.Duration(rand.Int63n(ns)) + // Use crypto/rand for secure random generation (CWE-338 fix) + n, err := rand.Int(rand.Reader, big.NewInt(ns)) + if err != nil { + // Fallback to no splay on error + c.logger.Printf("[WARN] (child) failed to generate random splay: %v", err) + return time.After(0) + } + t := time.Duration(n.Int64()) c.logger.Printf("[DEBUG] (child) waiting %.2fs for random splay", t.Seconds()) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 6b425440a..06fa46b78 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -4,10 +4,11 @@ package dependency import ( + "crypto/rand" "encoding/json" "fmt" "log" - "math/rand" + "math/big" "sync" "time" @@ -185,7 +186,8 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. - sleep = sleep * (rand.Float64() + 1) / 2.0 + randVal := secureRandomFloat64() + sleep = sleep * (randVal + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased // secret. @@ -193,7 +195,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. - finalFraction := VaultLeaseRenewalThreshold + (rand.Float64()-0.5)*0.1 + finalFraction := VaultLeaseRenewalThreshold + (secureRandomFloat64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp // back down to the VaultLeaseRenewalThreshold provided by the user, @@ -208,10 +210,26 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { return time.Duration(sleep) } +// secureRandomFloat64 returns a cryptographically secure random float64 in [0.0, 1.0) +func secureRandomFloat64() float64 { + // Generate a random 53-bit integer (mantissa precision of float64) + // Use crypto/rand for secure random generation (CWE-338 fix) + + max := big.NewInt(1 << 53) + n, err := rand.Int(rand.Reader, max) + if err != nil { + // Fallback to 0.5 on error (middle of range) + log.Printf("[WARN] failed to generate secure random number: %v", err) + return 0.5 + } + return float64(n.Int64()) / float64(max.Int64()) +} + // jitter adds randomness to a duration to prevent thundering herd. // It reduces the duration by up to maxJitter (10%) randomly. func jitter(t time.Duration) time.Duration { - f := float64(t) * (1.0 - maxJitter*rand.Float64()) + randVal := secureRandomFloat64() + f := float64(t) * (1.0 - maxJitter*randVal) return time.Duration(f) } diff --git a/watch/view.go b/watch/view.go index 4895e19c1..e2b779cce 100644 --- a/watch/view.go +++ b/watch/view.go @@ -4,10 +4,11 @@ package watch import ( + "crypto/rand" "errors" "fmt" "log" - "math/rand" + "math/big" "reflect" "sync" "time" @@ -311,7 +312,13 @@ const minDelayBetweenUpdates = time.Millisecond * 100 func rateLimiter(start time.Time) time.Duration { remaining := minDelayBetweenUpdates - time.Since(start) if remaining > 0 { - dither := time.Duration(rand.Int63n(20000000)) // 0-20ms in nanoseconds + // Use crypto/rand for secure random generation (CWE-338 fix) + n, err := rand.Int(rand.Reader, big.NewInt(20000000)) + if err != nil { + // Fallback to no dither on error + return remaining + } + dither := time.Duration(n.Int64()) // 0-20ms in nanoseconds return remaining + dither } return 0 From f6ce3df598a8a27f0c5c635a23e1ff210c9efae2 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Fri, 13 Feb 2026 12:08:27 -0600 Subject: [PATCH 09/14] removing the cwe-338 and race condition changes as they are in their own pr --- child/child.go | 13 +++---------- dependency/vault_common.go | 26 ++++---------------------- watch/view.go | 11 ++--------- 3 files changed, 9 insertions(+), 41 deletions(-) diff --git a/child/child.go b/child/child.go index bfcb1ac99..f5d64db3a 100644 --- a/child/child.go +++ b/child/child.go @@ -4,12 +4,11 @@ package child import ( - "crypto/rand" "errors" "fmt" "io" "log" - "math/big" + "math/rand" "os" "os/exec" "strings" @@ -476,14 +475,8 @@ func (c *Child) randomSplay() <-chan time.Time { } ns := c.splay.Nanoseconds() - // Use crypto/rand for secure random generation (CWE-338 fix) - n, err := rand.Int(rand.Reader, big.NewInt(ns)) - if err != nil { - // Fallback to no splay on error - c.logger.Printf("[WARN] (child) failed to generate random splay: %v", err) - return time.After(0) - } - t := time.Duration(n.Int64()) + offset := rand.Int63n(ns) + t := time.Duration(offset) c.logger.Printf("[DEBUG] (child) waiting %.2fs for random splay", t.Seconds()) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 06fa46b78..6b425440a 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -4,11 +4,10 @@ package dependency import ( - "crypto/rand" "encoding/json" "fmt" "log" - "math/big" + "math/rand" "sync" "time" @@ -186,8 +185,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. - randVal := secureRandomFloat64() - sleep = sleep * (randVal + 1) / 2.0 + sleep = sleep * (rand.Float64() + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased // secret. @@ -195,7 +193,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. - finalFraction := VaultLeaseRenewalThreshold + (secureRandomFloat64()-0.5)*0.1 + finalFraction := VaultLeaseRenewalThreshold + (rand.Float64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp // back down to the VaultLeaseRenewalThreshold provided by the user, @@ -210,26 +208,10 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { return time.Duration(sleep) } -// secureRandomFloat64 returns a cryptographically secure random float64 in [0.0, 1.0) -func secureRandomFloat64() float64 { - // Generate a random 53-bit integer (mantissa precision of float64) - // Use crypto/rand for secure random generation (CWE-338 fix) - - max := big.NewInt(1 << 53) - n, err := rand.Int(rand.Reader, max) - if err != nil { - // Fallback to 0.5 on error (middle of range) - log.Printf("[WARN] failed to generate secure random number: %v", err) - return 0.5 - } - return float64(n.Int64()) / float64(max.Int64()) -} - // jitter adds randomness to a duration to prevent thundering herd. // It reduces the duration by up to maxJitter (10%) randomly. func jitter(t time.Duration) time.Duration { - randVal := secureRandomFloat64() - f := float64(t) * (1.0 - maxJitter*randVal) + f := float64(t) * (1.0 - maxJitter*rand.Float64()) return time.Duration(f) } diff --git a/watch/view.go b/watch/view.go index e2b779cce..f29e1531b 100644 --- a/watch/view.go +++ b/watch/view.go @@ -4,11 +4,10 @@ package watch import ( - "crypto/rand" "errors" "fmt" "log" - "math/big" + "math/rand" "reflect" "sync" "time" @@ -312,13 +311,7 @@ const minDelayBetweenUpdates = time.Millisecond * 100 func rateLimiter(start time.Time) time.Duration { remaining := minDelayBetweenUpdates - time.Since(start) if remaining > 0 { - // Use crypto/rand for secure random generation (CWE-338 fix) - n, err := rand.Int(rand.Reader, big.NewInt(20000000)) - if err != nil { - // Fallback to no dither on error - return remaining - } - dither := time.Duration(n.Int64()) // 0-20ms in nanoseconds + dither := time.Duration(rand.Int63n(20000000)) // 0-20ms return remaining + dither } return 0 From 92cd7f98f0e1c8cb486bb9ec5833811311e8b92c Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Fri, 13 Feb 2026 12:32:22 -0600 Subject: [PATCH 10/14] adding comments to address the CWE-338 later --- child/child.go | 4 +++- dependency/vault_common.go | 5 ++++- watch/view.go | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/child/child.go b/child/child.go index f5d64db3a..b02ab915a 100644 --- a/child/child.go +++ b/child/child.go @@ -8,7 +8,7 @@ import ( "fmt" "io" "log" - "math/rand" + "math/rand" // TODO: Replace with crypto/rand to fix CWE-338 (cryptographically weak PRNG) "os" "os/exec" "strings" @@ -475,6 +475,8 @@ func (c *Child) randomSplay() <-chan time.Time { } ns := c.splay.Nanoseconds() + // TODO: Replace math/rand.Int63n with crypto/rand.Int to fix CWE-338 + // Use: nBig, err := rand.Int(rand.Reader, big.NewInt(ns)) offset := rand.Int63n(ns) t := time.Duration(offset) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 6b425440a..47e28f88c 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -7,7 +7,7 @@ import ( "encoding/json" "fmt" "log" - "math/rand" + "math/rand" // TODO: Replace with crypto/rand to fix CWE-338 (cryptographically weak PRNG) "sync" "time" @@ -185,6 +185,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { sleep = sleep / 3.0 // Use some randomness so many clients do not hit Vault simultaneously. + // TODO: Replace math/rand.Float64 with crypto/rand to fix CWE-338 sleep = sleep * (rand.Float64() + 1) / 2.0 } else if !rotatingSecret { // If the secret doesn't have a rotation period, this is a non-renewable leased @@ -193,6 +194,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // lease as possible. Use a stagger over the configured threshold // fraction of the lease duration so that many clients do not hit // Vault simultaneously. + // TODO: Replace math/rand.Float64 with crypto/rand to fix CWE-338 finalFraction := VaultLeaseRenewalThreshold + (rand.Float64()-0.5)*0.1 if finalFraction >= 1.0 || finalFraction <= 0.0 { // If the fraction randomly winds up outside of (0.0-1.0), clamp @@ -211,6 +213,7 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { // jitter adds randomness to a duration to prevent thundering herd. // It reduces the duration by up to maxJitter (10%) randomly. func jitter(t time.Duration) time.Duration { + // TODO: Replace math/rand.Float64 with crypto/rand to fix CWE-338 f := float64(t) * (1.0 - maxJitter*rand.Float64()) return time.Duration(f) } diff --git a/watch/view.go b/watch/view.go index f29e1531b..23bbd1923 100644 --- a/watch/view.go +++ b/watch/view.go @@ -7,7 +7,7 @@ import ( "errors" "fmt" "log" - "math/rand" + "math/rand" // TODO: Replace with crypto/rand to fix CWE-338 (cryptographically weak PRNG) "reflect" "sync" "time" @@ -311,6 +311,7 @@ const minDelayBetweenUpdates = time.Millisecond * 100 func rateLimiter(start time.Time) time.Duration { remaining := minDelayBetweenUpdates - time.Since(start) if remaining > 0 { + // TODO: Replace math/rand.Int63n with crypto/rand.Int to fix CWE-338 dither := time.Duration(rand.Int63n(20000000)) // 0-20ms return remaining + dither } From 8f245e3c2a52690ac5b2aa2cd528d61c554a91dd Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Fri, 13 Feb 2026 12:36:42 -0600 Subject: [PATCH 11/14] putting the race condition file back --- dependency/file_test.go | 39 +++++++-------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/dependency/file_test.go b/dependency/file_test.go index fcbc44b7d..5c9f64096 100644 --- a/dependency/file_test.go +++ b/dependency/file_test.go @@ -6,7 +6,6 @@ package dependency import ( "fmt" "os" - "path/filepath" "testing" "time" @@ -137,39 +136,15 @@ func TestFileQuery_Fetch(t *testing.T) { } }) - syncWriteFile := func(name string, data []byte, _ os.FileMode) error { - // Write to a temporary file in the same directory to ensure atomic rename - dir := filepath.Dir(name) - tmpFile, err := os.CreateTemp(dir, ".syncwrite-*") - if err != nil { - return err - } - tmpName := tmpFile.Name() - defer os.Remove(tmpName) - - _, err = tmpFile.Write(data) + syncWriteFile := func(name string, data []byte, perm os.FileMode) error { + f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE|os.O_TRUNC|os.O_SYNC, perm) if err == nil { - err = tmpFile.Sync() - } - if err1 := tmpFile.Close(); err1 != nil && err == nil { - err = err1 - } - if err != nil { - return err - } - - // Atomically rename the temp file to the target file - if err := os.Rename(tmpName, name); err != nil { - return err - } - - // Ensure the directory entry is synced (best effort) - if dirFile, err := os.Open(dir); err == nil { - dirFile.Sync() - dirFile.Close() + _, err = f.Write(data) + if err1 := f.Close(); err1 != nil && err == nil { + err = err1 + } } - - return nil + return err } t.Run("fires_changes", func(t *testing.T) { f, err := os.CreateTemp("", "") From 68c34d8e75478602aeb783a19bada23e8d45b767 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Mon, 16 Feb 2026 11:18:39 -0600 Subject: [PATCH 12/14] using existing backoff and other pr review changes --- dependency/vault_common.go | 48 ++++++++++++++++++--------------- dependency/vault_common_test.go | 13 ++++----- dependency/vault_read.go | 19 ++++++++++++- dependency/vault_write.go | 19 ++++++++++++- 4 files changed, 70 insertions(+), 29 deletions(-) diff --git a/dependency/vault_common.go b/dependency/vault_common.go index 47e28f88c..a91a55847 100644 --- a/dependency/vault_common.go +++ b/dependency/vault_common.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/cenkalti/backoff/v4" "github.com/hashicorp/vault/api" ) @@ -22,8 +23,6 @@ var ( onceVaultLeaseRenewalThreshold sync.Once ) -const maxJitter = 0.1 - // Secret is the structure returned for every secret within Vault. type Secret struct { // The request ID that generated this response @@ -152,20 +151,35 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { base = int(ttlData) + 1 rotatingSecret = true } else if ttlData == 0 { - // FIX: When TTL is 0, implement exponential backoff with jitter - // Start at 5s, double each time, max is VaultDefaultLeaseDuration or 300s - backoff := 5 * (1 << uint(retryCount)) // 5s, 10s, 20s, 40s, 80s... - maxBackoff := int(VaultDefaultLeaseDuration.Seconds()) + // FIX: To address the case where Vault responds with a TTL of 0 when there's + // been an error rotating the secret on the Vault side, so this is effectively + // retrying an error condition. When TTL is 0, implement exponential backoff + // Start at 2s, double each time, max is VaultDefaultLeaseDuration or 300s + maxBackoff := VaultDefaultLeaseDuration if maxBackoff <= 0 { - maxBackoff = 300 // Default max of 300 seconds if not configured + maxBackoff = 300 * time.Second // Default max of 300 seconds if not configured + } + + expBackoff := backoff.NewExponentialBackOff() + expBackoff.InitialInterval = 2 * time.Second + expBackoff.Multiplier = 2.0 + expBackoff.MaxInterval = maxBackoff + expBackoff.MaxElapsedTime = 0 // Never stop retrying + // Keep default RandomizationFactor (0.5) for jitter to prevent thundering herd + expBackoff.Reset() + + // Calculate backoff for the current retry attempt + // NextBackOff() must be called retryCount+1 times to get the correct interval + var backoffDur time.Duration + for i := 0; i <= retryCount; i++ { + backoffDur = expBackoff.NextBackOff() } - if backoff > maxBackoff { - backoff = maxBackoff + if backoffDur == backoff.Stop { + backoffDur = maxBackoff } - backoffDur := time.Duration(backoff) * time.Second - backoffWithJitter := jitter(backoffDur) - log.Printf("[DEBUG] Found rotation_period with ttl=0, using exponential backoff: %s with jitter %s (retry %d)", backoffDur, backoffWithJitter, retryCount) - return backoffWithJitter + + log.Printf("[DEBUG] Found rotation_period with ttl=0, using exponential backoff: %s (retry %d)", backoffDur, retryCount) + return backoffDur } } } @@ -210,14 +224,6 @@ func leaseCheckWait(s *Secret, retryCount int) time.Duration { return time.Duration(sleep) } -// jitter adds randomness to a duration to prevent thundering herd. -// It reduces the duration by up to maxJitter (10%) randomly. -func jitter(t time.Duration) time.Duration { - // TODO: Replace math/rand.Float64 with crypto/rand to fix CWE-338 - f := float64(t) * (1.0 - maxJitter*rand.Float64()) - return time.Duration(f) -} - // printVaultWarnings prints warnings for a given dependency. func printVaultWarnings(d Dependency, warnings []string) { for _, w := range warnings { diff --git a/dependency/vault_common_test.go b/dependency/vault_common_test.go index 882e7007b..af412aa48 100644 --- a/dependency/vault_common_test.go +++ b/dependency/vault_common_test.go @@ -65,14 +65,15 @@ func TestVaultRenewDuration(t *testing.T) { nonRenewableRotatedZeroTTL := Secret{LeaseDuration: 100, Data: data} - // Test exponential backoff with jitter: 5s, 10s, 20s, 40s, 80s, 160s (max 300s) - // Jitter reduces by up to 10%, so we check the range - expectedBackoffs := []float64{5, 10, 20, 40, 80, 160} + // Test exponential backoff: 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s (max 300s) + // Using cenkalti/backoff with default RandomizationFactor (0.5) for jitter + // This means the actual interval will be [0.5*interval, 1.5*interval] + expectedBackoffs := []float64{2, 4, 8, 16, 32, 64, 128, 256} for i, expected := range expectedBackoffs { dur := leaseCheckWait(&nonRenewableRotatedZeroTTL, i).Seconds() - // With jitter, duration should be between 90% and 100% of expected - minExpected := expected * 0.9 - maxExpected := expected + // With RandomizationFactor=0.5, allow 50% variance: [0.5*expected, 1.5*expected] + minExpected := expected * 0.5 + maxExpected := expected * 1.5 if dur < minExpected || dur > maxExpected { t.Fatalf("retry %d: expected between %f and %f seconds, got %f", i, minExpected, maxExpected, dur) } diff --git a/dependency/vault_read.go b/dependency/vault_read.go index 84d1bf34e..b97fc76cd 100644 --- a/dependency/vault_read.go +++ b/dependency/vault_read.go @@ -4,6 +4,7 @@ package dependency import ( + "encoding/json" "fmt" "log" "net/url" @@ -91,9 +92,25 @@ func (d *VaultReadQuery) Fetch(clients *ClientSet, opts *QueryOptions, } if !vaultSecretRenewable(d.secret) { + // Check if this is a successful rotation (ttl > 0) + shouldResetRetry := false + if _, ok := d.secret.Data["rotation_period"]; ok && d.secret.LeaseID == "" { + if ttlInterface, ok := d.secret.Data["ttl"]; ok { + if ttlData, err := ttlInterface.(json.Number).Int64(); err == nil && ttlData > 0 { + shouldResetRetry = true + } + } + } + dur := leaseCheckWait(d.secret, d.retryCount) log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s (retry %d)", d, dur, d.retryCount) - d.retryCount++ + + if shouldResetRetry { + // Reset retry count on successful rotation + d.retryCount = 0 + } else { + d.retryCount++ + } d.sleepCh <- dur } else { // Reset retry count on successful renewable secret diff --git a/dependency/vault_write.go b/dependency/vault_write.go index 1a872ac8c..48f2aed38 100644 --- a/dependency/vault_write.go +++ b/dependency/vault_write.go @@ -5,6 +5,7 @@ package dependency import ( "crypto/sha1" + "encoding/json" "fmt" "io" "log" @@ -94,9 +95,25 @@ func (d *VaultWriteQuery) Fetch(clients *ClientSet, opts *QueryOptions, d.secret = transformSecret(vaultSecret) if !vaultSecretRenewable(d.secret) { + // Check if this is a successful rotation (ttl > 0) + shouldResetRetry := false + if _, ok := d.secret.Data["rotation_period"]; ok && d.secret.LeaseID == "" { + if ttlInterface, ok := d.secret.Data["ttl"]; ok { + if ttlData, err := ttlInterface.(json.Number).Int64(); err == nil && ttlData > 0 { + shouldResetRetry = true + } + } + } + dur := leaseCheckWait(d.secret, d.retryCount) log.Printf("[TRACE] %s: non-renewable secret, set sleep for %s (retry %d)", d, dur, d.retryCount) - d.retryCount++ + + if shouldResetRetry { + // Reset retry count on successful rotation + d.retryCount = 0 + } else { + d.retryCount++ + } d.sleepCh <- dur } else { // Reset retry count on successful renewable secret From d7a66ff0e6c8336eed9a6ddc7e14228678ff78cb Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Mon, 16 Feb 2026 11:25:08 -0600 Subject: [PATCH 13/14] go linter fix --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 789f81012..7d9564812 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,7 @@ require ( github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/armon/go-metrics v0.4.1 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 github.com/coreos/go-systemd/v22 v22.5.0 github.com/fatih/color v1.17.0 // indirect github.com/go-jose/go-jose/v4 v4.1.1 // indirect From dd7e8f06482650c03ab935b958a4d3a8968a35c6 Mon Sep 17 00:00:00 2001 From: "aswanidutt.gutta@ibm.com" Date: Mon, 16 Feb 2026 11:31:02 -0600 Subject: [PATCH 14/14] adding as direct dependecy --- .golangci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.golangci.yml b/.golangci.yml index 43c5cd3bc..5e040d740 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -24,6 +24,7 @@ linters: allow: - $gostd - github.com/BurntSushi/toml + - github.com/cenkalti/backoff/v4 - golang.org/x/sys/unix - golang.org/x/exp/maps - golang.org/x/text/cases