Skip to content

Commit 5ba0c59

Browse files
Implement dynamic token stale period based on token TTL.
Replace the fixed 5-minute stale window with a per-token formula: stale_period = min(TTL / 2, 20 minutes). This ensures that short-lived tokens (e.g. FastPath tokens with a 10-minute TTL) enter the stale window early enough to trigger a proactive async refresh, while long-lived tokens are capped at 20 minutes to maintain a meaningful refresh buffer under a 99.99% uptime SLA. The stale duration is computed at token fetch time using the remaining TTL as a proxy, since the SDK does not track token issuance timestamps. It is updated after every successful refresh (both synchronous and asynchronous) via computeStaleDuration(), and stored in a volatile field so that the unsynchronized fast-path read in getTokenAsync() sees a consistent value. Because the CachedTokenSource builder already exposes setStaleDuration, backwards compatibility is maintained through the useDynamicStaleDuration flag: calling setStaleDuration disables the flag and reverts the token cache to the legacy fixed-duration behaviour.
1 parent a5a437c commit 5ba0c59

File tree

2 files changed

+79
-13
lines changed

2 files changed

+79
-13
lines changed

databricks-sdk-java/src/main/java/com/databricks/sdk/core/oauth/CachedTokenSource.java

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ private enum TokenState {
3333
// Default duration before expiry to consider a token as 'stale'. This value is chosen to cover
3434
// the maximum monthly downtime allowed by a 99.99% uptime SLA (~4.38 minutes).
3535
private static final Duration DEFAULT_STALE_DURATION = Duration.ofMinutes(5);
36+
// Upper bound on how long before expiry a token is considered 'stale'
37+
// when using the dynamic stale duration method. This value is chosen to cover the maximum
38+
// monthly downtime allowed by a 99.99% uptime SLA (~4.38 minutes) while increasing the likelihood
39+
// that the token is refreshed asynchronously if the auth server is down.
40+
private static final Duration MAX_STALE_DURATION = Duration.ofMinutes(20);
3641
// Default additional buffer before expiry to consider a token as expired.
3742
// This is 40 seconds by default since Azure Databricks rejects tokens that are within 30 seconds
3843
// of expiry.
@@ -42,8 +47,12 @@ private enum TokenState {
4247
private final TokenSource tokenSource;
4348
// Whether asynchronous refresh is enabled.
4449
private boolean asyncDisabled = false;
45-
// Duration before expiry to consider a token as 'stale'.
50+
// The legacy duration before expiry to consider a token as 'stale'.
4651
private final Duration staleDuration;
52+
// Whether to use the dynamic stale duration computation or defer to the legacy duration.
53+
private final boolean useDynamicStaleDuration;
54+
// The dynamically computed duration before expiry to consider a token as 'stale'.
55+
private volatile Duration dynamicStaleDuration;
4756
// Additional buffer before expiry to consider a token as expired.
4857
private final Duration expiryBuffer;
4958
// Clock supplier for current time.
@@ -60,9 +69,16 @@ private CachedTokenSource(Builder builder) {
6069
this.tokenSource = builder.tokenSource;
6170
this.asyncDisabled = builder.asyncDisabled;
6271
this.staleDuration = builder.staleDuration;
72+
this.useDynamicStaleDuration = builder.useDynamicStaleDuration;
6373
this.expiryBuffer = builder.expiryBuffer;
6474
this.clockSupplier = builder.clockSupplier;
6575
this.token = builder.token;
76+
77+
if (this.useDynamicStaleDuration && this.token != null) {
78+
this.dynamicStaleDuration = computeStaleDuration(this.token);
79+
} else {
80+
this.dynamicStaleDuration = MAX_STALE_DURATION;
81+
}
6682
}
6783

6884
/**
@@ -75,6 +91,7 @@ public static class Builder {
7591
private final TokenSource tokenSource;
7692
private boolean asyncDisabled = false;
7793
private Duration staleDuration = DEFAULT_STALE_DURATION;
94+
private boolean useDynamicStaleDuration = true;
7895
private Duration expiryBuffer = DEFAULT_EXPIRY_BUFFER;
7996
private ClockSupplier clockSupplier = new UtcClockSupplier();
8097
private Token token;
@@ -130,6 +147,7 @@ public Builder setAsyncDisabled(boolean asyncDisabled) {
130147
*/
131148
public Builder setStaleDuration(Duration staleDuration) {
132149
this.staleDuration = staleDuration;
150+
// An explicitly configured stale duration opts out of the dynamic (TTL-based) computation,
// so the fixed value set above is the one used when classifying the token.
this.useDynamicStaleDuration = false;
133151
return this;
134152
}
135153

@@ -188,6 +206,17 @@ public Token getToken() {
188206
return getTokenAsync();
189207
}
190208

209+
/**
 * Computes the dynamic stale duration for the given token as
 * {@code min(remainingTtl / 2, MAX_STALE_DURATION)}.
 *
 * <p>The remaining TTL is measured from the current time of the configured clock supplier to
 * the token's expiry.
 *
 * @param t the token whose expiry is used to derive the stale duration
 * @return {@link Duration#ZERO} if the token's expiry is not in the future; otherwise half of
 *     the remaining TTL, capped at {@code MAX_STALE_DURATION}
 */
private Duration computeStaleDuration(Token t) {
210+
// Remaining lifetime of the token as seen by the injected clock.
Duration ttl = Duration.between(Instant.now(clockSupplier.getClock()), t.getExpiry());
211+
212+
// A token at or past its expiry gets no stale window.
if (ttl.compareTo(Duration.ZERO) <= 0) {
213+
return Duration.ZERO;
214+
}
215+
216+
Duration halfTtl = ttl.dividedBy(2);
217+
// Cap the stale window at MAX_STALE_DURATION.
return halfTtl.compareTo(MAX_STALE_DURATION) > 0 ? MAX_STALE_DURATION : halfTtl;
218+
}
219+
191220
/**
192221
* Determine the state of the current token (fresh, stale, or expired).
193222
*
@@ -201,7 +230,8 @@ protected TokenState getTokenState(Token t) {
201230
if (lifeTime.compareTo(expiryBuffer) <= 0) {
202231
return TokenState.EXPIRED;
203232
}
204-
if (lifeTime.compareTo(staleDuration) <= 0) {
233+
Duration usedStaleDuration = useDynamicStaleDuration ? dynamicStaleDuration : staleDuration;
234+
if (lifeTime.compareTo(usedStaleDuration) <= 0) {
205235
return TokenState.STALE;
206236
}
207237
return TokenState.FRESH;
@@ -235,6 +265,11 @@ protected Token getTokenBlocking() {
235265
throw e;
236266
}
237267
lastRefreshSucceeded = true;
268+
269+
// Update the stale duration only after ensuring that the token is refreshed successfully.
270+
if (useDynamicStaleDuration) {
271+
dynamicStaleDuration = computeStaleDuration(token);
272+
}
238273
return token;
239274
}
240275
}
@@ -281,6 +316,11 @@ private synchronized void triggerAsyncRefresh() {
281316
synchronized (this) {
282317
token = newToken;
283318
refreshInProgress = false;
319+
320+
// Update the stale duration only after ensuring that the token is refreshed successfully.
321+
if (useDynamicStaleDuration) {
322+
dynamicStaleDuration = computeStaleDuration(token);
323+
}
284324
}
285325
} catch (Exception e) {
286326
synchronized (this) {

databricks-sdk-java/src/test/java/com/databricks/sdk/core/oauth/CachedTokenSourceTest.java

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,38 +17,57 @@ public class CachedTokenSourceTest {
1717
private static final String TOKEN_TYPE = "Bearer";
1818
private static final String INITIAL_TOKEN = "initial-token";
1919
private static final String REFRESH_TOKEN = "refreshed-token";
20+
2021
private static final long FRESH_MINUTES = 10;
21-
private static final long STALE_MINUTES = 1;
22+
23+
// Token TTL for the stale scenario: 4 minutes.
24+
// dynamicStaleDuration = min(4/2, 20) = 2 min.
25+
// After advancing the clock by STALE_ADVANCE_MINUTES = 3, lifeTime = 1 min.
26+
// 1 min ≤ 2 min (stale) and 1 min > 40s (not expired) → STALE.
27+
private static final long STALE_MINUTES = 4;
28+
private static final long STALE_ADVANCE_MINUTES = 3;
29+
30+
// Token TTL for the capped stale duration scenario: 60 minutes.
31+
// dynamicStaleDuration = min(60/2, 20) = 20 min (MAX_STALE_DURATION cap).
32+
// After advancing the clock by CAPPED_STALE_ADVANCE_MINUTES = 41, lifeTime = 19 min.
33+
// 19 min ≤ 20 min (stale) and 19 min > 40s (not expired) → STALE.
34+
private static final long CAPPED_STALE_MINUTES = 60;
35+
private static final long CAPPED_STALE_ADVANCE_MINUTES = 41;
36+
2237
private static final long EXPIRED_MINUTES = -1;
2338

2439
private static Stream<Arguments> provideAsyncRefreshScenarios() {
2540
return Stream.of(
26-
Arguments.of("Fresh token, async enabled", FRESH_MINUTES, false, false, INITIAL_TOKEN),
27-
Arguments.of("Stale token, async enabled", STALE_MINUTES, false, true, INITIAL_TOKEN),
28-
Arguments.of("Expired token, async enabled", EXPIRED_MINUTES, false, true, REFRESH_TOKEN),
29-
Arguments.of("Fresh token, async disabled", FRESH_MINUTES, true, false, INITIAL_TOKEN),
30-
Arguments.of("Stale token, async disabled", STALE_MINUTES, true, false, INITIAL_TOKEN),
31-
Arguments.of("Expired token, async disabled", EXPIRED_MINUTES, true, true, REFRESH_TOKEN));
41+
Arguments.of("Fresh token, async enabled", FRESH_MINUTES, 0L, false, false, INITIAL_TOKEN),
42+
Arguments.of("Stale token, async enabled", STALE_MINUTES, STALE_ADVANCE_MINUTES, false, true, INITIAL_TOKEN),
43+
Arguments.of("Expired token, async enabled", EXPIRED_MINUTES, 0L, false, true, REFRESH_TOKEN),
44+
Arguments.of("Fresh token, async disabled", FRESH_MINUTES, 0L, true, false, INITIAL_TOKEN),
45+
Arguments.of("Stale token, async disabled", STALE_MINUTES, STALE_ADVANCE_MINUTES, true, false, INITIAL_TOKEN),
46+
Arguments.of("Stale token, capped stale duration, async enabled", CAPPED_STALE_MINUTES, CAPPED_STALE_ADVANCE_MINUTES, false, true, INITIAL_TOKEN),
47+
Arguments.of("Expired token, async disabled", EXPIRED_MINUTES, 0L, true, true, REFRESH_TOKEN));
3248
}
3349

3450
@ParameterizedTest(name = "{0}")
3551
@MethodSource("provideAsyncRefreshScenarios")
3652
void testAsyncRefreshParametrized(
3753
String testName,
3854
long minutesUntilExpiry,
55+
long clockAdvanceMinutes,
3956
boolean asyncDisabled,
4057
boolean expectRefresh,
4158
String expectedToken)
4259
throws Exception {
4360

61+
TestClockSupplier clockSupplier = new TestClockSupplier(Instant.now());
62+
4463
Token initialToken =
4564
new Token(
4665
INITIAL_TOKEN,
4766
TOKEN_TYPE,
4867
null,
49-
Instant.now().plus(Duration.ofMinutes(minutesUntilExpiry)));
68+
Instant.now(clockSupplier.getClock()).plus(Duration.ofMinutes(minutesUntilExpiry)));
5069
Token refreshedToken =
51-
new Token(REFRESH_TOKEN, TOKEN_TYPE, null, Instant.now().plus(Duration.ofMinutes(10)));
70+
new Token(REFRESH_TOKEN, TOKEN_TYPE, null, Instant.now(clockSupplier.getClock()).plus(Duration.ofMinutes(10)));
5271
CountDownLatch refreshCalled = new CountDownLatch(1);
5372

5473
TokenSource tokenSource =
@@ -69,8 +88,12 @@ public Token getToken() {
6988
new CachedTokenSource.Builder(tokenSource)
7089
.setAsyncDisabled(asyncDisabled)
7190
.setToken(initialToken)
91+
.setClockSupplier(clockSupplier)
7292
.build();
7393

94+
// Advance the clock to put the token in the expected state before calling getToken().
95+
clockSupplier.advanceTime(Duration.ofMinutes(clockAdvanceMinutes));
96+
7497
Token token = source.getToken();
7598

7699
boolean refreshed = refreshCalled.await(1, TimeUnit.SECONDS);
@@ -90,13 +113,13 @@ void testAsyncRefreshFailureFallback() throws Exception {
90113
// Create a mutable clock supplier that we can control
91114
TestClockSupplier clockSupplier = new TestClockSupplier(Instant.now());
92115

93-
// Create a token that will be stale (2 minutes until expiry)
116+
// Create a token with a TTL of 4 minutes; it will be in the stale window once the clock is advanced by 3 minutes.
94117
Token staleToken =
95118
new Token(
96119
INITIAL_TOKEN,
97120
TOKEN_TYPE,
98121
null,
99-
Instant.now(clockSupplier.getClock()).plus(Duration.ofMinutes(2)));
122+
Instant.now(clockSupplier.getClock()).plus(Duration.ofMinutes(4)));
100123

101124
class TestSource implements TokenSource {
102125
int refreshCallCount = 0;
@@ -132,6 +155,9 @@ public Token getToken() {
132155
.setClockSupplier(clockSupplier)
133156
.build();
134157

158+
// Advance clock to put the token in the stale window.
159+
clockSupplier.advanceTime(Duration.ofMinutes(3));
160+
135161
// First call triggers async refresh, which fails
136162
// Should return stale token immediately (async refresh)
137163
Token token = source.getToken();

0 commit comments

Comments
 (0)