Skip to content

Commit 1b86228

Browse files
Address PR review feedback on async refresh handling
- Move refreshInProgress reset to a finally block so a thrown exception in updateToken or cachedTokenIsNewer cannot permanently deadlock future async refreshes. - Update the memory-ordering comment in updateToken to acknowledge that handleFailedAsyncRefresh writes staleAfter without a subsequent volatile token write (consequence is at most one extra async trigger). - Add a comment in triggerAsyncRefresh explaining why only STALE (not EXPIRED) triggers an async attempt. - Add testAsyncRefreshDiscardsOlderToken to cover the cachedTokenIsNewer discard path where a blocking refresh installs a newer token while an async refresh is in flight.
1 parent 1ab6c27 commit 1b86228

File tree

2 files changed

+83
-5
lines changed

2 files changed

+83
-5
lines changed

databricks-sdk-java/src/main/java/com/databricks/sdk/core/oauth/CachedTokenSource.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,9 @@ private void updateToken(Token t) {
221221
}
222222

223223
// Publish the token after staleAfter so readers that observe the new token also observe the
224-
// stale threshold computed for that token.
224+
// stale threshold computed for that token. Note: handleFailedAsyncRefresh writes staleAfter
225+
// without a subsequent volatile token write, so a concurrent reader may briefly see a stale
226+
// staleAfter value; the only consequence is one extra async trigger, which is harmless.
225227
this.token = t;
226228
}
227229

@@ -353,8 +355,9 @@ protected Token getTokenAsync() {
353355
* succeeded.
354356
*/
355357
private synchronized void triggerAsyncRefresh() {
356-
// Check token state again inside the synchronized block to avoid triggering a refresh if
357-
// another thread updated the token in the meantime.
358+
// Re-check inside the synchronized block: another thread may have updated the token.
359+
// Only STALE triggers async refresh; EXPIRED tokens are handled by getTokenBlocking, so
360+
// an async attempt is unnecessary and would race with the blocking path.
358361
if (refreshInProgress || getTokenState(token) != TokenState.STALE) {
359362
return;
360363
}
@@ -368,14 +371,16 @@ private synchronized void triggerAsyncRefresh() {
368371
if (newToken != null && !cachedTokenIsNewer(newToken)) {
369372
updateToken(newToken);
370373
}
371-
refreshInProgress = false;
372374
}
373375
} catch (Exception e) {
374376
synchronized (this) {
375377
handleFailedAsyncRefresh();
376-
refreshInProgress = false;
377378
logger.error("Asynchronous token refresh failed", e);
378379
}
380+
} finally {
381+
synchronized (this) {
382+
refreshInProgress = false;
383+
}
379384
}
380385
});
381386
}

databricks-sdk-java/src/test/java/com/databricks/sdk/core/oauth/CachedTokenSourceTest.java

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,79 @@ void testGetTokenRetriesAfterAsyncBackoffElapsesAndUpdatesToken() throws Excepti
317317
assertEquals(2, refreshCallCount.get(), "The cache should retry exactly once after backoff");
318318
}
319319

320+
/**
321+
* Verifies that an async refresh result is discarded when the cache already holds a token with a
322+
* later expiry. This covers the concurrent scenario where a blocking refresh runs while an async
323+
* refresh is in flight: the async result is older and should not overwrite the newer cached token.
324+
*/
325+
@Test
326+
void testAsyncRefreshDiscardsOlderToken() throws Exception {
327+
TestClockSupplier clockSupplier = new TestClockSupplier(BASE_TIME);
328+
329+
Token olderRefreshToken =
330+
tokenExpiringAt("older-async-token", BASE_TIME.plus(Duration.ofMinutes(8)));
331+
Token newerBlockingToken =
332+
tokenExpiringAt("newer-blocking-token", BASE_TIME.plus(Duration.ofMinutes(20)));
333+
334+
CountDownLatch asyncRefreshStarted = new CountDownLatch(1);
335+
CountDownLatch allowAsyncToFinish = new CountDownLatch(1);
336+
AtomicInteger refreshCallCount = new AtomicInteger();
337+
338+
TokenSource tokenSource =
339+
() -> {
340+
int call = refreshCallCount.incrementAndGet();
341+
if (call == 1) {
342+
asyncRefreshStarted.countDown();
343+
try {
344+
allowAsyncToFinish.await(5, TimeUnit.SECONDS);
345+
} catch (InterruptedException e) {
346+
Thread.currentThread().interrupt();
347+
}
348+
return olderRefreshToken;
349+
}
350+
return newerBlockingToken;
351+
};
352+
353+
CachedTokenSource source =
354+
new CachedTokenSource.Builder(tokenSource)
355+
.setToken(tokenExpiringAt(INITIAL_TOKEN, BASE_TIME.plus(Duration.ofMinutes(10))))
356+
.setClockSupplier(clockSupplier)
357+
.build();
358+
359+
// Advance clock so the token is stale, triggering an async refresh.
360+
clockSupplier.advanceTime(Duration.ofMinutes(6));
361+
Token staleResult = source.getToken();
362+
assertEquals(INITIAL_TOKEN, staleResult.getAccessToken());
363+
assertTrue(
364+
asyncRefreshStarted.await(1, TimeUnit.SECONDS),
365+
"Async refresh should have started");
366+
367+
// While async refresh is blocked, advance time so the token expires and force a blocking
368+
// refresh that installs a newer token.
369+
clockSupplier.advanceTime(Duration.ofMinutes(4));
370+
Token blockingResult = source.getToken();
371+
assertEquals("newer-blocking-token", blockingResult.getAccessToken());
372+
373+
// Let the async refresh finish — its older token should be discarded.
374+
allowAsyncToFinish.countDown();
375+
awaitCondition(
376+
"refreshInProgress should be reset after the async refresh completes",
377+
() -> {
378+
try {
379+
Field f = CachedTokenSource.class.getDeclaredField("refreshInProgress");
380+
f.setAccessible(true);
381+
return !(boolean) f.get(source);
382+
} catch (Exception e) {
383+
throw new RuntimeException(e);
384+
}
385+
});
386+
387+
assertEquals(
388+
"newer-blocking-token",
389+
source.getToken().getAccessToken(),
390+
"The newer blocking token should still be cached after the older async result is discarded");
391+
}
392+
320393
/**
321394
* Builds a CachedTokenSource whose initial token is already stale. The clock is advanced past the
322395
* dynamic staleAfter threshold so the very first getToken call triggers an async refresh.

0 commit comments

Comments
 (0)