From 79a392adbe0846b2b6144043530456ef632ab54d Mon Sep 17 00:00:00 2001 From: Miroslav Crnic Date: Mon, 12 Jan 2026 11:47:05 +0000 Subject: [PATCH] relaxed move idempotency requirements We used to only check if the move operation happened if we retried. With multiple regions which can retry independently, detecting if we retried from client becomes unreliable. In this change we relax the idempotency check to succeed if the requrested operation succeeded and the creation time of the new edge is "close enough" to current time. Currently close enough is 1 minute. Previously if 2 clients tried to move the same file to the same target concurrently, one would succeed and the other would get edge not found. With this change, both may succeed if they are close enough in time. --- go/client/metadatareq.go | 12 +++++++++++- go/client/shardreq.go | 13 +++++++++++++ kmod/metadata.c | 15 ++++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/go/client/metadatareq.go b/go/client/metadatareq.go index 822b3dd0..89a94a9b 100644 --- a/go/client/metadatareq.go +++ b/go/client/metadatareq.go @@ -78,7 +78,17 @@ func (c *Client) metadataRequest( if resp.err != nil { var isTernError bool ternError, isTernError = resp.err.(msgs.TernError) - if isTernError && attempts > 0 { + shouldCheckIdempotency := attempts > 0 + if (shid >=0) { + if reqBody.(msgs.ShardRequest).ShardRequestKind() == msgs.SAME_DIRECTORY_RENAME { + shouldCheckIdempotency = true + } + } else { + if reqBody.(msgs.CDCRequest).CDCRequestKind() == msgs.RENAME_FILE || reqBody.(msgs.CDCRequest).CDCRequestKind() == msgs.RENAME_DIRECTORY { + shouldCheckIdempotency = true + } + } + if isTernError && shouldCheckIdempotency { if shid >= 0 { ternError = c.checkRepeatedShardRequestError(log, reqBody.(msgs.ShardRequest), respBody.(msgs.ShardResponse), ternError) } else { diff --git a/go/client/shardreq.go b/go/client/shardreq.go index 3a76d43f..33094c38 100644 --- a/go/client/shardreq.go +++ b/go/client/shardreq.go @@ -67,6 +67,19 @@ func (c *Client) checkNewEdgeAfterRename( logger.Info("got mismatched current target (%v), giving up and returning original error", lookupResp.TargetId) return false } + nowNs := msgs.Now() + var delta msgs.TernTime + if nowNs > lookupResp.CreationTime { + delta = nowNs - lookupResp.CreationTime + } else { + delta = lookupResp.CreationTime - nowNs + } + const edgeRenameMaxFuzzNs = msgs.TernTime(60 * 1000 * 1000 * 1000) // 60 seconds + if delta > edgeRenameMaxFuzzNs { + logger.Info("got creation time %v too far from now %v (delta %v), giving up and returning original error", lookupResp.CreationTime, nowNs, delta) + return false + } + *creationTime = lookupResp.CreationTime return true } diff --git a/kmod/metadata.c b/kmod/metadata.c index fc4037d2..ef2fb563 100644 --- a/kmod/metadata.c +++ b/kmod/metadata.c @@ -21,6 +21,8 @@ int ternfs_mtu = TERNFS_DEFAULT_MTU; int ternfs_default_mtu = TERNFS_DEFAULT_MTU; int ternfs_max_mtu = TERNFS_MAX_MTU; +#define TERNFS_RENAME_NEW_EDGE_CREATION_TIME_FUZZ_NS (60ULL * 1000 * 1000 * 1000) // 60 seconds + static DEFINE_PER_CPU(u64, next_request_id); static inline u64 alloc_request_id(void) { @@ -356,6 +358,13 @@ static bool check_new_edge_after_rename( consume_skb(skb); if (ctx.err != 0) { return false; } if (resp_target.x != target) { return false; } + + u64 now_ns = ktime_get_real_ns(); + u64 delta = now_ns > resp_creation_time.x ? now_ns - resp_creation_time.x : resp_creation_time.x - now_ns; + if (delta > TERNFS_RENAME_NEW_EDGE_CREATION_TIME_FUZZ_NS) { + return false; + } + *creation_time = resp_creation_time.x; } @@ -483,7 +492,7 @@ int ternfs_shard_rename( ternfs_same_directory_rename_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_same_directory_rename_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if ( @@ -1239,7 +1248,7 @@ int ternfs_cdc_rename_directory( ternfs_rename_directory_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_rename_directory_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if ( @@ -1290,7 +1299,7 @@ int ternfs_cdc_rename_file( ternfs_rename_file_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_rename_file_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if (