diff --git a/go/client/metadatareq.go b/go/client/metadatareq.go index 822b3dd0..89a94a9b 100644 --- a/go/client/metadatareq.go +++ b/go/client/metadatareq.go @@ -78,7 +78,17 @@ func (c *Client) metadataRequest( if resp.err != nil { var isTernError bool ternError, isTernError = resp.err.(msgs.TernError) - if isTernError && attempts > 0 { + shouldCheckIdempotency := attempts > 0 + if (shid >=0) { + if reqBody.(msgs.ShardRequest).ShardRequestKind() == msgs.SAME_DIRECTORY_RENAME { + shouldCheckIdempotency = true + } + } else { + if reqBody.(msgs.CDCRequest).CDCRequestKind() == msgs.RENAME_FILE || reqBody.(msgs.CDCRequest).CDCRequestKind() == msgs.RENAME_DIRECTORY { + shouldCheckIdempotency = true + } + } + if isTernError && shouldCheckIdempotency { if shid >= 0 { ternError = c.checkRepeatedShardRequestError(log, reqBody.(msgs.ShardRequest), respBody.(msgs.ShardResponse), ternError) } else { diff --git a/go/client/shardreq.go b/go/client/shardreq.go index 3a76d43f..33094c38 100644 --- a/go/client/shardreq.go +++ b/go/client/shardreq.go @@ -67,6 +67,19 @@ func (c *Client) checkNewEdgeAfterRename( logger.Info("got mismatched current target (%v), giving up and returning original error", lookupResp.TargetId) return false } + nowNs := msgs.Now() + var delta msgs.TernTime + if nowNs > lookupResp.CreationTime { + delta = nowNs - lookupResp.CreationTime + } else { + delta = lookupResp.CreationTime - nowNs + } + const edgeRenameMaxFuzzNs = msgs.TernTime(60 * 1000 * 1000 * 1000) // 60 seconds + if delta > edgeRenameMaxFuzzNs { + logger.Info("got creation time %v too far from now %v (delta %v), giving up and returning original error", lookupResp.CreationTime, nowNs, delta) + return false + } + *creationTime = lookupResp.CreationTime return true } diff --git a/kmod/metadata.c b/kmod/metadata.c index fc4037d2..ef2fb563 100644 --- a/kmod/metadata.c +++ b/kmod/metadata.c @@ -21,6 +21,8 @@ int ternfs_mtu = TERNFS_DEFAULT_MTU; int ternfs_default_mtu = TERNFS_DEFAULT_MTU; int ternfs_max_mtu = TERNFS_MAX_MTU; +#define TERNFS_RENAME_NEW_EDGE_CREATION_TIME_FUZZ_NS (60ULL * 1000 * 1000 * 1000) // 60 seconds + static DEFINE_PER_CPU(u64, next_request_id); static inline u64 alloc_request_id(void) { @@ -356,6 +358,13 @@ static bool check_new_edge_after_rename( consume_skb(skb); if (ctx.err != 0) { return false; } if (resp_target.x != target) { return false; } + + u64 now_ns = ktime_get_real_ns(); + u64 delta = now_ns > resp_creation_time.x ? now_ns - resp_creation_time.x : resp_creation_time.x - now_ns; + if (delta > TERNFS_RENAME_NEW_EDGE_CREATION_TIME_FUZZ_NS) { + return false; + } + *creation_time = resp_creation_time.x; } @@ -483,7 +492,7 @@ int ternfs_shard_rename( ternfs_same_directory_rename_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_same_directory_rename_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if ( @@ -1239,7 +1248,7 @@ int ternfs_cdc_rename_directory( ternfs_rename_directory_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_rename_directory_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if ( @@ -1290,7 +1299,7 @@ int ternfs_cdc_rename_file( ternfs_rename_file_resp_get_end(&ctx, resp_new_creation_time, end); ternfs_rename_file_resp_get_finish(&ctx, end); bool recovered = false; - if (attempts > 1 && ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { + if (ctx.err == TERNFS_ERR_EDGE_NOT_FOUND) { ternfs_debug("got edge not found, performing followup checks"); // See commentary in shardreq.go if (