From e5fbae374c020b3edddeb6415e83b7cbaeaefe11 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 10 Dec 2025 10:22:18 -0500
Subject: [PATCH 01/33] Initial commit of uniflight

---
 CHANGELOG.md                  |   1 +
 Cargo.lock                    |  45 ++++
 Cargo.toml                    |   9 +-
 README.md                     |   8 +-
 crates/uniflight/CHANGELOG.md |   1 +
 crates/uniflight/Cargo.toml   |  28 +++
 crates/uniflight/README.md    |  71 ++++++
 crates/uniflight/favicon.ico  |   3 +
 crates/uniflight/logo.png     |   3 +
 crates/uniflight/src/lib.rs   | 405 ++++++++++++++++++++++++++++++++++
 10 files changed, 570 insertions(+), 4 deletions(-)
 create mode 100644 crates/uniflight/CHANGELOG.md
 create mode 100644 crates/uniflight/Cargo.toml
 create mode 100644 crates/uniflight/README.md
 create mode 100644 crates/uniflight/favicon.ico
 create mode 100644 crates/uniflight/logo.png
 create mode 100644 crates/uniflight/src/lib.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9fe2e63..a826ab4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,3 +14,4 @@ Please see each crate's change log below:
 - [`thread_aware`](./crates/thread_aware/CHANGELOG.md)
 - [`thread_aware_macros`](./crates/thread_aware_macros/CHANGELOG.md)
 - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/CHANGELOG.md)
+- [`uniflight`](./crates/uniflight/CHANGELOG.md)
diff --git a/Cargo.lock b/Cargo.lock
index 382e0ddd..ade02e31 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -57,6 +57,15 @@ version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
+[[package]]
+name = "branches"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f11502672c5570f77f6bdf573332483f8475bab6a7fda00f1fae8ddb5a6245c0"
+dependencies = [
+ "rustc_version",
+]
+
 [[package]]
 name = "byteorder"
 version = "1.5.0"
@@ -200,6 +209,21 @@ dependencies = [
  "itertools 0.13.0",
 ]
 
+[[package]]
+name = "crossbeam-queue"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
 [[package]]
 name = "crunchy"
 version = "0.2.4"
@@ -1282,6 +1306,16 @@ version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
+[[package]]
+name = "uniflight"
+version = "0.4.0"
+dependencies = [
+ "futures-util",
+ "parking_lot",
+ "tokio",
+ "xutex",
+]
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -1515,6 +1549,17 @@ version = "0.46.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
 
+[[package]]
+name = "xutex"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c6a2a824bda0270095d584b553e1f084652e5fc40ebf3945e78a14a2437e0c6"
+dependencies = [
+ "branches",
+ "crossbeam-queue",
+ "once_cell",
+]
+
 [[package]]
 name = "xxhash-rust"
 version = "0.8.15"
diff --git a/Cargo.toml b/Cargo.toml
index 4e113ec0..7647e8a1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,6 +37,7 @@ testing_aids = { path = "crates/testing_aids", default-features = false, version
 thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.4.0" }
 thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.4.0" }
 thread_aware_macros_impl = { path = "crates/thread_aware_macros_impl", default-features = false, version = "0.4.0" }
+uniflight = { path = "crates/uniflight", default-features = false, version = "0.4.0" }
 
 # external dependencies
 alloc_tracker = { version = "0.5.9", default-features = false }
@@ -46,6 +47,7 @@ criterion = { version = "0.7.0", default-features = false }
 derive_more = { version = "2.0.1", default-features = false }
 duct = { version = "1.1.1", default-features = false }
 futures = { version = "0.3.31", default-features = false }
+futures-util = { version = "0.3.31", default-features = false }
 infinity_pool = { version = "0.8.1", default-features = false }
 insta = { version = "1.44.1", default-features = false }
 many_cpus = { version = "1.1.0", default-features = false }
@@ -54,6 +56,7 @@ mutants = { version = "0.0.3", default-features = false }
 new_zealand = { version = "1.0.1", default-features = false }
 nm = { version = "0.1.21", default-features = false }
 once_cell = { version = "1.21.3", default-features = false }
+parking_lot = { version = "0.12.5", default-features = false }
 pretty_assertions = { version = "1.4.1", default-features = false }
 prettyplease = { version = "0.2.37", default-features = false }
 proc-macro2 = { version = "1.0.103", default-features = false }
@@ -71,6 +74,7 @@ tracing = { version = "0.1.41", default-features = false }
 tracing-subscriber = { version = "0.3.20", default-features = false }
 trybuild = { version = "1.0.114", default-features = false }
 typeid = { version = "1.0.3", default-features = false }
+xutex = { version = "0.2.0", default-features = false }
 xxhash-rust = { version = "0.8.15", default-features = false }
 
 [workspace.lints.rust]
@@ -84,7 +88,10 @@ unsafe_op_in_unsafe_fn = "warn"
 unused_lifetimes = "warn"
 
 # Allow cfg attributes for coverage builds and docs.rs builds
-unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage,coverage_nightly)', 'cfg(docsrs)'] }
+unexpected_cfgs = { level = "warn", check-cfg = [
+    'cfg(coverage,coverage_nightly)',
+    'cfg(docsrs)',
+] }
 
 [workspace.lints.clippy]
 cargo = { level = "warn", priority = -1 }
diff --git a/README.md b/README.md
index e794bce8..77359b4e 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,15 @@
 
 This repository contains a set of crates that help you build robust highly scalable services in Rust.
 
-- [Crates](#crates)
-- [About this Repo](#about-this-repo)
+- [The Oxidizer Project](#the-oxidizer-project)
+  - [Crates](#crates)
+  - [About this Repo](#about-this-repo)
     - [Adding New Crates](#adding-new-crates)
     - [Publishing Crates](#publishing-crates)
     - [Documenting Crates](#documenting-crates)
     - [CI Workflows](#ci-workflows)
     - [Pull Request Gates](#pull-request-gates)
-- [Trademarks](#trademarks)
+  - [Trademarks](#trademarks)
 
 ## Crates
 
@@ -36,6 +37,7 @@ These are the crates built out of this repo:
 - [`thread_aware`](./crates/thread_aware/README.md) - Facilities to support thread-isolated state.
 - [`thread_aware_macros`](./crates/thread_aware_macros/README.md) - Macros for the `thread_aware` crate.
 - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/README.md) - Macros for the `thread_aware` crate.
+- [`uniflight`](./crates/uniflight/README.md) - Coalesces duplicate async tasks into a single execution.
 
 ## About this Repo
 
diff --git a/crates/uniflight/CHANGELOG.md b/crates/uniflight/CHANGELOG.md
new file mode 100644
index 00000000..825c32f0
--- /dev/null
+++ b/crates/uniflight/CHANGELOG.md
@@ -0,0 +1 @@
+# Changelog
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
new file mode 100644
index 00000000..76df9480
--- /dev/null
+++ b/crates/uniflight/Cargo.toml
@@ -0,0 +1,28 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+[package]
+name = "uniflight"
+description = "Coalesces multiple ongoing tasks into a leader which does the work, and follower tasks that wait on the result, to prevent duplicate I/O or other downstream overhead."
+version = "0.4.0"
+readme = "README.md"
+keywords = ["oxidizer", "singleflight"]
+categories = ["Concurrency"]
+
+edition.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[dependencies]
+parking_lot = { workspace = true, default-features = false }
+xutex = { workspace = true, default-features = false }
+
+[dev-dependencies]
+futures-util = { workspace = true, default-features = false, features = ["alloc", "std"] }
+tokio = { workspace = true, features = ["macros", "rt", "time", "rt-multi-thread"] }
+
+[lints]
+workspace = true
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
new file mode 100644
index 00000000..cce07f50
--- /dev/null
+++ b/crates/uniflight/README.md
@@ -0,0 +1,71 @@
+<div align="center">
+ <img src="./logo.png" alt="Uniflight Logo" width="128">
+
+# Uniflight
+
+[![crate.io](https://img.shields.io/crates/v/uniflight.svg)](https://crates.io/crates/uniflight)
+[![docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight)
+[![MSRV](https://img.shields.io/crates/msrv/uniflight)](https://crates.io/crates/uniflight)
+[![CI](https://github.com/microsoft/oxidizer/workflows/main/badge.svg)](https://github.com/microsoft/oxidizer/actions)
+[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../LICENSE)
+
+</div>
+
+* [Summary](#summary)
+
+## Summary
+
+<!-- cargo-rdme start -->
+
+Coalesces duplicate async tasks into a single execution.
+
+This crate provides [`UniFlight`], a mechanism for deduplicating concurrent async operations.
+When multiple tasks request the same work (identified by a key), only the first task (the
+"leader") performs the actual work while subsequent tasks (the "followers") wait and receive
+a clone of the result.
+
+## When to Use
+
+Use `UniFlight` when you have expensive or rate-limited operations that may be requested
+concurrently with the same parameters:
+
+- **Cache population**: Prevent thundering herd when a cache entry expires
+- **API calls**: Deduplicate concurrent requests to the same endpoint
+- **Database queries**: Coalesce identical queries issued simultaneously
+- **File I/O**: Avoid reading the same file multiple times concurrently
+
+## Example
+
+```rust
+use uniflight::UniFlight;
+
+let group: UniFlight<&str, String> = UniFlight::new();
+
+// Multiple concurrent calls with the same key will share a single execution
+let result = group.work("user:123", || async {
+    // This expensive operation runs only once, even if called concurrently
+    "expensive_result".to_string()
+}).await;
+```
+
+## Cancellation and Panic Safety
+
+`UniFlight` handles task cancellation and panics gracefully:
+
+- If the leader task is cancelled or dropped, a follower becomes the new leader
+- If the leader task panics, a follower becomes the new leader and executes its work
+- Followers that join before the leader completes receive the cached result
+
+## Thread Safety
+
+[`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
+do not require `Send` bounds on the closure or its output.
+
+<!-- cargo-rdme end -->
+
+<div style="font-size: 75%" ><hr/>
+
+This crate was developed as part of [The Oxidizer Project](https://github.com/microsoft/oxidizer).
+
+</div>
diff --git a/crates/uniflight/favicon.ico b/crates/uniflight/favicon.ico
new file mode 100644
index 00000000..ccae8114
--- /dev/null
+++ b/crates/uniflight/favicon.ico
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e48225cb42c8ac02df5dd4a9bdba29c1e8d10436cc27055d6a021bcb951d4145
+size 480683
diff --git a/crates/uniflight/logo.png b/crates/uniflight/logo.png
new file mode 100644
index 00000000..f2bd6691
--- /dev/null
+++ b/crates/uniflight/logo.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63ad64ebb0185d7cd153acc291989d72e3a0094603d8bfe781a220861de247f2
+size 17876
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
new file mode 100644
index 00000000..ccefb69c
--- /dev/null
+++ b/crates/uniflight/src/lib.rs
@@ -0,0 +1,405 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// Based on singleflight-async by ihciah
+// Original: https://github.com/ihciah/singleflight-async
+// Licensed under MIT/Apache-2.0
+
+//! Coalesces duplicate async tasks into a single execution.
+//!
+//! This crate provides [`UniFlight`], a mechanism for deduplicating concurrent async operations.
+//! When multiple tasks request the same work (identified by a key), only the first task (the
+//! "leader") performs the actual work while subsequent tasks (the "followers") wait and receive
+//! a clone of the result.
+//!
+//! # When to Use
+//!
+//! Use `UniFlight` when you have expensive or rate-limited operations that may be requested
+//! concurrently with the same parameters:
+//!
+//! - **Cache population**: Prevent thundering herd when a cache entry expires
+//! - **API calls**: Deduplicate concurrent requests to the same endpoint
+//! - **Database queries**: Coalesce identical queries issued simultaneously
+//! - **File I/O**: Avoid reading the same file multiple times concurrently
+//!
+//! # Example
+//!
+//! ```
+//! use uniflight::UniFlight;
+//!
+//! # async fn example() {
+//! let group: UniFlight<&str, String> = UniFlight::new();
+//!
+//! // Multiple concurrent calls with the same key will share a single execution
+//! let result = group.work("user:123", || async {
+//!     // This expensive operation runs only once, even if called concurrently
+//!     "expensive_result".to_string()
+//! }).await;
+//! # }
+//! ```
+//!
+//! # Cancellation and Panic Safety
+//!
+//! `UniFlight` handles task cancellation and panics gracefully:
+//!
+//! - If the leader task is cancelled or dropped, a follower becomes the new leader
+//! - If the leader task panics, a follower becomes the new leader and executes its work
+//! - Followers that join before the leader completes receive the cached result
+//!
+//! # Thread Safety
+//!
+//! [`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
+//! do not require `Send` bounds on the closure or its output.
+
+#![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/logo.png")]
+#![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/favicon.ico")]
+
+use std::{
+    collections::HashMap,
+    hash::Hash,
+    sync::{Arc, Weak},
+};
+
+use parking_lot::Mutex as SyncMutex;
+use xutex::AsyncMutex;
+
+type SharedMapping<K, T> = Arc<SyncMutex<HashMap<K, BroadcastOnce<T>>>>;
+
+/// UniFlight represents a class of work and creates a space in which units of work
+/// can be executed with duplicate suppression.
+#[derive(Debug)]
+pub struct UniFlight<K, T> {
+    mapping: SharedMapping<K, T>,
+}
+
+impl<K, T> Default for UniFlight<K, T> {
+    fn default() -> Self {
+        Self {
+            mapping: Default::default(),
+        }
+    }
+}
+
+struct Shared<T> {
+    slot: AsyncMutex<Option<T>>,
+}
+
+impl<T> Default for Shared<T> {
+    fn default() -> Self {
+        Self {
+            slot: AsyncMutex::new(None),
+        }
+    }
+}
+
+/// `BroadcastOnce` is the map entry for an in-flight call: a weak handle to its shared slot.
+#[derive(Clone)]
+struct BroadcastOnce<T> {
+    shared: Weak<Shared<T>>,
+}
+
+impl<T> BroadcastOnce<T> {
+    fn new() -> (Self, Arc<Shared<T>>) {
+        let shared = Arc::new(Shared::default());
+        (
+            Self {
+                shared: Arc::downgrade(&shared),
+            },
+            shared,
+        )
+    }
+}
+
+// A waiter is produced by BroadcastOnce::waiter or BroadcastOnce::try_waiter.
+// It holds a strong reference to the shared slot, so the map entry's weak handle stays upgradable.
+struct BroadcastOnceWaiter<K, T, F> {
+    func: F,
+    shared: Arc<Shared<T>>,
+
+    key: K,
+    mapping: SharedMapping<K, T>,
+}
+
+impl<T> std::fmt::Debug for BroadcastOnce<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "BroadcastOnce")
+    }
+}
+
+#[allow(clippy::type_complexity)]
+impl<T> BroadcastOnce<T> {
+    fn try_waiter<K, F>(
+        &self,
+        func: F,
+        key: K,
+        mapping: SharedMapping<K, T>,
+    ) -> Result<BroadcastOnceWaiter<K, T, F>, (F, K, SharedMapping<K, T>)> {
+        let Some(upgraded) = self.shared.upgrade() else {
+            return Err((func, key, mapping));
+        };
+        Ok(BroadcastOnceWaiter {
+            func,
+            shared: upgraded,
+            key,
+            mapping,
+        })
+    }
+
+    #[inline]
+    const fn waiter<K, F>(shared: Arc<Shared<T>>, func: F, key: K, mapping: SharedMapping<K, T>) -> BroadcastOnceWaiter<K, T, F> {
+        BroadcastOnceWaiter {
+            func,
+            shared,
+            key,
+            mapping,
+        }
+    }
+}
+
+// The waiter already holds a strong reference to the shared slot, so waiting is
+// safe: whoever acquires the slot lock first runs the work, the rest clone its result.
+impl<K, T, F, Fut> BroadcastOnceWaiter<K, T, F>
+where
+    K: Hash + Eq,
+    F: FnOnce() -> Fut,
+    Fut: Future<Output = T>,
+    T: Clone,
+{
+    async fn wait(self) -> T {
+        let mut slot = self.shared.slot.lock().await;
+        if let Some(value) = (*slot).as_ref() {
+            return value.clone();
+        }
+
+        let value = (self.func)().await;
+        *slot = Some(value.clone());
+
+        self.mapping.lock().remove(&self.key);
+
+        value
+    }
+}
+
+impl<K, T> UniFlight<K, T>
+where
+    K: Hash + Eq + Clone,
+{
+    /// Create a new BroadcastOnce to do work with.
+    #[inline]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Execute and return the value for a given function, making sure that only one
+    /// operation is in-flight at a given moment. If a duplicate call comes in, that caller will
+    /// wait until the original call completes and return the same value.
+    pub fn work<F, Fut>(&self, key: K, func: F) -> impl Future<Output = T>
+    where
+        F: FnOnce() -> Fut,
+        Fut: Future<Output = T>,
+        T: Clone,
+    {
+        let owned_mapping = self.mapping.clone();
+        let mut mapping = self.mapping.lock();
+        let val = mapping.get_mut(&key);
+        match val {
+            Some(call) => {
+                let (func, key, owned_mapping) = match call.try_waiter(func, key, owned_mapping) {
+                    Ok(waiter) => return waiter.wait(),
+                    Err(fm) => fm,
+                };
+                let (new_call, shared) = BroadcastOnce::new();
+                *call = new_call;
+                let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+                waiter.wait()
+            }
+            None => {
+                let (call, shared) = BroadcastOnce::new();
+                mapping.insert(key.clone(), call);
+                let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+                waiter.wait()
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        sync::atomic::{
+            AtomicUsize,
+            Ordering::{AcqRel, Acquire},
+        },
+        time::Duration,
+    };
+
+    use futures_util::{StreamExt, stream::FuturesUnordered};
+
+    use super::*;
+
+    #[tokio::test]
+    async fn direct_call() {
+        let group = UniFlight::new();
+        let result = group
+            .work("key", || async {
+                tokio::time::sleep(Duration::from_millis(10)).await;
+                "Result".to_string()
+            })
+            .await;
+        assert_eq!(result, "Result");
+    }
+
+    #[tokio::test]
+    async fn parallel_call() {
+        let call_counter = AtomicUsize::default();
+
+        let group = UniFlight::new();
+        let futures = FuturesUnordered::new();
+        for _ in 0..10 {
+            futures.push(group.work("key", || async {
+                tokio::time::sleep(Duration::from_millis(100)).await;
+                call_counter.fetch_add(1, AcqRel);
+                "Result".to_string()
+            }));
+        }
+
+        assert!(futures.all(|out| async move { out == "Result" }).await);
+        assert_eq!(call_counter.load(Acquire), 1, "future should only be executed once");
+    }
+
+    #[tokio::test]
+    async fn parallel_call_seq_await() {
+        let call_counter = AtomicUsize::default();
+
+        let group = UniFlight::new();
+        let mut futures = Vec::new();
+        for _ in 0..10 {
+            futures.push(group.work("key", || async {
+                tokio::time::sleep(Duration::from_millis(100)).await;
+                call_counter.fetch_add(1, AcqRel);
+                "Result".to_string()
+            }));
+        }
+
+        for fut in futures.into_iter() {
+            assert_eq!(fut.await, "Result");
+        }
+        assert_eq!(call_counter.load(Acquire), 1, "future should only be executed once");
+    }
+
+    #[tokio::test]
+    async fn call_with_static_str_key() {
+        let group = UniFlight::new();
+        let result = group
+            .work("key".to_string(), || async {
+                tokio::time::sleep(Duration::from_millis(1)).await;
+                "Result".to_string()
+            })
+            .await;
+        assert_eq!(result, "Result");
+    }
+
+    #[tokio::test]
+    async fn call_with_static_string_key() {
+        let group = UniFlight::new();
+        let result = group
+            .work("key".to_string(), || async {
+                tokio::time::sleep(Duration::from_millis(1)).await;
+                "Result".to_string()
+            })
+            .await;
+        assert_eq!(result, "Result");
+    }
+
+    #[tokio::test]
+    async fn call_with_custom_key() {
+        #[derive(Clone, PartialEq, Eq, Hash)]
+        struct K(i32);
+        let group = UniFlight::new();
+        let result = group
+            .work(K(1), || async {
+                tokio::time::sleep(Duration::from_millis(1)).await;
+                "Result".to_string()
+            })
+            .await;
+        assert_eq!(result, "Result");
+    }
+
+    #[tokio::test]
+    async fn late_wait() {
+        let group = UniFlight::new();
+        let fut_early = group.work("key".to_string(), || async {
+            tokio::time::sleep(Duration::from_millis(20)).await;
+            "Result".to_string()
+        });
+        let fut_late = group.work("key".into(), || async { panic!("unexpected") });
+        assert_eq!(fut_early.await, "Result");
+        tokio::time::sleep(Duration::from_millis(50)).await;
+        assert_eq!(fut_late.await, "Result");
+    }
+
+    #[tokio::test]
+    async fn cancel() {
+        let group = UniFlight::new();
+
+        // the executer cancelled and the other awaiter will create a new future and execute.
+        let fut_cancel = group.work("key".to_string(), || async {
+            tokio::time::sleep(Duration::from_millis(2000)).await;
+            "Result1".to_string()
+        });
+        let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
+        let fut_late = group.work("key".to_string(), || async { "Result2".to_string() });
+        assert_eq!(fut_late.await, "Result2");
+
+        // the first executer is slow but not dropped, so the result will be the first ones.
+        let begin = tokio::time::Instant::now();
+        let fut_1 = group.work("key".to_string(), || async {
+            tokio::time::sleep(Duration::from_millis(2000)).await;
+            "Result1".to_string()
+        });
+        let fut_2 = group.work("key".to_string(), || async { panic!() });
+        let (v1, v2) = tokio::join!(fut_1, fut_2);
+        assert_eq!(v1, "Result1");
+        assert_eq!(v2, "Result1");
+        assert!(begin.elapsed() > Duration::from_millis(1500));
+    }
+
+    #[tokio::test]
+    async fn leader_panic_in_spawned_task() {
+        let call_counter = AtomicUsize::default();
+        let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::new());
+
+        // First task will panic in a spawned task (no catch_unwind)
+        let group_clone = group.clone();
+        let handle = tokio::spawn(async move {
+            group_clone
+                .work("key".to_string(), || async {
+                    tokio::time::sleep(Duration::from_millis(50)).await;
+                    panic!("leader panicked in spawned task");
+                    #[allow(unreachable_code)]
+                    "never".to_string()
+                })
+                .await
+        });
+
+        // Give time for the spawned task to register and start
+        tokio::time::sleep(Duration::from_millis(10)).await;
+
+        // Second task should become the new leader after the first panics
+        let group_clone = group.clone();
+        let call_counter_ref = &call_counter;
+        let fut_follower = group_clone.work("key".to_string(), || async {
+            call_counter_ref.fetch_add(1, AcqRel);
+            "Result".to_string()
+        });
+
+        // Wait for the spawned task to panic
+        let spawn_result = handle.await;
+        assert!(spawn_result.is_err(), "spawned task should have panicked");
+
+        // The follower should succeed - Rust's drop semantics ensure the mutex is released
+        let result = fut_follower.await;
+        assert_eq!(result, "Result");
+        assert_eq!(call_counter.load(Acquire), 1, "follower should have executed its work");
+    }
+}

From 9eb29c0746f7339349aa832980c0f4a6538f4748 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 10 Dec 2025 10:29:25 -0500
Subject: [PATCH 02/33] Update docs

---
 Cargo.lock                                    |  2 +-
 Cargo.toml                                    |  2 +-
 crates/uniflight/CHANGELOG.md                 |  7 ++
 crates/uniflight/Cargo.toml                   |  2 +-
 crates/uniflight/examples/cache_population.rs | 67 +++++++++++++++++++
 5 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 crates/uniflight/examples/cache_population.rs

diff --git a/Cargo.lock b/Cargo.lock
index ade02e31..1ac674a0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1308,7 +1308,7 @@ checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
 [[package]]
 name = "uniflight"
-version = "0.4.0"
+version = "0.1.0"
 dependencies = [
  "futures-util",
  "parking_lot",
diff --git a/Cargo.toml b/Cargo.toml
index 7647e8a1..215aed85 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ testing_aids = { path = "crates/testing_aids", default-features = false, version
 thread_aware = { path = "crates/thread_aware", default-features = false, version = "0.4.0" }
 thread_aware_macros = { path = "crates/thread_aware_macros", default-features = false, version = "0.4.0" }
 thread_aware_macros_impl = { path = "crates/thread_aware_macros_impl", default-features = false, version = "0.4.0" }
-uniflight = { path = "crates/uniflight", default-features = false, version = "0.4.0" }
+uniflight = { path = "crates/uniflight", default-features = false, version = "0.1.0" }
 
 # external dependencies
 alloc_tracker = { version = "0.5.9", default-features = false }
diff --git a/crates/uniflight/CHANGELOG.md b/crates/uniflight/CHANGELOG.md
index 825c32f0..0906fd27 100644
--- a/crates/uniflight/CHANGELOG.md
+++ b/crates/uniflight/CHANGELOG.md
@@ -1 +1,8 @@
 # Changelog
+
+## [0.1.0] - 2025-12-10
+
+- 🧩 Miscellaneous
+
+  - Initial commit of uniflight
+
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 76df9480..0539a483 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -4,7 +4,7 @@
 [package]
 name = "uniflight"
 description = "Coalesces multiple ongoing tasks into a leader which does the work, and follower tasks that wait on the result, to prevent duplicate I/O or other downstream overhead."
-version = "0.4.0"
+version = "0.1.0"
 readme = "README.md"
 keywords = ["oxidizer", "singleflight"]
 categories = ["Concurrency"]
diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
new file mode 100644
index 00000000..663dc334
--- /dev/null
+++ b/crates/uniflight/examples/cache_population.rs
@@ -0,0 +1,67 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Demonstrates using UniFlight to prevent thundering herd when populating a cache.
+//!
+//! Multiple concurrent requests for the same cache key will share a single execution,
+//! with the first request (leader) performing the work and subsequent requests (followers)
+//! receiving a copy of the result.
+
+use std::{
+    sync::{
+        atomic::{AtomicUsize, Ordering},
+        Arc,
+    },
+    time::Duration,
+};
+
+use uniflight::UniFlight;
+
+#[tokio::main]
+async fn main() {
+    // Create a shared UniFlight instance for cache operations
+    let cache_group = Arc::new(UniFlight::<String, String>::new());
+
+    // Track how many times the work closure actually executes
+    let execution_count = Arc::new(AtomicUsize::new(0));
+
+    println!("Starting 5 concurrent requests for user:123...\n");
+
+    // Simulate 5 concurrent requests for the same user data
+    let mut handles = Vec::new();
+    for i in 1..=5 {
+        let group = cache_group.clone();
+        let counter = execution_count.clone();
+        let handle = tokio::spawn(async move {
+            let start = tokio::time::Instant::now();
+
+            let result = group
+                .work("user:123".to_string(), || async {
+                    let count = counter.fetch_add(1, Ordering::SeqCst) + 1;
+                    println!("  [Request {i}] I'm the leader! Fetching from database... (execution #{count})");
+
+                    // Simulate expensive database query
+                    tokio::time::sleep(Duration::from_millis(500)).await;
+
+                    "UserData(name: Alice, age: 30)".to_string()
+                })
+                .await;
+
+            let elapsed = start.elapsed();
+            println!("  [Request {i}] Got result in {elapsed:?}: {result}");
+        });
+
+        handles.push(handle);
+
+        // Stagger the requests slightly to see the deduplication in action
+        tokio::time::sleep(Duration::from_millis(10)).await;
+    }
+
+    // Wait for all requests to complete
+    for handle in handles {
+        handle.await.expect("Task panicked");
+    }
+
+    let total_executions = execution_count.load(Ordering::SeqCst);
+    println!("\nAll requests completed! Database query executed {total_executions} time(s) for 5 requests.");
+}

From e29c3f46bea0441d069c1e896fdbf1fe3fca6970 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 10 Dec 2025 10:49:57 -0500
Subject: [PATCH 03/33] Fix clippy

---
 crates/uniflight/examples/cache_population.rs |  6 +-
 crates/uniflight/src/lib.rs                   | 55 +++++++++----------
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index 663dc334..b45b7ee3 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Demonstrates using UniFlight to prevent thundering herd when populating a cache.
+//! Demonstrates using `UniFlight` to prevent thundering herd when populating a cache.
 //!
 //! Multiple concurrent requests for the same cache key will share a single execution,
 //! with the first request (leader) performing the work and subsequent requests (followers)
@@ -30,8 +30,8 @@ async fn main() {
     // Simulate 5 concurrent requests for the same user data
     let mut handles = Vec::new();
     for i in 1..=5 {
-        let group = cache_group.clone();
-        let counter = execution_count.clone();
+        let group = Arc::clone(&cache_group);
+        let counter = Arc::clone(&execution_count);
         let handle = tokio::spawn(async move {
             let start = tokio::time::Instant::now();
 
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index ccefb69c..a0398d91 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -65,7 +65,7 @@ use xutex::AsyncMutex;
 
 type SharedMapping<K, T> = Arc<SyncMutex<HashMap<K, BroadcastOnce<T>>>>;
 
-/// UniFlight represents a class of work and creates a space in which units of work
+/// Represents a class of work and creates a space in which units of work
 /// can be executed with duplicate suppression.
 #[derive(Debug)]
 pub struct UniFlight<K, T> {
@@ -74,9 +74,7 @@ pub struct UniFlight<K, T> {
 
 impl<K, T> Default for UniFlight<K, T> {
     fn default() -> Self {
-        Self {
-            mapping: Default::default(),
-        }
+        Self { mapping: Arc::default() }
     }
 }
 
@@ -126,7 +124,10 @@ impl<T> std::fmt::Debug for BroadcastOnce<T> {
     }
 }
 
-#[allow(clippy::type_complexity)]
+#[expect(
+    clippy::type_complexity,
+    reason = "The Result type is complex but intentionally groups related items for the retry pattern"
+)]
 impl<T> BroadcastOnce<T> {
     fn try_waiter<K, F>(
         &self,
@@ -184,8 +185,9 @@ impl<K, T> UniFlight<K, T>
 where
     K: Hash + Eq + Clone,
 {
-    /// Create a new BroadcastOnce to do work with.
+    /// Creates a new `UniFlight` instance.
     #[inline]
+    #[must_use]
     pub fn new() -> Self {
         Self::default()
     }
@@ -199,26 +201,23 @@ where
         Fut: Future<Output = T>,
         T: Clone,
     {
-        let owned_mapping = self.mapping.clone();
+        let owned_mapping = Arc::clone(&self.mapping);
         let mut mapping = self.mapping.lock();
         let val = mapping.get_mut(&key);
-        match val {
-            Some(call) => {
-                let (func, key, owned_mapping) = match call.try_waiter(func, key, owned_mapping) {
-                    Ok(waiter) => return waiter.wait(),
-                    Err(fm) => fm,
-                };
-                let (new_call, shared) = BroadcastOnce::new();
-                *call = new_call;
-                let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
-                waiter.wait()
-            }
-            None => {
-                let (call, shared) = BroadcastOnce::new();
-                mapping.insert(key.clone(), call);
-                let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
-                waiter.wait()
-            }
+        if let Some(call) = val {
+            let (func, key, owned_mapping) = match call.try_waiter(func, key, owned_mapping) {
+                Ok(waiter) => return waiter.wait(),
+                Err(fm) => fm,
+            };
+            let (new_call, shared) = BroadcastOnce::new();
+            *call = new_call;
+            let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+            waiter.wait()
+        } else {
+            let (call, shared) = BroadcastOnce::new();
+            mapping.insert(key.clone(), call);
+            let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+            waiter.wait()
         }
     }
 }
@@ -281,7 +280,7 @@ mod tests {
             }));
         }
 
-        for fut in futures.into_iter() {
+        for fut in futures {
             assert_eq!(fut.await, "Result");
         }
         assert_eq!(call_counter.load(Acquire), 1, "future should only be executed once");
@@ -370,13 +369,13 @@ mod tests {
         let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::new());
 
         // First task will panic in a spawned task (no catch_unwind)
-        let group_clone = group.clone();
+        let group_clone = Arc::clone(&group);
         let handle = tokio::spawn(async move {
             group_clone
                 .work("key".to_string(), || async {
                     tokio::time::sleep(Duration::from_millis(50)).await;
                     panic!("leader panicked in spawned task");
-                    #[allow(unreachable_code)]
+                    #[expect(unreachable_code, reason = "Required to satisfy return type after panic")]
                     "never".to_string()
                 })
                 .await
@@ -386,7 +385,7 @@ mod tests {
         tokio::time::sleep(Duration::from_millis(10)).await;
 
         // Second task should become the new leader after the first panics
-        let group_clone = group.clone();
+        let group_clone = Arc::clone(&group);
         let call_counter_ref = &call_counter;
         let fut_follower = group_clone.work("key".to_string(), || async {
             call_counter_ref.fetch_add(1, AcqRel);

From 3f508a5a7c690e204e46960f95eebe5eb3ffcd24 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 11 Dec 2025 09:47:33 -0500
Subject: [PATCH 04/33] Add logo

---
 crates/uniflight/logo.png | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/uniflight/logo.png b/crates/uniflight/logo.png
index f2bd6691..90a48880 100644
--- a/crates/uniflight/logo.png
+++ b/crates/uniflight/logo.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63ad64ebb0185d7cd153acc291989d72e3a0094603d8bfe781a220861de247f2
-size 17876
+oid sha256:ec6fa0a1a07f8c21fb9946bc8ea78f127f027554fd8ca67f0b296d8c3d234b68
+size 46554

From ff628d0320dca7881e8e5841aca3a84f4fe895ed Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 11 Dec 2025 10:42:52 -0500
Subject: [PATCH 05/33] Fix formatting

---
 crates/uniflight/examples/cache_population.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index b45b7ee3..ab570f0f 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -9,8 +9,8 @@
 
 use std::{
     sync::{
-        atomic::{AtomicUsize, Ordering},
         Arc,
+        atomic::{AtomicUsize, Ordering},
     },
     time::Duration,
 };

From 8aa3ffb60354d42d09e48af8df50d2b0755e6881 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 11 Dec 2025 12:14:34 -0500
Subject: [PATCH 06/33] Update favicon and code coverage

---
 .gitignore                   |  3 ++-
 crates/uniflight/favicon.ico |  4 ++--
 crates/uniflight/src/lib.rs  | 44 ++++++++++++++++++++++++++++--------
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 20b91f6b..6368ac41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,4 +35,5 @@ _manifest
 ARROW
 
 # Agent files
-.claude
\ No newline at end of file
+.claude
+CLAUDE.md
\ No newline at end of file
diff --git a/crates/uniflight/favicon.ico b/crates/uniflight/favicon.ico
index ccae8114..2ed275b8 100644
--- a/crates/uniflight/favicon.ico
+++ b/crates/uniflight/favicon.ico
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e48225cb42c8ac02df5dd4a9bdba29c1e8d10436cc27055d6a021bcb951d4145
-size 480683
+oid sha256:bd1ebeca79834229253d38008b7d0491947042e808664646f907472acbb9ba01
+size 15406
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index a0398d91..ff049afd 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -236,6 +236,10 @@ mod tests {
 
     use super::*;
 
+    fn unreachable_future() -> std::future::Pending<String> {
+        std::future::pending()
+    }
+
     #[tokio::test]
     async fn direct_call() {
         let group = UniFlight::new();
@@ -263,7 +267,7 @@ mod tests {
         }
 
         assert!(futures.all(|out| async move { out == "Result" }).await);
-        assert_eq!(call_counter.load(Acquire), 1, "future should only be executed once");
+        assert_eq!(call_counter.load(Acquire), 1);
     }
 
     #[tokio::test]
@@ -283,7 +287,7 @@ mod tests {
         for fut in futures {
             assert_eq!(fut.await, "Result");
         }
-        assert_eq!(call_counter.load(Acquire), 1, "future should only be executed once");
+        assert_eq!(call_counter.load(Acquire), 1);
     }
 
     #[tokio::test]
@@ -331,7 +335,7 @@ mod tests {
             tokio::time::sleep(Duration::from_millis(20)).await;
             "Result".to_string()
         });
-        let fut_late = group.work("key".into(), || async { panic!("unexpected") });
+        let fut_late = group.work("key".into(), unreachable_future);
         assert_eq!(fut_early.await, "Result");
         tokio::time::sleep(Duration::from_millis(50)).await;
         assert_eq!(fut_late.await, "Result");
@@ -342,10 +346,7 @@ mod tests {
         let group = UniFlight::new();
 
         // the executer cancelled and the other awaiter will create a new future and execute.
-        let fut_cancel = group.work("key".to_string(), || async {
-            tokio::time::sleep(Duration::from_millis(2000)).await;
-            "Result1".to_string()
-        });
+        let fut_cancel = group.work("key".to_string(), unreachable_future);
         let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
         let fut_late = group.work("key".to_string(), || async { "Result2".to_string() });
         assert_eq!(fut_late.await, "Result2");
@@ -356,7 +357,7 @@ mod tests {
             tokio::time::sleep(Duration::from_millis(2000)).await;
             "Result1".to_string()
         });
-        let fut_2 = group.work("key".to_string(), || async { panic!() });
+        let fut_2 = group.work("key".to_string(), unreachable_future);
         let (v1, v2) = tokio::join!(fut_1, fut_2);
         assert_eq!(v1, "Result1");
         assert_eq!(v2, "Result1");
@@ -394,11 +395,34 @@ mod tests {
 
         // Wait for the spawned task to panic
         let spawn_result = handle.await;
-        assert!(spawn_result.is_err(), "spawned task should have panicked");
+        assert!(spawn_result.is_err());
 
         // The follower should succeed - Rust's drop semantics ensure the mutex is released
         let result = fut_follower.await;
         assert_eq!(result, "Result");
-        assert_eq!(call_counter.load(Acquire), 1, "follower should have executed its work");
+        assert_eq!(call_counter.load(Acquire), 1);
+    }
+
+    #[tokio::test]
+    async fn debug_impl() {
+        let group: UniFlight<String, String> = UniFlight::new();
+
+        // Test Debug on empty group
+        let debug_str = format!("{:?}", group);
+        assert!(debug_str.contains("UniFlight"));
+
+        // Create a pending work item to populate the mapping with a BroadcastOnce
+        let fut = group.work("key".to_string(), || async {
+            tokio::time::sleep(Duration::from_millis(100)).await;
+            "Result".to_string()
+        });
+
+        // Debug should still work with entries in the mapping
+        let debug_str = format!("{:?}", group);
+        assert!(debug_str.contains("UniFlight"));
+        assert!(debug_str.contains("BroadcastOnce"));
+
+        // Complete the work
+        assert_eq!(fut.await, "Result");
     }
 }

From 6ce6e111bc46946513d870bd68855722b4c33def Mon Sep 17 00:00:00 2001
From: schgoo <138131263+schgoo@users.noreply.github.com>
Date: Thu, 11 Dec 2025 16:27:52 -0500
Subject: [PATCH 07/33] Update crates/uniflight/Cargo.toml

Co-authored-by: Martin Taillefer <geeknoid@users.noreply.github.com>
---
 crates/uniflight/Cargo.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 0539a483..f62a9e1a 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -17,11 +17,11 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-parking_lot = { workspace = true, default-features = false }
-xutex = { workspace = true, default-features = false }
+parking_lot.workspace = true
+xutex.workspace = true
 
 [dev-dependencies]
-futures-util = { workspace = true, default-features = false, features = ["alloc", "std"] }
+futures-util = { workspace = true, features = ["alloc", "std"] }
 tokio = { workspace = true, features = ["macros", "rt", "time", "rt-multi-thread"] }
 
 [lints]

From 76e0edea2483351e4821800b852c6a56e5a06cd7 Mon Sep 17 00:00:00 2001
From: schgoo <138131263+schgoo@users.noreply.github.com>
Date: Thu, 11 Dec 2025 16:28:02 -0500
Subject: [PATCH 08/33] Update crates/uniflight/Cargo.toml

Co-authored-by: Martin Taillefer <geeknoid@users.noreply.github.com>
---
 crates/uniflight/Cargo.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index f62a9e1a..695b76bd 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -6,8 +6,8 @@ name = "uniflight"
 description = "Coalesces multiple ongoing tasks into a leader which does the work, and follower tasks that wait on the result, to prevent duplicate I/O or other downstream overhead."
 version = "0.1.0"
 readme = "README.md"
-keywords = ["oxidizer", "singleflight"]
-categories = ["Concurrency"]
+keywords = ["oxidizer", "coalescing", "stampede", "singleflight", "deduplication"]
+categories = ["concurrency"]
 
 edition.workspace = true
 rust-version.workspace = true

From e6e7458503a58d3ee5b75a537829ba6d2dd2a7bd Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 16 Dec 2025 16:39:07 -0500
Subject: [PATCH 09/33] Move tests to separate file

---
 crates/uniflight/src/lib.rs    | 205 --------------------------------
 crates/uniflight/tests/work.rs | 208 +++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 205 deletions(-)
 create mode 100644 crates/uniflight/tests/work.rs

diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index ff049afd..f5167415 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -221,208 +221,3 @@ where
         }
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use std::{
-        sync::atomic::{
-            AtomicUsize,
-            Ordering::{AcqRel, Acquire},
-        },
-        time::Duration,
-    };
-
-    use futures_util::{StreamExt, stream::FuturesUnordered};
-
-    use super::*;
-
-    fn unreachable_future() -> std::future::Pending<String> {
-        std::future::pending()
-    }
-
-    #[tokio::test]
-    async fn direct_call() {
-        let group = UniFlight::new();
-        let result = group
-            .work("key", || async {
-                tokio::time::sleep(Duration::from_millis(10)).await;
-                "Result".to_string()
-            })
-            .await;
-        assert_eq!(result, "Result");
-    }
-
-    #[tokio::test]
-    async fn parallel_call() {
-        let call_counter = AtomicUsize::default();
-
-        let group = UniFlight::new();
-        let futures = FuturesUnordered::new();
-        for _ in 0..10 {
-            futures.push(group.work("key", || async {
-                tokio::time::sleep(Duration::from_millis(100)).await;
-                call_counter.fetch_add(1, AcqRel);
-                "Result".to_string()
-            }));
-        }
-
-        assert!(futures.all(|out| async move { out == "Result" }).await);
-        assert_eq!(call_counter.load(Acquire), 1);
-    }
-
-    #[tokio::test]
-    async fn parallel_call_seq_await() {
-        let call_counter = AtomicUsize::default();
-
-        let group = UniFlight::new();
-        let mut futures = Vec::new();
-        for _ in 0..10 {
-            futures.push(group.work("key", || async {
-                tokio::time::sleep(Duration::from_millis(100)).await;
-                call_counter.fetch_add(1, AcqRel);
-                "Result".to_string()
-            }));
-        }
-
-        for fut in futures {
-            assert_eq!(fut.await, "Result");
-        }
-        assert_eq!(call_counter.load(Acquire), 1);
-    }
-
-    #[tokio::test]
-    async fn call_with_static_str_key() {
-        let group = UniFlight::new();
-        let result = group
-            .work("key".to_string(), || async {
-                tokio::time::sleep(Duration::from_millis(1)).await;
-                "Result".to_string()
-            })
-            .await;
-        assert_eq!(result, "Result");
-    }
-
-    #[tokio::test]
-    async fn call_with_static_string_key() {
-        let group = UniFlight::new();
-        let result = group
-            .work("key".to_string(), || async {
-                tokio::time::sleep(Duration::from_millis(1)).await;
-                "Result".to_string()
-            })
-            .await;
-        assert_eq!(result, "Result");
-    }
-
-    #[tokio::test]
-    async fn call_with_custom_key() {
-        #[derive(Clone, PartialEq, Eq, Hash)]
-        struct K(i32);
-        let group = UniFlight::new();
-        let result = group
-            .work(K(1), || async {
-                tokio::time::sleep(Duration::from_millis(1)).await;
-                "Result".to_string()
-            })
-            .await;
-        assert_eq!(result, "Result");
-    }
-
-    #[tokio::test]
-    async fn late_wait() {
-        let group = UniFlight::new();
-        let fut_early = group.work("key".to_string(), || async {
-            tokio::time::sleep(Duration::from_millis(20)).await;
-            "Result".to_string()
-        });
-        let fut_late = group.work("key".into(), unreachable_future);
-        assert_eq!(fut_early.await, "Result");
-        tokio::time::sleep(Duration::from_millis(50)).await;
-        assert_eq!(fut_late.await, "Result");
-    }
-
-    #[tokio::test]
-    async fn cancel() {
-        let group = UniFlight::new();
-
-        // the executer cancelled and the other awaiter will create a new future and execute.
-        let fut_cancel = group.work("key".to_string(), unreachable_future);
-        let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
-        let fut_late = group.work("key".to_string(), || async { "Result2".to_string() });
-        assert_eq!(fut_late.await, "Result2");
-
-        // the first executer is slow but not dropped, so the result will be the first ones.
-        let begin = tokio::time::Instant::now();
-        let fut_1 = group.work("key".to_string(), || async {
-            tokio::time::sleep(Duration::from_millis(2000)).await;
-            "Result1".to_string()
-        });
-        let fut_2 = group.work("key".to_string(), unreachable_future);
-        let (v1, v2) = tokio::join!(fut_1, fut_2);
-        assert_eq!(v1, "Result1");
-        assert_eq!(v2, "Result1");
-        assert!(begin.elapsed() > Duration::from_millis(1500));
-    }
-
-    #[tokio::test]
-    async fn leader_panic_in_spawned_task() {
-        let call_counter = AtomicUsize::default();
-        let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::new());
-
-        // First task will panic in a spawned task (no catch_unwind)
-        let group_clone = Arc::clone(&group);
-        let handle = tokio::spawn(async move {
-            group_clone
-                .work("key".to_string(), || async {
-                    tokio::time::sleep(Duration::from_millis(50)).await;
-                    panic!("leader panicked in spawned task");
-                    #[expect(unreachable_code, reason = "Required to satisfy return type after panic")]
-                    "never".to_string()
-                })
-                .await
-        });
-
-        // Give time for the spawned task to register and start
-        tokio::time::sleep(Duration::from_millis(10)).await;
-
-        // Second task should become the new leader after the first panics
-        let group_clone = Arc::clone(&group);
-        let call_counter_ref = &call_counter;
-        let fut_follower = group_clone.work("key".to_string(), || async {
-            call_counter_ref.fetch_add(1, AcqRel);
-            "Result".to_string()
-        });
-
-        // Wait for the spawned task to panic
-        let spawn_result = handle.await;
-        assert!(spawn_result.is_err());
-
-        // The follower should succeed - Rust's drop semantics ensure the mutex is released
-        let result = fut_follower.await;
-        assert_eq!(result, "Result");
-        assert_eq!(call_counter.load(Acquire), 1);
-    }
-
-    #[tokio::test]
-    async fn debug_impl() {
-        let group: UniFlight<String, String> = UniFlight::new();
-
-        // Test Debug on empty group
-        let debug_str = format!("{:?}", group);
-        assert!(debug_str.contains("UniFlight"));
-
-        // Create a pending work item to populate the mapping with a BroadcastOnce
-        let fut = group.work("key".to_string(), || async {
-            tokio::time::sleep(Duration::from_millis(100)).await;
-            "Result".to_string()
-        });
-
-        // Debug should still work with entries in the mapping
-        let debug_str = format!("{:?}", group);
-        assert!(debug_str.contains("UniFlight"));
-        assert!(debug_str.contains("BroadcastOnce"));
-
-        // Complete the work
-        assert_eq!(fut.await, "Result");
-    }
-}
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
new file mode 100644
index 00000000..c552862b
--- /dev/null
+++ b/crates/uniflight/tests/work.rs
@@ -0,0 +1,208 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Integration tests for `UniFlight::work()`.
+
+use std::{
+    sync::{
+        Arc,
+        atomic::{
+            AtomicUsize,
+            Ordering::{AcqRel, Acquire},
+        },
+    },
+    time::Duration,
+};
+
+use futures_util::{StreamExt, stream::FuturesUnordered};
+use uniflight::UniFlight;
+
+fn unreachable_future() -> std::future::Pending<String> {
+    std::future::pending()
+}
+
+#[tokio::test]
+async fn direct_call() {
+    let group = UniFlight::new();
+    let result = group
+        .work("key", || async {
+            tokio::time::sleep(Duration::from_millis(10)).await;
+            "Result".to_string()
+        })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn parallel_call() {
+    let call_counter = AtomicUsize::default();
+
+    let group = UniFlight::new();
+    let futures = FuturesUnordered::new();
+    for _ in 0..10 {
+        futures.push(group.work("key", || async {
+            tokio::time::sleep(Duration::from_millis(100)).await;
+            call_counter.fetch_add(1, AcqRel);
+            "Result".to_string()
+        }));
+    }
+
+    assert!(futures.all(|out| async move { out == "Result" }).await);
+    assert_eq!(call_counter.load(Acquire), 1);
+}
+
+#[tokio::test]
+async fn parallel_call_seq_await() {
+    let call_counter = AtomicUsize::default();
+
+    let group = UniFlight::new();
+    let mut futures = Vec::new();
+    for _ in 0..10 {
+        futures.push(group.work("key", || async {
+            tokio::time::sleep(Duration::from_millis(100)).await;
+            call_counter.fetch_add(1, AcqRel);
+            "Result".to_string()
+        }));
+    }
+
+    for fut in futures {
+        assert_eq!(fut.await, "Result");
+    }
+    assert_eq!(call_counter.load(Acquire), 1);
+}
+
+#[tokio::test]
+async fn call_with_static_str_key() {
+    let group = UniFlight::new(); // key type is inferred as `&'static str`
+    let result = group
+        .work("key", || async {
+            tokio::time::sleep(Duration::from_millis(1)).await;
+            "Result".to_string()
+        })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn call_with_static_string_key() {
+    let group = UniFlight::new();
+    let result = group
+        .work("key".to_string(), || async {
+            tokio::time::sleep(Duration::from_millis(1)).await;
+            "Result".to_string()
+        })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn call_with_custom_key() {
+    #[derive(Clone, PartialEq, Eq, Hash)]
+    struct K(i32);
+    let group = UniFlight::new();
+    let result = group
+        .work(K(1), || async {
+            tokio::time::sleep(Duration::from_millis(1)).await;
+            "Result".to_string()
+        })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn late_wait() {
+    let group = UniFlight::new();
+    let fut_early = group.work("key".to_string(), || async {
+        tokio::time::sleep(Duration::from_millis(20)).await;
+        "Result".to_string()
+    });
+    let fut_late = group.work("key".into(), unreachable_future);
+    assert_eq!(fut_early.await, "Result");
+    tokio::time::sleep(Duration::from_millis(50)).await;
+    assert_eq!(fut_late.await, "Result");
+}
+
+#[tokio::test]
+async fn cancel() {
+    let group = UniFlight::new();
+
+    // The first caller times out and is dropped, so the next caller runs its own closure.
+    let fut_cancel = group.work("key".to_string(), unreachable_future);
+    let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
+    let fut_late = group.work("key".to_string(), || async { "Result2".to_string() });
+    assert_eq!(fut_late.await, "Result2");
+
+    // The first caller is slow but never dropped, so both callers receive its result.
+    let begin = tokio::time::Instant::now();
+    let fut_1 = group.work("key".to_string(), || async {
+        tokio::time::sleep(Duration::from_millis(2000)).await;
+        "Result1".to_string()
+    });
+    let fut_2 = group.work("key".to_string(), unreachable_future);
+    let (v1, v2) = tokio::join!(fut_1, fut_2);
+    assert_eq!(v1, "Result1");
+    assert_eq!(v2, "Result1");
+    assert!(begin.elapsed() > Duration::from_millis(1500));
+}
+
+#[tokio::test]
+async fn leader_panic_in_spawned_task() {
+    let call_counter = AtomicUsize::default();
+    let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::new());
+
+    // First task will panic in a spawned task (no catch_unwind)
+    let group_clone = Arc::clone(&group);
+    let handle = tokio::spawn(async move {
+        group_clone
+            .work("key".to_string(), || async {
+                tokio::time::sleep(Duration::from_millis(50)).await;
+                panic!("leader panicked in spawned task");
+                #[expect(unreachable_code, reason = "Required to satisfy return type after panic")]
+                "never".to_string()
+            })
+            .await
+    });
+
+    // Give time for the spawned task to register and start
+    tokio::time::sleep(Duration::from_millis(10)).await;
+
+    // Second task should become the new leader after the first panics
+    let group_clone = Arc::clone(&group);
+    let call_counter_ref = &call_counter;
+    let fut_follower = group_clone.work("key".to_string(), || async {
+        call_counter_ref.fetch_add(1, AcqRel);
+        "Result".to_string()
+    });
+
+    // Wait for the spawned task to panic
+    let spawn_result = handle.await;
+    assert!(spawn_result.is_err());
+
+    // The follower should succeed - Rust's drop semantics ensure the mutex is released
+    let result = fut_follower.await;
+    assert_eq!(result, "Result");
+    assert_eq!(call_counter.load(Acquire), 1);
+}
+
+#[tokio::test]
+async fn debug_impl() {
+    let group: UniFlight<String, String> = UniFlight::new();
+
+    // Test Debug on empty group
+    let debug_str = format!("{:?}", group);
+    assert!(debug_str.contains("UniFlight"));
+
+    // Create a pending work item to populate the mapping with a BroadcastOnce
+    let fut = group.work("key".to_string(), || async {
+        tokio::time::sleep(Duration::from_millis(100)).await;
+        "Result".to_string()
+    });
+
+    // Debug should still work with entries in the mapping
+    let debug_str = format!("{:?}", group);
+    assert!(debug_str.contains("UniFlight"));
+    assert!(debug_str.contains("BroadcastOnce"));
+
+    // Complete the work
+    assert_eq!(fut.await, "Result");
+}

From bf25a1a62165aa2eb247e0b542e5764583ec3aa8 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 16 Dec 2025 17:07:47 -0500
Subject: [PATCH 10/33] Use tick

---
 Cargo.lock                                    |  1 +
 crates/uniflight/Cargo.toml                   | 11 ++++++++++-
 crates/uniflight/examples/cache_population.rs |  6 ++++--
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e4173727..de0be1e2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1725,6 +1725,7 @@ version = "0.1.0"
 dependencies = [
  "futures-util",
  "parking_lot",
+ "tick",
  "tokio",
  "xutex",
 ]
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index f62a9e1a..b2d431ba 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -22,7 +22,16 @@ xutex.workspace = true
 
 [dev-dependencies]
 futures-util = { workspace = true, features = ["alloc", "std"] }
-tokio = { workspace = true, features = ["macros", "rt", "time", "rt-multi-thread"] }
+tick = { workspace = true, features = ["tokio"] }
+tokio = { workspace = true, features = [
+    "macros",
+    "rt",
+    "time",
+    "rt-multi-thread",
+] }
 
 [lints]
 workspace = true
+
+[[example]]
+name = "cache_population"
diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index ab570f0f..a8dfa4d0 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -15,6 +15,7 @@ use std::{
     time::Duration,
 };
 
+use tick::Clock;
 use uniflight::UniFlight;
 
 #[tokio::main]
@@ -33,7 +34,8 @@ async fn main() {
         let group = Arc::clone(&cache_group);
         let counter = Arc::clone(&execution_count);
         let handle = tokio::spawn(async move {
-            let start = tokio::time::Instant::now();
+            let clock = Clock::new_tokio();
+            let start = clock.instant();
 
             let result = group
                 .work("user:123".to_string(), || async {
@@ -41,7 +43,7 @@ async fn main() {
                     println!("  [Request {i}] I'm the leader! Fetching from database... (execution #{count})");
 
                     // Simulate expensive database query
-                    tokio::time::sleep(Duration::from_millis(500)).await;
+                    clock.delay(Duration::from_millis(500)).await;
 
                     "UserData(name: Alice, age: 30)".to_string()
                 })

From e9697faef87153dc4703564692f6f95f238be598 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 16 Dec 2025 18:46:51 -0500
Subject: [PATCH 11/33] Add N leaders functionality (for redundancy)

---
 crates/uniflight/README.md     |  25 ++++
 crates/uniflight/src/lib.rs    | 234 +++++++++++++++++++++++++++++----
 crates/uniflight/tests/work.rs |  88 +++++++++++++
 3 files changed, 321 insertions(+), 26 deletions(-)

diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index cce07f50..b1575891 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -62,6 +62,31 @@ let result = group.work("user:123", || async {
 [`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
 do not require `Send` bounds on the closure or its output.
 
+## Multiple Leaders for Redundancy
+
+By default, `UniFlight` uses a single leader per key. For redundancy scenarios where you want
+multiple concurrent attempts at the same operation (using whichever completes first), use
+[`UniFlight::with_max_leaders`]:
+
+```rust
+use uniflight::UniFlight;
+
+// Allow up to 3 concurrent leaders for redundancy
+let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
+
+// First 3 concurrent calls become leaders and execute in parallel.
+// The first leader to complete stores the result.
+// All callers (leaders and followers) receive that result.
+let result = group.work("key", || async {
+    "result".to_string()
+}).await;
+```
+
+This is useful when:
+- You want fault tolerance through redundant execution
+- Network latency varies and you want the fastest response
+- You're implementing speculative execution patterns
+
 <!-- cargo-rdme end -->
 
 <div style="font-size: 75%" ><hr/>
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index f5167415..b30a4be3 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -50,6 +50,33 @@
 //!
 //! [`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
 //! do not require `Send` bounds on the closure or its output.
+//!
+//! # Multiple Leaders for Redundancy
+//!
+//! By default, `UniFlight` uses a single leader per key. For redundancy scenarios where you want
+//! multiple concurrent attempts at the same operation (using whichever completes first), use
+//! [`UniFlight::with_max_leaders`]:
+//!
+//! ```
+//! use uniflight::UniFlight;
+//!
+//! # async fn example() {
+//! // Allow up to 3 concurrent leaders for redundancy
+//! let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
+//!
+//! // First 3 concurrent calls become leaders and execute in parallel.
+//! // The first leader to complete stores the result.
+//! // All callers (leaders and followers) receive that result.
+//! let result = group.work("key", || async {
+//!     "result".to_string()
+//! }).await;
+//! # }
+//! ```
+//!
+//! This is useful when:
+//! - You want fault tolerance through redundant execution
+//! - Network latency varies and you want the fastest response
+//! - You're implementing speculative execution patterns
 
 #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/logo.png")]
 #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/favicon.ico")]
@@ -57,7 +84,10 @@
 use std::{
     collections::HashMap,
     hash::Hash,
-    sync::{Arc, Weak},
+    sync::{
+        Arc, Weak,
+        atomic::{AtomicUsize, Ordering},
+    },
 };
 
 use parking_lot::Mutex as SyncMutex;
@@ -70,22 +100,66 @@ type SharedMapping<K, T> = Arc<SyncMutex<HashMap<K, BroadcastOnce<T>>>>;
 #[derive(Debug)]
 pub struct UniFlight<K, T> {
     mapping: SharedMapping<K, T>,
+    max_leaders: usize,
 }
 
 impl<K, T> Default for UniFlight<K, T> {
     fn default() -> Self {
-        Self { mapping: Arc::default() }
+        Self {
+            mapping: Arc::default(),
+            max_leaders: 1,
+        }
     }
 }
 
 struct Shared<T> {
     slot: AsyncMutex<Option<T>>,
+    leader_count: AtomicUsize,
+    max_leaders: usize,
 }
 
-impl<T> Default for Shared<T> {
-    fn default() -> Self {
+impl<T> Shared<T> {
+    fn new(max_leaders: usize) -> Self {
         Self {
             slot: AsyncMutex::new(None),
+            leader_count: AtomicUsize::new(0),
+            max_leaders,
+        }
+    }
+}
+
+/// RAII guard that decrements leader count on drop.
+struct LeaderGuard<T> {
+    shared: Option<Arc<Shared<T>>>,
+}
+
+impl<T> LeaderGuard<T> {
+    /// Try to claim a leader slot. Returns `Some(guard)` if successful, `None` if max leaders reached.
+    fn try_claim(shared: &Arc<Shared<T>>) -> Option<Self> {
+        let current = shared.leader_count.load(Ordering::Acquire);
+        if current < shared.max_leaders {
+            let prev = shared.leader_count.fetch_add(1, Ordering::AcqRel);
+            if prev < shared.max_leaders {
+                return Some(Self {
+                    shared: Some(Arc::clone(shared)),
+                });
+            }
+            // Race lost - another caller claimed the last slot
+            shared.leader_count.fetch_sub(1, Ordering::AcqRel);
+        }
+        None
+    }
+
+    /// Consume the guard without decrementing (called once a result is stored, by this leader or another).
+    fn disarm(mut self) -> Arc<Shared<T>> {
+        self.shared.take().expect("LeaderGuard shared already taken")
+    }
+}
+
+impl<T> Drop for LeaderGuard<T> {
+    fn drop(&mut self) {
+        if let Some(shared) = &self.shared {
+            shared.leader_count.fetch_sub(1, Ordering::AcqRel);
         }
     }
 }
@@ -97,8 +171,8 @@ struct BroadcastOnce<T> {
 }
 
 impl<T> BroadcastOnce<T> {
-    fn new() -> (Self, Arc<Shared<T>>) {
-        let shared = Arc::new(Shared::default());
+    fn new(max_leaders: usize) -> (Self, Arc<Shared<T>>) {
+        let shared = Arc::new(Shared::new(max_leaders));
         (
             Self {
                 shared: Arc::downgrade(&shared),
@@ -108,10 +182,18 @@ impl<T> BroadcastOnce<T> {
     }
 }
 
+/// Role of a caller in the work execution.
+enum Role<T, F> {
+    /// Leader executes the work closure.
+    Leader { func: F, guard: LeaderGuard<T> },
+    /// Follower waits for any leader's result. Keeps func for potential promotion.
+    Follower { func: F },
+}
+
 // After calling BroadcastOnce::waiter we can get a waiter.
 // It's in WaitList.
 struct BroadcastOnceWaiter<K, T, F> {
-    func: F,
+    role: Role<T, F>,
     shared: Arc<Shared<T>>,
 
     key: K,
@@ -129,27 +211,45 @@ impl<T> std::fmt::Debug for BroadcastOnce<T> {
     reason = "The Result type is complex but intentionally groups related items for the retry pattern"
 )]
 impl<T> BroadcastOnce<T> {
+    /// Attempts to create a waiter for an existing broadcast.
+    ///
+    /// Returns `Ok` with a waiter (either leader or follower role) if the broadcast is still active.
+    /// Returns `Err` if every waiter, leader or follower, has dropped (weak reference upgrade failed).
     fn try_waiter<K, F>(
         &self,
         func: F,
         key: K,
         mapping: SharedMapping<K, T>,
     ) -> Result<BroadcastOnceWaiter<K, T, F>, (F, K, SharedMapping<K, T>)> {
-        let Some(upgraded) = self.shared.upgrade() else {
+        let Some(shared) = self.shared.upgrade() else {
             return Err((func, key, mapping));
         };
+
+        // Try to become a leader if slots are available
+        if let Some(guard) = LeaderGuard::try_claim(&shared) {
+            return Ok(BroadcastOnceWaiter {
+                role: Role::Leader { func, guard },
+                shared,
+                key,
+                mapping,
+            });
+        }
+
+        // Become a follower (keep func for potential promotion)
         Ok(BroadcastOnceWaiter {
-            func,
-            shared: upgraded,
+            role: Role::Follower { func },
+            shared,
             key,
             mapping,
         })
     }
 
-    #[inline]
-    const fn waiter<K, F>(shared: Arc<Shared<T>>, func: F, key: K, mapping: SharedMapping<K, T>) -> BroadcastOnceWaiter<K, T, F> {
+    /// Creates a waiter for a new broadcast entry (first caller always becomes leader).
+    fn leader_waiter<K, F>(shared: Arc<Shared<T>>, func: F, key: K, mapping: SharedMapping<K, T>) -> BroadcastOnceWaiter<K, T, F> {
+        // Safe to unwrap: new Shared starts at 0, max_leaders >= 1
+        let guard = LeaderGuard::try_claim(&shared).expect("first leader claim should always succeed");
         BroadcastOnceWaiter {
-            func,
+            role: Role::Leader { func, guard },
             shared,
             key,
             mapping,
@@ -167,34 +267,114 @@ where
     T: Clone,
 {
     async fn wait(self) -> T {
-        let mut slot = self.shared.slot.lock().await;
-        if let Some(value) = (*slot).as_ref() {
-            return value.clone();
+        let Self {
+            role,
+            shared,
+            key,
+            mapping,
+        } = self;
+        match role {
+            Role::Leader { func, guard } => Self::wait_as_leader(shared, key, mapping, func, guard).await,
+            Role::Follower { func } => Self::wait_as_follower(shared, key, mapping, func).await,
+        }
+    }
+
+    async fn wait_as_leader(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F, guard: LeaderGuard<T>) -> T {
+        // Lock the slot first - this ensures followers wait while we execute
+        let mut slot = shared.slot.lock().await;
+
+        // Check if another leader already stored a result
+        if let Some(value) = slot.as_ref() {
+            let result = value.clone();
+            drop(slot);
+            guard.disarm();
+            return result;
         }
 
-        let value = (self.func)().await;
+        // Execute the work while holding the lock
+        // This ensures followers block on lock().await until we're done
+        let value = func().await;
         *slot = Some(value.clone());
+        drop(slot);
 
-        self.mapping.lock().remove(&self.key);
+        // Clean up the mapping entry
+        mapping.lock().remove(&key);
 
+        // Disarm the guard (result is stored, count doesn't matter)
+        guard.disarm();
         value
     }
+
+    async fn wait_as_follower(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F) -> T {
+        // Wait for a result by acquiring the slot lock
+        // Leaders hold this lock during execution, so we'll block until one finishes
+        let slot = shared.slot.lock().await;
+        if let Some(value) = slot.as_ref() {
+            return value.clone();
+        }
+        drop(slot);
+
+        // No result and we acquired the lock - all leaders must have failed
+        // Promote ourselves to leader and execute
+        // Safe to unwrap: if we got here, leader_count == 0, and max_leaders >= 1
+        let guard = LeaderGuard::try_claim(&shared).expect("follower promotion should always succeed");
+        Self::wait_as_leader(shared, key, mapping, func, guard).await
+    }
 }
 
 impl<K, T> UniFlight<K, T>
 where
     K: Hash + Eq + Clone,
 {
-    /// Creates a new `UniFlight` instance.
+    /// Creates a new `UniFlight` instance with single-leader behavior.
     #[inline]
     #[must_use]
     pub fn new() -> Self {
         Self::default()
     }
 
-    /// Execute and return the value for a given function, making sure that only one
-    /// operation is in-flight at a given moment. If a duplicate call comes in, that caller will
-    /// wait until the original call completes and return the same value.
+    /// Creates a new `UniFlight` instance allowing up to `max_leaders` concurrent executions.
+    ///
+    /// When multiple tasks request the same work concurrently, up to `max_leaders` of them
+    /// will execute in parallel. The first to complete wins, and all other tasks (both
+    /// executing leaders and waiting followers) receive that result.
+    ///
+    /// This is useful for redundancy scenarios where you want multiple attempts at the
+    /// same operation and want to use whichever completes first.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `max_leaders` is 0.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use uniflight::UniFlight;
+    ///
+    /// # async fn example() {
+    /// // Allow 3 concurrent leaders for redundancy
+    /// let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
+    ///
+    /// // Up to 3 concurrent calls will execute in parallel
+    /// let result = group.work("key", || async {
+    ///     "result".to_string()
+    /// }).await;
+    /// # }
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn with_max_leaders(max_leaders: usize) -> Self {
+        assert!(max_leaders > 0, "max_leaders must be at least 1");
+        Self {
+            mapping: Arc::default(),
+            max_leaders,
+        }
+    }
+
+    /// Execute and return the value for a given function, making sure that only up to
+    /// `max_leaders` operations are in-flight at a given moment. If a duplicate call comes in
+    /// beyond the limit, that caller will wait until one of the leaders completes and return
+    /// the same value.
     pub fn work<F, Fut>(&self, key: K, func: F) -> impl Future<Output = T>
     where
         F: FnOnce() -> Fut,
@@ -209,14 +389,16 @@ where
                 Ok(waiter) => return waiter.wait(),
                 Err(fm) => fm,
             };
-            let (new_call, shared) = BroadcastOnce::new();
+            // All previous waiters dropped (stale weak entry) - create new broadcast entry
+            let (new_call, shared) = BroadcastOnce::new(self.max_leaders);
             *call = new_call;
-            let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+            let waiter = BroadcastOnce::leader_waiter(shared, func, key, owned_mapping);
             waiter.wait()
         } else {
-            let (call, shared) = BroadcastOnce::new();
+            // New key - create broadcast entry and become first leader
+            let (call, shared) = BroadcastOnce::new(self.max_leaders);
             mapping.insert(key.clone(), call);
-            let waiter = BroadcastOnce::waiter(shared, func, key, owned_mapping);
+            let waiter = BroadcastOnce::leader_waiter(shared, func, key, owned_mapping);
             waiter.wait()
         }
     }
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index c552862b..a8e5956f 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -206,3 +206,91 @@ async fn debug_impl() {
     // Complete the work
     assert_eq!(fut.await, "Result");
 }
+
+// N-leader tests
+
+#[tokio::test]
+async fn with_max_leaders_basic() {
+    let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
+    let result = group
+        .work("key", || async {
+            tokio::time::sleep(Duration::from_millis(10)).await;
+            "Result".to_string()
+        })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn multiple_leaders_all_get_same_result() {
+    let call_counter = AtomicUsize::default();
+
+    // Allow up to 3 concurrent leaders
+    let group = UniFlight::with_max_leaders(3);
+    let futures = FuturesUnordered::new();
+
+    // Start 5 concurrent calls - up to 3 become leaders, 2 become followers
+    for i in 0..5 {
+        let counter = &call_counter;
+        futures.push(group.work("key", move || async move {
+            tokio::time::sleep(Duration::from_millis(50)).await;
+            counter.fetch_add(1, AcqRel);
+            format!("Result-{i}")
+        }));
+    }
+
+    // All should complete with the same result (first to finish wins)
+    let results: Vec<_> = futures.collect().await;
+    let first_result = &results[0];
+    assert!(results.iter().all(|r| r == first_result));
+}
+
+#[tokio::test]
+async fn followers_get_first_leader_result() {
+    let group = UniFlight::with_max_leaders(2);
+
+    // Start first leader (slow)
+    let fut1 = group.work("key".to_string(), || async {
+        tokio::time::sleep(Duration::from_millis(100)).await;
+        "slow".to_string()
+    });
+
+    // Start second leader (fast)
+    let fut2 = group.work("key".to_string(), || async {
+        tokio::time::sleep(Duration::from_millis(10)).await;
+        "fast".to_string()
+    });
+
+    // Start followers (should get whichever leader finishes first)
+    let fut3 = group.work("key".to_string(), unreachable_future);
+    let fut4 = group.work("key".to_string(), unreachable_future);
+
+    // Note: Due to current implementation, leaders serialize on slot lock,
+    // so execution order is deterministic. The first to acquire the lock wins.
+    let (r1, r2, r3, r4) = tokio::join!(fut1, fut2, fut3, fut4);
+
+    // All should have the same result
+    assert_eq!(r1, r2);
+    assert_eq!(r2, r3);
+    assert_eq!(r3, r4);
+}
+
+#[tokio::test]
+async fn leader_cancel_with_multiple_leaders() {
+    let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::with_max_leaders(2));
+
+    // First leader will be cancelled
+    let group_clone = Arc::clone(&group);
+    let fut_cancel = group_clone.work("key".to_string(), unreachable_future);
+    let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
+
+    // Second leader should succeed
+    let result = group.work("key".to_string(), || async { "Success".to_string() }).await;
+    assert_eq!(result, "Success");
+}
+
+#[tokio::test]
+#[should_panic(expected = "max_leaders must be at least 1")]
+async fn with_max_leaders_zero_panics() {
+    let _group: UniFlight<&str, String> = UniFlight::with_max_leaders(0);
+}

From af7f96c5e4017012ca56477794e28e849e8b2e57 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 17 Dec 2025 10:19:22 -0500
Subject: [PATCH 12/33] Refactoring to allow followers to unlock in parallel

---
 Cargo.lock                  | 50 +++++++++------------
 Cargo.toml                  |  1 +
 crates/uniflight/Cargo.toml |  2 +-
 crates/uniflight/src/lib.rs | 87 +++++++++++++++++++++++--------------
 4 files changed, 76 insertions(+), 64 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index de0be1e2..4a43b44d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -66,15 +66,6 @@ version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
 
-[[package]]
-name = "branches"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f11502672c5570f77f6bdf573332483f8475bab6a7fda00f1fae8ddb5a6245c0"
-dependencies = [
- "rustc_version",
-]
-
 [[package]]
 name = "bumpalo"
 version = "3.19.0"
@@ -206,6 +197,15 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
+[[package]]
+name = "concurrent-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "convert_case"
 version = "0.10.0"
@@ -263,15 +263,6 @@ dependencies = [
  "itertools 0.13.0",
 ]
 
-[[package]]
-name = "crossbeam-queue"
-version = "0.3.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.21"
@@ -386,6 +377,16 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
+[[package]]
+name = "event-listener"
+version = "5.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab"
+dependencies = [
+ "concurrent-queue",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.5"
@@ -1723,11 +1724,11 @@ checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 name = "uniflight"
 version = "0.1.0"
 dependencies = [
+ "event-listener",
  "futures-util",
  "parking_lot",
  "tick",
  "tokio",
- "xutex",
 ]
 
 [[package]]
@@ -2057,17 +2058,6 @@ version = "0.8.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f"
 
-[[package]]
-name = "xutex"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c6a2a824bda0270095d584b553e1f084652e5fc40ebf3945e78a14a2437e0c6"
-dependencies = [
- "branches",
- "crossbeam-queue",
- "once_cell",
-]
-
 [[package]]
 name = "xxhash-rust"
 version = "0.8.15"
diff --git a/Cargo.toml b/Cargo.toml
index c32633c7..75eca013 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,6 +49,7 @@ chrono-tz = { version = "0.10.4", default-features = false }
 criterion = { version = "0.7.0", default-features = false }
 derive_more = { version = "2.0.1", default-features = false }
 duct = { version = "1.1.1", default-features = false }
+event-listener = { version = "5.4.0", default-features = false }
 futures = { version = "0.3.31", default-features = false }
 futures-core = { version = "0.3.31", default-features = false }
 futures-util = { version = "0.3.31", default-features = false }
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 74e0c9ea..f395f9bf 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -17,8 +17,8 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
+event-listener.workspace = true
 parking_lot.workspace = true
-xutex.workspace = true
 
 [dev-dependencies]
 futures-util = { workspace = true, features = ["alloc", "std"] }
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index b30a4be3..58abbfbb 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -85,13 +85,13 @@ use std::{
     collections::HashMap,
     hash::Hash,
     sync::{
-        Arc, Weak,
+        Arc, OnceLock, Weak,
         atomic::{AtomicUsize, Ordering},
     },
 };
 
+use event_listener::Event;
 use parking_lot::Mutex as SyncMutex;
-use xutex::AsyncMutex;
 
 type SharedMapping<K, T> = Arc<SyncMutex<HashMap<K, BroadcastOnce<T>>>>;
 
@@ -113,15 +113,21 @@ impl<K, T> Default for UniFlight<K, T> {
 }
 
 struct Shared<T> {
-    slot: AsyncMutex<Option<T>>,
+    /// Result storage - written once by the winning leader, then lock-free reads.
+    result: OnceLock<T>,
+    /// Event for notifying waiters when result is ready or all leaders failed.
+    ready: Event,
+    /// Number of leaders currently executing.
     leader_count: AtomicUsize,
+    /// Maximum concurrent leaders.
     max_leaders: usize,
 }
 
 impl<T> Shared<T> {
     fn new(max_leaders: usize) -> Self {
         Self {
-            slot: AsyncMutex::new(None),
+            result: OnceLock::new(),
+            ready: Event::new(),
             leader_count: AtomicUsize::new(0),
             max_leaders,
         }
@@ -159,7 +165,11 @@ impl<T> LeaderGuard<T> {
 impl<T> Drop for LeaderGuard<T> {
     fn drop(&mut self) {
         if let Some(shared) = &self.shared {
-            shared.leader_count.fetch_sub(1, Ordering::AcqRel);
+            let prev = shared.leader_count.fetch_sub(1, Ordering::AcqRel);
+            // If we were the last leader and no result was stored, wake one follower for promotion.
+            if prev == 1 && shared.result.get().is_none() {
+                shared.ready.notify(1);
+            }
         }
     }
 }
@@ -280,45 +290,56 @@ where
     }
 
     async fn wait_as_leader(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F, guard: LeaderGuard<T>) -> T {
-        // Lock the slot first - this ensures followers wait while we execute
-        let mut slot = shared.slot.lock().await;
-
-        // Check if another leader already stored a result
-        if let Some(value) = slot.as_ref() {
-            let result = value.clone();
-            drop(slot);
+        // Check if another leader already stored a result (lock-free read).
+        if let Some(result) = shared.result.get() {
             guard.disarm();
-            return result;
+            return result.clone();
         }
 
-        // Execute the work while holding the lock
-        // This ensures followers block on lock().await until we're done
+        // Execute the work.
         let value = func().await;
-        *slot = Some(value.clone());
-        drop(slot);
 
-        // Clean up the mapping entry
-        mapping.lock().remove(&key);
+        // Try to store the result. First writer wins via OnceLock.
+        if shared.result.set(value.clone()).is_ok() {
+            // We stored the result - clean up the mapping entry.
+            mapping.lock().remove(&key);
+        }
 
-        // Disarm the guard (result is stored, count doesn't matter)
+        // Notify ALL waiting followers simultaneously.
+        shared.ready.notify(usize::MAX);
+
+        // Disarm the guard (result is stored, count doesn't matter).
         guard.disarm();
-        value
+
+        // Return our computed value, or the winning value if we lost the race.
+        shared.result.get().cloned().unwrap_or(value)
     }
 
     async fn wait_as_follower(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F) -> T {
-        // Wait for a result by acquiring the slot lock
-        // Leaders hold this lock during execution, so we'll block until one finishes
-        let slot = shared.slot.lock().await;
-        if let Some(value) = slot.as_ref() {
-            return value.clone();
-        }
-        drop(slot);
+        loop {
+            // Fast path: result already available (lock-free read).
+            if let Some(result) = shared.result.get() {
+                return result.clone();
+            }
+
+            // Register listener BEFORE checking state to avoid missed notifications.
+            let listener = shared.ready.listen();
+
+            // Double-check after registering.
+            if let Some(result) = shared.result.get() {
+                return result.clone();
+            }
 
-        // No result and we acquired the lock - all leaders must have failed
-        // Promote ourselves to leader and execute
-        // Safe to unwrap: if we got here, leader_count == 0, and max_leaders >= 1
-        let guard = LeaderGuard::try_claim(&shared).expect("follower promotion should always succeed");
-        Self::wait_as_leader(shared, key, mapping, func, guard).await
+            // Check if all leaders have failed and we need promotion.
+            if shared.leader_count.load(Ordering::Acquire) == 0 {
+                // All leaders failed - promote ourselves.
+                let guard = LeaderGuard::try_claim(&shared).expect("follower promotion should always succeed");
+                return Self::wait_as_leader(shared, key, mapping, func, guard).await;
+            }
+
+            // Wait for notification (in parallel with other followers).
+            listener.await;
+        }
     }
 }
 

From 9445788f0d5df361c554497c2bc1159237446c88 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 7 Jan 2026 14:04:32 -0500
Subject: [PATCH 13/33] Update to use async-once and dashmap. Update benchmarks

---
 CHANGELOG.md                                  |   1 +
 Cargo.lock                                    |  65 ++-
 Cargo.toml                                    |   3 +
 README.md                                     |   6 +-
 crates/uniflight/Cargo.toml                   |  10 +-
 crates/uniflight/README.md                    |  43 +-
 crates/uniflight/benches/comparison.rs        | 269 +++++++++++
 crates/uniflight/examples/cache_population.rs |   4 +-
 crates/uniflight/src/lib.rs                   | 419 ++++--------------
 crates/uniflight/tests/work.rs                | 117 +----
 10 files changed, 438 insertions(+), 499 deletions(-)
 create mode 100644 crates/uniflight/benches/comparison.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e19af40b..de44ad50 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
 Please see each crate's change log below:
 
+- [`async_once`](./crates/async_once/CHANGELOG.md)
 - [`bytesbuf`](./crates/bytesbuf/CHANGELOG.md)
 - [`data_privacy`](./crates/data_privacy/CHANGELOG.md)
 - [`data_privacy_macros`](./crates/data_privacy_macros/CHANGELOG.md)
diff --git a/Cargo.lock b/Cargo.lock
index 4a43b44d..9f9e91f6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -44,6 +44,12 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
+[[package]]
+name = "async-once-cell"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288f83726785267c6f2ef073a3d83dc3f9b81464e9f99898240cced85fce35a"
+
 [[package]]
 name = "autocfg"
 version = "1.5.0"
@@ -197,15 +203,6 @@ version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
 
-[[package]]
-name = "concurrent-queue"
-version = "2.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "convert_case"
 version = "0.10.0"
@@ -250,6 +247,7 @@ dependencies = [
  "serde",
  "serde_json",
  "tinytemplate",
+ "tokio",
  "walkdir",
 ]
 
@@ -275,6 +273,20 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
 
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+]
+
 [[package]]
 name = "data_privacy"
 version = "0.9.0"
@@ -377,16 +389,6 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
-[[package]]
-name = "event-listener"
-version = "5.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab"
-dependencies = [
- "concurrent-queue",
- "pin-project-lite",
-]
-
 [[package]]
 name = "find-msvc-tools"
 version = "0.1.5"
@@ -566,6 +568,12 @@ dependencies = [
  "byteorder",
 ]
 
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+
 [[package]]
 name = "hashbrown"
 version = "0.16.1"
@@ -613,7 +621,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
 dependencies = [
  "equivalent",
- "hashbrown",
+ "hashbrown 0.16.1",
 ]
 
 [[package]]
@@ -1355,6 +1363,16 @@ version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
 
+[[package]]
+name = "singleflight-async"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffae0d841b8012a86beec66a3f9c57b7b331a10366c764cd40bd6faebe3ad77c"
+dependencies = [
+ "parking_lot",
+ "tokio",
+]
+
 [[package]]
 name = "siphasher"
 version = "1.0.1"
@@ -1597,6 +1615,7 @@ version = "1.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
 dependencies = [
+ "parking_lot",
  "pin-project-lite",
  "tokio-macros",
 ]
@@ -1724,9 +1743,11 @@ checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 name = "uniflight"
 version = "0.1.0"
 dependencies = [
- "event-listener",
+ "async-once-cell",
+ "criterion",
+ "dashmap",
  "futures-util",
- "parking_lot",
+ "singleflight-async",
  "tick",
  "tokio",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 75eca013..44eeb7a2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,11 +42,13 @@ uniflight = { path = "crates/uniflight", default-features = false, version = "0.
 
 # external dependencies
 alloc_tracker = { version = "0.5.9", default-features = false }
+async-once-cell = { version = "0.5", default-features = false }
 anyhow = { version = "1.0.100", default-features = false }
 bytes = { version = "1.11.0", default-features = false }
 chrono = { version = "0.4.40", default-features = false }
 chrono-tz = { version = "0.10.4", default-features = false }
 criterion = { version = "0.7.0", default-features = false }
+dashmap = { version = "6.1", default-features = false }
 derive_more = { version = "2.0.1", default-features = false }
 duct = { version = "1.1.1", default-features = false }
 event-listener = { version = "5.4.0", default-features = false }
@@ -75,6 +77,7 @@ regex = { version = "1.12.2", default-features = false }
 serde = { version = "1.0.228", default-features = false }
 serde_core = { version = "1.0.224", default-features = false }
 serde_json = { version = "1.0.145", default-features = false }
+singleflight-async = { version = "0.2", default-features = false }
 smallvec = { version = "1.15.1", default-features = false }
 static_assertions = { version = "1.1.0", default-features = false }
 syn = { version = "2.0.111", default-features = false }
diff --git a/README.md b/README.md
index f58b939a..2177c3b8 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ This repository contains a set of crates that help you build robust highly scala
 
 These are the crates built out of this repo:
 
+- [`async_once`](./crates/async_once/README.md) - Async once-cell that executes initialization at most once with parallel follower notification.
 - [`bytesbuf`](./crates/bytesbuf/README.md) - Manipulate sequences of bytes for efficient I/O.
 - [`data_privacy`](./crates/data_privacy/README.md) - Mechanisms to classify, manipulate, and redact sensitive data.
 - [`data_privacy_macros`](./crates/data_privacy_macros/README.md) - Macros for the `data_privacy` crate.
@@ -37,11 +38,8 @@ These are the crates built out of this repo:
 - [`thread_aware`](./crates/thread_aware/README.md) - Facilities to support thread-isolated state.
 - [`thread_aware_macros`](./crates/thread_aware_macros/README.md) - Macros for the `thread_aware` crate.
 - [`thread_aware_macros_impl`](./crates/thread_aware_macros_impl/README.md) - Macros for the `thread_aware` crate.
-<<<<<<< HEAD
-- [`uniflight`](./crates/uniflight/README.md) - Coalesces duplicate async tasks into a single execution.
-=======
 - [`tick`](./crates/tick/README.md) - Provides primitives to interact with and manipulate machine time.
->>>>>>> 9dbba9df9e954e1b7f14be110feb4ab25da62e86
+- [`uniflight`](./crates/uniflight/README.md) - Coalesces duplicate async tasks into a single execution.
 
 ## About this Repo
 
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index f395f9bf..54e44f0c 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -17,11 +17,13 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-event-listener.workspace = true
-parking_lot.workspace = true
+async-once-cell.workspace = true
+dashmap.workspace = true
 
 [dev-dependencies]
+criterion = { workspace = true, features = ["async_tokio"] }
 futures-util = { workspace = true, features = ["alloc", "std"] }
+singleflight-async.workspace = true
 tick = { workspace = true, features = ["tokio"] }
 tokio = { workspace = true, features = [
     "macros",
@@ -33,5 +35,9 @@ tokio = { workspace = true, features = [
 [lints]
 workspace = true
 
+[[bench]]
+name = "comparison"
+harness = false
+
 [[example]]
 name = "cache_population"
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index b1575891..09d79345 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -20,14 +20,14 @@
 
 Coalesces duplicate async tasks into a single execution.
 
-This crate provides [`UniFlight`], a mechanism for deduplicating concurrent async operations.
+This crate provides [`Merger`], a mechanism for deduplicating concurrent async operations.
 When multiple tasks request the same work (identified by a key), only the first task (the
 "leader") performs the actual work while subsequent tasks (the "followers") wait and receive
 a clone of the result.
 
 ## When to Use
 
-Use `UniFlight` when you have expensive or rate-limited operations that may be requested
+Use `Merger` when you have expensive or rate-limited operations that may be requested
 concurrently with the same parameters:
 
 - **Cache population**: Prevent thundering herd when a cache entry expires
@@ -38,9 +38,9 @@ concurrently with the same parameters:
 ## Example
 
 ```rust
-use uniflight::UniFlight;
+use uniflight::Merger;
 
-let group: UniFlight<&str, String> = UniFlight::new();
+let group: Merger<&str, String> = Merger::new();
 
 // Multiple concurrent calls with the same key will share a single execution
 let result = group.work("user:123", || async {
@@ -51,7 +51,7 @@ let result = group.work("user:123", || async {
 
 ## Cancellation and Panic Safety
 
-`UniFlight` handles task cancellation and panics gracefully:
+`Merger` handles task cancellation and panics gracefully:
 
 - If the leader task is cancelled or dropped, a follower becomes the new leader
 - If the leader task panics, a follower becomes the new leader and executes its work
@@ -59,33 +59,20 @@ let result = group.work("user:123", || async {
 
 ## Thread Safety
 
-[`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
-do not require `Send` bounds on the closure or its output.
+[`Merger`] is `Send` and `Sync`, and can be shared across threads. The returned futures
+are `Send` when the closure, future, key, and value types are `Send`.
 
-## Multiple Leaders for Redundancy
+## Performance
 
-By default, `UniFlight` uses a single leader per key. For redundancy scenarios where you want
-multiple concurrent attempts at the same operation (using whichever completes first), use
-[`UniFlight::with_max_leaders`]:
+Benchmarks comparing `uniflight` against `singleflight-async` show the following characteristics:
 
-```rust
-use uniflight::UniFlight;
-
-// Allow up to 3 concurrent leaders for redundancy
-let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
-
-// First 3 concurrent calls become leaders and execute in parallel.
-// The first leader to complete stores the result.
-// All callers (leaders and followers) receive that result.
-let result = group.work("key", || async {
-    "result".to_string()
-}).await;
-```
+- **Concurrent workloads** (10+ tasks): uniflight is 1.2-1.3x faster, demonstrating better scalability under contention
+- **Single calls**: singleflight-async has lower per-call overhead (~2x faster for individual operations)
+- **Multiple keys**: uniflight performs 1.3x faster when handling multiple distinct keys concurrently
 
-This is useful when:
-- You want fault tolerance through redundant execution
-- Network latency varies and you want the fastest response
-- You're implementing speculative execution patterns
+uniflight's DashMap-based architecture provides excellent scaling properties for high-concurrency scenarios,
+making it well-suited for production workloads with concurrent access patterns. For low-contention scenarios
+with predominantly single calls, the performance difference is minimal (sub-microsecond range).
 
 <!-- cargo-rdme end -->
 
diff --git a/crates/uniflight/benches/comparison.rs b/crates/uniflight/benches/comparison.rs
new file mode 100644
index 00000000..fd53077d
--- /dev/null
+++ b/crates/uniflight/benches/comparison.rs
@@ -0,0 +1,269 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Benchmarks comparing uniflight against singleflight-async.
+
+use std::sync::{
+    Arc,
+    atomic::{AtomicU64, Ordering},
+};
+
+use criterion::{Criterion, criterion_group, criterion_main};
+
+// Benchmark 1: Single call (no contention)
+fn bench_single_call(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    let mut group = c.benchmark_group("single_call");
+
+    // Use atomic counter for unique keys
+    static COUNTER1: AtomicU64 = AtomicU64::new(0);
+
+    // Our implementation - pre-create the merger
+    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
+    group.bench_function("uniflight", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&our_merger);
+            async move {
+                let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
+                merger.work(key, || async { "value".to_string() }).await
+            }
+        });
+    });
+
+    // singleflight-async - pre-create the group
+    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
+    group.bench_function("singleflight_async", |b| {
+        b.to_async(&rt).iter(|| {
+            let group = Arc::clone(&their_group);
+            async move {
+                let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
+                group.work(key, || async { "value".to_string() }).await
+            }
+        });
+    });
+
+    group.finish();
+}
+
+// Benchmark 2: Concurrent calls (10 tasks, same key)
+fn bench_concurrent_10(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    let mut group = c.benchmark_group("concurrent_10_tasks");
+
+    // Use atomic counter for unique keys per iteration
+    static COUNTER2: AtomicU64 = AtomicU64::new(0);
+
+    // Our implementation - pre-create the merger
+    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
+    group.bench_function("uniflight", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&our_merger);
+            async move {
+                let key = format!("key_{}", COUNTER2.fetch_add(1, Ordering::Relaxed));
+                let handles: Vec<_> = (0..10)
+                    .map(|_| {
+                        let merger = Arc::clone(&merger);
+                        let key = key.clone();
+                        tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    // singleflight-async - pre-create the group
+    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
+    group.bench_function("singleflight_async", |b| {
+        b.to_async(&rt).iter(|| {
+            let group = Arc::clone(&their_group);
+            async move {
+                let key = format!("key_{}", COUNTER2.fetch_add(1, Ordering::Relaxed));
+                let handles: Vec<_> = (0..10)
+                    .map(|_| {
+                        let group = Arc::clone(&group);
+                        let key = key.clone();
+                        tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    group.finish();
+}
+
+// Benchmark 3: High contention (100 tasks, same key)
+fn bench_concurrent_100(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    let mut group = c.benchmark_group("concurrent_100_tasks");
+
+    // Use atomic counter for unique keys per iteration
+    static COUNTER3: AtomicU64 = AtomicU64::new(0);
+
+    // Our implementation - pre-create the merger
+    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
+    group.bench_function("uniflight", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&our_merger);
+            async move {
+                let key = format!("key_{}", COUNTER3.fetch_add(1, Ordering::Relaxed));
+                let handles: Vec<_> = (0..100)
+                    .map(|_| {
+                        let merger = Arc::clone(&merger);
+                        let key = key.clone();
+                        tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    // singleflight-async - pre-create the group
+    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
+    group.bench_function("singleflight_async", |b| {
+        b.to_async(&rt).iter(|| {
+            let group = Arc::clone(&their_group);
+            async move {
+                let key = format!("key_{}", COUNTER3.fetch_add(1, Ordering::Relaxed));
+                let handles: Vec<_> = (0..100)
+                    .map(|_| {
+                        let group = Arc::clone(&group);
+                        let key = key.clone();
+                        tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    group.finish();
+}
+
+// Benchmark 4: Multiple different keys (10 keys, 10 tasks each)
+fn bench_multiple_keys(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    let mut group = c.benchmark_group("multiple_keys_10x10");
+
+    // Use atomic counter for unique key prefix per iteration
+    static COUNTER4: AtomicU64 = AtomicU64::new(0);
+
+    // Our implementation - pre-create the merger
+    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
+    group.bench_function("uniflight", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&our_merger);
+            async move {
+                let iteration = COUNTER4.fetch_add(1, Ordering::Relaxed);
+                let handles: Vec<_> = (0..10)
+                    .flat_map(|key_id| {
+                        let merger = Arc::clone(&merger);
+                        (0..10).map(move |_| {
+                            let merger = Arc::clone(&merger);
+                            let key = format!("key_{}_{key_id}", iteration);
+                            tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                        })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    // singleflight-async - pre-create the group
+    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
+    group.bench_function("singleflight_async", |b| {
+        b.to_async(&rt).iter(|| {
+            let group = Arc::clone(&their_group);
+            async move {
+                let iteration = COUNTER4.fetch_add(1, Ordering::Relaxed);
+                let handles: Vec<_> = (0..10)
+                    .flat_map(|key_id| {
+                        let group = Arc::clone(&group);
+                        (0..10).map(move |_| {
+                            let group = Arc::clone(&group);
+                            let key = format!("key_{}_{key_id}", iteration);
+                            tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
+                        })
+                    })
+                    .collect();
+
+                for handle in handles {
+                    handle.await.unwrap();
+                }
+            }
+        });
+    });
+
+    group.finish();
+}
+
+// Benchmark 5: Reuse existing group (pre-created; one call per iteration, unique key each time)
+fn bench_reuse_group(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+
+    let mut group = c.benchmark_group("reuse_group");
+
+    // Use atomic counter for unique keys
+    static COUNTER5: AtomicU64 = AtomicU64::new(0);
+
+    // Our implementation - pre-create the merger
+    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
+    group.bench_function("uniflight", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&our_merger);
+            async move {
+                // Each iteration uses a unique key to avoid caching effects
+                let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
+                merger.work(key, || async { "value".to_string() }).await
+            }
+        });
+    });
+
+    // singleflight-async - pre-create the group
+    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
+    group.bench_function("singleflight_async", |b| {
+        b.to_async(&rt).iter(|| {
+            let group = Arc::clone(&their_group);
+            async move {
+                let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
+                group.work(key, || async { "value".to_string() }).await
+            }
+        });
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_single_call,
+    bench_concurrent_10,
+    bench_concurrent_100,
+    bench_multiple_keys,
+    bench_reuse_group,
+);
+
+criterion_main!(benches);
diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index a8dfa4d0..7050b9ef 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -16,12 +16,12 @@ use std::{
 };
 
 use tick::Clock;
-use uniflight::UniFlight;
+use uniflight::Merger;
 
 #[tokio::main]
 async fn main() {
     // Create a shared UniFlight instance for cache operations
-    let cache_group = Arc::new(UniFlight::<String, String>::new());
+    let cache_group = Arc::new(Merger::<String, String>::new());
 
     // Track how many times the work closure actually executes
     let execution_count = Arc::new(AtomicUsize::new(0));
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 58abbfbb..b590a26f 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -7,14 +7,14 @@
 
 //! Coalesces duplicate async tasks into a single execution.
 //!
-//! This crate provides [`UniFlight`], a mechanism for deduplicating concurrent async operations.
+//! This crate provides [`Merger`], a mechanism for deduplicating concurrent async operations.
 //! When multiple tasks request the same work (identified by a key), only the first task (the
 //! "leader") performs the actual work while subsequent tasks (the "followers") wait and receive
 //! a clone of the result.
 //!
 //! # When to Use
 //!
-//! Use `UniFlight` when you have expensive or rate-limited operations that may be requested
+//! Use `Merger` when you have expensive or rate-limited operations that may be requested
 //! concurrently with the same parameters:
 //!
 //! - **Cache population**: Prevent thundering herd when a cache entry expires
@@ -25,10 +25,10 @@
 //! # Example
 //!
 //! ```
-//! use uniflight::UniFlight;
+//! use uniflight::Merger;
 //!
 //! # async fn example() {
-//! let group: UniFlight<&str, String> = UniFlight::new();
+//! let group: Merger<&str, String> = Merger::new();
 //!
 //! // Multiple concurrent calls with the same key will share a single execution
 //! let result = group.work("user:123", || async {
@@ -40,7 +40,7 @@
 //!
 //! # Cancellation and Panic Safety
 //!
-//! `UniFlight` handles task cancellation and panics gracefully:
+//! `Merger` handles task cancellation and panics gracefully:
 //!
 //! - If the leader task is cancelled or dropped, a follower becomes the new leader
 //! - If the leader task panics, a follower becomes the new leader and executes its work
@@ -48,379 +48,120 @@
 //!
 //! # Thread Safety
 //!
-//! [`UniFlight`] is `Send` and `Sync`, and can be shared across threads. The returned futures
-//! do not require `Send` bounds on the closure or its output.
+//! [`Merger`] is `Send` and `Sync`, and can be shared across threads. The returned futures
+//! are `Send` when the closure, future, key, and value types are `Send`.
 //!
-//! # Multiple Leaders for Redundancy
+//! # Performance
 //!
-//! By default, `UniFlight` uses a single leader per key. For redundancy scenarios where you want
-//! multiple concurrent attempts at the same operation (using whichever completes first), use
-//! [`UniFlight::with_max_leaders`]:
+//! Benchmarks comparing `uniflight` against `singleflight-async` show the following characteristics:
 //!
-//! ```
-//! use uniflight::UniFlight;
-//!
-//! # async fn example() {
-//! // Allow up to 3 concurrent leaders for redundancy
-//! let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
-//!
-//! // First 3 concurrent calls become leaders and execute in parallel.
-//! // The first leader to complete stores the result.
-//! // All callers (leaders and followers) receive that result.
-//! let result = group.work("key", || async {
-//!     "result".to_string()
-//! }).await;
-//! # }
-//! ```
+//! - **Concurrent workloads** (10+ tasks): uniflight is 1.2-1.3x faster, demonstrating better scalability under contention
+//! - **Single calls**: singleflight-async has lower per-call overhead (~2x faster for individual operations)
+//! - **Multiple keys**: uniflight performs 1.3x faster when handling multiple distinct keys concurrently
 //!
-//! This is useful when:
-//! - You want fault tolerance through redundant execution
-//! - Network latency varies and you want the fastest response
-//! - You're implementing speculative execution patterns
+//! uniflight's DashMap-based architecture provides excellent scaling properties for high-concurrency scenarios,
+//! making it well-suited for production workloads with concurrent access patterns. For low-contention scenarios
+//! with predominantly single calls, the performance difference is minimal (sub-microsecond range).
 
 #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/logo.png")]
 #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/favicon.ico")]
 
 use std::{
-    collections::HashMap,
+    fmt::Debug,
     hash::Hash,
-    sync::{
-        Arc, OnceLock, Weak,
-        atomic::{AtomicUsize, Ordering},
-    },
+    sync::{Arc, Weak},
 };
 
-use event_listener::Event;
-use parking_lot::Mutex as SyncMutex;
-
-type SharedMapping<K, T> = Arc<SyncMutex<HashMap<K, BroadcastOnce<T>>>>;
+use async_once_cell::OnceCell;
+use dashmap::{DashMap, Entry::{Occupied, Vacant}};
 
 /// Represents a class of work and creates a space in which units of work
 /// can be executed with duplicate suppression.
-#[derive(Debug)]
-pub struct UniFlight<K, T> {
-    mapping: SharedMapping<K, T>,
-    max_leaders: usize,
-}
-
-impl<K, T> Default for UniFlight<K, T> {
-    fn default() -> Self {
-        Self {
-            mapping: Arc::default(),
-            max_leaders: 1,
-        }
-    }
-}
-
-struct Shared<T> {
-    /// Result storage - written once by the winning leader, then lock-free reads.
-    result: OnceLock<T>,
-    /// Event for notifying waiters when result is ready or all leaders failed.
-    ready: Event,
-    /// Number of leaders currently executing.
-    leader_count: AtomicUsize,
-    /// Maximum concurrent leaders.
-    max_leaders: usize,
-}
-
-impl<T> Shared<T> {
-    fn new(max_leaders: usize) -> Self {
-        Self {
-            result: OnceLock::new(),
-            ready: Event::new(),
-            leader_count: AtomicUsize::new(0),
-            max_leaders,
-        }
-    }
-}
-
-/// RAII guard that decrements leader count on drop.
-struct LeaderGuard<T> {
-    shared: Option<Arc<Shared<T>>>,
-}
-
-impl<T> LeaderGuard<T> {
-    /// Try to claim a leader slot. Returns `Some(guard)` if successful, `None` if max leaders reached.
-    fn try_claim(shared: &Arc<Shared<T>>) -> Option<Self> {
-        let current = shared.leader_count.load(Ordering::Acquire);
-        if current < shared.max_leaders {
-            let prev = shared.leader_count.fetch_add(1, Ordering::AcqRel);
-            if prev < shared.max_leaders {
-                return Some(Self {
-                    shared: Some(Arc::clone(shared)),
-                });
-            }
-            // Race lost - another caller claimed the last slot
-            shared.leader_count.fetch_sub(1, Ordering::AcqRel);
-        }
-        None
-    }
-
-    /// Consume the guard without decrementing (called when leader successfully stores result).
-    fn disarm(mut self) -> Arc<Shared<T>> {
-        self.shared.take().expect("LeaderGuard shared already taken")
-    }
-}
-
-impl<T> Drop for LeaderGuard<T> {
-    fn drop(&mut self) {
-        if let Some(shared) = &self.shared {
-            let prev = shared.leader_count.fetch_sub(1, Ordering::AcqRel);
-            // If we were the last leader and no result was stored, wake one follower for promotion.
-            if prev == 1 && shared.result.get().is_none() {
-                shared.ready.notify(1);
-            }
-        }
-    }
-}
-
-/// `BroadcastOnce` consists of shared slot and notify.
-#[derive(Clone)]
-struct BroadcastOnce<T> {
-    shared: Weak<Shared<T>>,
-}
-
-impl<T> BroadcastOnce<T> {
-    fn new(max_leaders: usize) -> (Self, Arc<Shared<T>>) {
-        let shared = Arc::new(Shared::new(max_leaders));
-        (
-            Self {
-                shared: Arc::downgrade(&shared),
-            },
-            shared,
-        )
-    }
-}
-
-/// Role of a caller in the work execution.
-enum Role<T, F> {
-    /// Leader executes the work closure.
-    Leader { func: F, guard: LeaderGuard<T> },
-    /// Follower waits for any leader's result. Keeps func for potential promotion.
-    Follower { func: F },
-}
-
-// After calling BroadcastOnce::waiter we can get a waiter.
-// It's in WaitList.
-struct BroadcastOnceWaiter<K, T, F> {
-    role: Role<T, F>,
-    shared: Arc<Shared<T>>,
-
-    key: K,
-    mapping: SharedMapping<K, T>,
+pub struct Merger<K, T> {
+    mapping: DashMap<K, Weak<OnceCell<T>>>,
 }
 
-impl<T> std::fmt::Debug for BroadcastOnce<T> {
+impl<K, T> Debug for Merger<K, T> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "BroadcastOnce")
+        f.debug_struct("Merger")
+            .field("mapping", &format_args!("DashMap<...>"))
+            .finish()
     }
 }
 
-#[expect(
-    clippy::type_complexity,
-    reason = "The Result type is complex but intentionally groups related items for the retry pattern"
-)]
-impl<T> BroadcastOnce<T> {
-    /// Attempts to create a waiter for an existing broadcast.
-    ///
-    /// Returns `Ok` with a waiter (either leader or follower role) if the broadcast is still active.
-    /// Returns `Err` if all leaders have dropped (weak reference upgrade failed).
-    fn try_waiter<K, F>(
-        &self,
-        func: F,
-        key: K,
-        mapping: SharedMapping<K, T>,
-    ) -> Result<BroadcastOnceWaiter<K, T, F>, (F, K, SharedMapping<K, T>)> {
-        let Some(shared) = self.shared.upgrade() else {
-            return Err((func, key, mapping));
-        };
-
-        // Try to become a leader if slots are available
-        if let Some(guard) = LeaderGuard::try_claim(&shared) {
-            return Ok(BroadcastOnceWaiter {
-                role: Role::Leader { func, guard },
-                shared,
-                key,
-                mapping,
-            });
-        }
-
-        // Become a follower (keep func for potential promotion)
-        Ok(BroadcastOnceWaiter {
-            role: Role::Follower { func },
-            shared,
-            key,
-            mapping,
-        })
-    }
-
-    /// Creates a waiter for a new broadcast entry (first caller always becomes leader).
-    fn leader_waiter<K, F>(shared: Arc<Shared<T>>, func: F, key: K, mapping: SharedMapping<K, T>) -> BroadcastOnceWaiter<K, T, F> {
-        // Safe to unwrap: new Shared starts at 0, max_leaders >= 1
-        let guard = LeaderGuard::try_claim(&shared).expect("first leader claim should always succeed");
-        BroadcastOnceWaiter {
-            role: Role::Leader { func, guard },
-            shared,
-            key,
-            mapping,
-        }
-    }
-}
-
-// We already in WaitList, so wait will be fine, we won't miss
-// anything after Waiter generated.
-impl<K, T, F, Fut> BroadcastOnceWaiter<K, T, F>
+impl<K, T> Default for Merger<K, T>
 where
     K: Hash + Eq,
-    F: FnOnce() -> Fut,
-    Fut: Future<Output = T>,
-    T: Clone,
 {
-    async fn wait(self) -> T {
-        let Self {
-            role,
-            shared,
-            key,
-            mapping,
-        } = self;
-        match role {
-            Role::Leader { func, guard } => Self::wait_as_leader(shared, key, mapping, func, guard).await,
-            Role::Follower { func } => Self::wait_as_follower(shared, key, mapping, func).await,
-        }
-    }
-
-    async fn wait_as_leader(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F, guard: LeaderGuard<T>) -> T {
-        // Check if another leader already stored a result (lock-free read).
-        if let Some(result) = shared.result.get() {
-            guard.disarm();
-            return result.clone();
-        }
-
-        // Execute the work.
-        let value = func().await;
-
-        // Try to store the result. First writer wins via OnceLock.
-        if shared.result.set(value.clone()).is_ok() {
-            // We stored the result - clean up the mapping entry.
-            mapping.lock().remove(&key);
-        }
-
-        // Notify ALL waiting followers simultaneously.
-        shared.ready.notify(usize::MAX);
-
-        // Disarm the guard (result is stored, count doesn't matter).
-        guard.disarm();
-
-        // Return our computed value, or the winning value if we lost the race.
-        shared.result.get().cloned().unwrap_or(value)
-    }
-
-    async fn wait_as_follower(shared: Arc<Shared<T>>, key: K, mapping: SharedMapping<K, T>, func: F) -> T {
-        loop {
-            // Fast path: result already available (lock-free read).
-            if let Some(result) = shared.result.get() {
-                return result.clone();
-            }
-
-            // Register listener BEFORE checking state to avoid missed notifications.
-            let listener = shared.ready.listen();
-
-            // Double-check after registering.
-            if let Some(result) = shared.result.get() {
-                return result.clone();
-            }
-
-            // Check if all leaders have failed and we need promotion.
-            if shared.leader_count.load(Ordering::Acquire) == 0 {
-                // All leaders failed - promote ourselves.
-                let guard = LeaderGuard::try_claim(&shared).expect("follower promotion should always succeed");
-                return Self::wait_as_leader(shared, key, mapping, func, guard).await;
-            }
-
-            // Wait for notification (in parallel with other followers).
-            listener.await;
+    fn default() -> Self {
+        Self {
+            mapping: DashMap::new(),
         }
     }
 }
 
-impl<K, T> UniFlight<K, T>
+impl<K, T> Merger<K, T>
 where
-    K: Hash + Eq + Clone,
+    K: Hash + Eq + Clone + Send + Sync,
+    T: Send + Sync,
 {
-    /// Creates a new `UniFlight` instance with single-leader behavior.
+    /// Creates a new `Merger` instance.
     #[inline]
     #[must_use]
     pub fn new() -> Self {
         Self::default()
     }
 
-    /// Creates a new `UniFlight` instance allowing up to `max_leaders` concurrent executions.
-    ///
-    /// When multiple tasks request the same work concurrently, up to `max_leaders` of them
-    /// will execute in parallel. The first to complete wins, and all other tasks (both
-    /// executing leaders and waiting followers) receive that result.
-    ///
-    /// This is useful for redundancy scenarios where you want multiple attempts at the
-    /// same operation and want to use whichever completes first.
-    ///
-    /// # Panics
-    ///
-    /// Panics if `max_leaders` is 0.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use uniflight::UniFlight;
-    ///
-    /// # async fn example() {
-    /// // Allow 3 concurrent leaders for redundancy
-    /// let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
-    ///
-    /// // Up to 3 concurrent calls will execute in parallel
-    /// let result = group.work("key", || async {
-    ///     "result".to_string()
-    /// }).await;
-    /// # }
-    /// ```
-    #[inline]
-    #[must_use]
-    pub fn with_max_leaders(max_leaders: usize) -> Self {
-        assert!(max_leaders > 0, "max_leaders must be at least 1");
-        Self {
-            mapping: Arc::default(),
-            max_leaders,
-        }
-    }
-
-    /// Execute and return the value for a given function, making sure that only up to
-    /// `max_leaders` operations are in-flight at a given moment. If a duplicate call comes in
-    /// beyond the limit, that caller will wait until one of the leaders completes and return
-    /// the same value.
-    pub fn work<F, Fut>(&self, key: K, func: F) -> impl Future<Output = T>
+    /// Execute and return the value for a given function, making sure that only one
+    /// operation is in-flight at a given moment. If a duplicate call comes in,
+    /// that caller will wait until the leader completes and return the same value.
+    pub fn work<F, Fut>(&self, key: K, func: F) -> impl Future<Output = T> + Send
     where
-        F: FnOnce() -> Fut,
-        Fut: Future<Output = T>,
+        F: FnOnce() -> Fut + Send,
+        Fut: Future<Output = T> + Send,
         T: Clone,
     {
-        let owned_mapping = Arc::clone(&self.mapping);
-        let mut mapping = self.mapping.lock();
-        let val = mapping.get_mut(&key);
-        if let Some(call) = val {
-            let (func, key, owned_mapping) = match call.try_waiter(func, key, owned_mapping) {
-                Ok(waiter) => return waiter.wait(),
-                Err(fm) => fm,
-            };
-            // All leaders dropped - create new broadcast entry
-            let (new_call, shared) = BroadcastOnce::new(self.max_leaders);
-            *call = new_call;
-            let waiter = BroadcastOnce::leader_waiter(shared, func, key, owned_mapping);
-            waiter.wait()
-        } else {
-            // New key - create broadcast entry and become first leader
-            let (call, shared) = BroadcastOnce::new(self.max_leaders);
-            mapping.insert(key.clone(), call);
-            let waiter = BroadcastOnce::leader_waiter(shared, func, key, owned_mapping);
-            waiter.wait()
+        let cell = self.get_or_create_cell(&key);
+        let mapping = &self.mapping;
+        async move {
+            let result = cell.get_or_init(func()).await.clone();
+            // Clean up expired weak reference if present
+            // Use remove_if to atomically check and remove
+            mapping.remove_if(&key, |_, weak| weak.upgrade().is_none());
+            result
+        }
+    }
+
+    /// Gets an existing `OnceCell` for the key, or creates a new one.
+    fn get_or_create_cell(&self, key: &K) -> Arc<OnceCell<T>> {
+        // Fast path: check if entry exists and is still valid
+        if let Some(entry) = self.mapping.get(key)
+            && let Some(cell) = entry.value().upgrade()
+        {
+            return cell;
         }
+
+        // Slow path: need to insert or replace expired entry
+        let cell = Arc::new(OnceCell::new());
+        let weak = Arc::downgrade(&cell);
+
+        // Use Entry enum to atomically check-and-return or insert
+        match self.mapping.entry(key.clone()) {
+            Occupied(mut entry) => {
+                // Entry exists - check if still alive
+                if let Some(existing) = entry.get().upgrade() {
+                    // Another thread's cell is still alive - use it
+                    return existing;
+                }
+                // Expired - replace with ours
+                entry.insert(weak);
+            }
+            Vacant(entry) => {
+                entry.insert(weak);
+            }
+        }
+
+        // We inserted our cell, return it
+        cell
     }
 }
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index a8e5956f..6033011c 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -15,7 +15,7 @@ use std::{
 };
 
 use futures_util::{StreamExt, stream::FuturesUnordered};
-use uniflight::UniFlight;
+use uniflight::Merger;
 
 fn unreachable_future() -> std::future::Pending<String> {
     std::future::pending()
@@ -23,7 +23,7 @@ fn unreachable_future() -> std::future::Pending<String> {
 
 #[tokio::test]
 async fn direct_call() {
-    let group = UniFlight::new();
+    let group = Merger::new();
     let result = group
         .work("key", || async {
             tokio::time::sleep(Duration::from_millis(10)).await;
@@ -37,7 +37,7 @@ async fn direct_call() {
 async fn parallel_call() {
     let call_counter = AtomicUsize::default();
 
-    let group = UniFlight::new();
+    let group = Merger::new();
     let futures = FuturesUnordered::new();
     for _ in 0..10 {
         futures.push(group.work("key", || async {
@@ -55,7 +55,7 @@ async fn parallel_call() {
 async fn parallel_call_seq_await() {
     let call_counter = AtomicUsize::default();
 
-    let group = UniFlight::new();
+    let group = Merger::new();
     let mut futures = Vec::new();
     for _ in 0..10 {
         futures.push(group.work("key", || async {
@@ -73,7 +73,7 @@ async fn parallel_call_seq_await() {
 
 #[tokio::test]
 async fn call_with_static_str_key() {
-    let group = UniFlight::new();
+    let group = Merger::new();
     let result = group
         .work("key".to_string(), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -85,7 +85,7 @@ async fn call_with_static_str_key() {
 
 #[tokio::test]
 async fn call_with_static_string_key() {
-    let group = UniFlight::new();
+    let group = Merger::new();
     let result = group
         .work("key".to_string(), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -99,7 +99,7 @@ async fn call_with_static_string_key() {
 async fn call_with_custom_key() {
     #[derive(Clone, PartialEq, Eq, Hash)]
     struct K(i32);
-    let group = UniFlight::new();
+    let group = Merger::new();
     let result = group
         .work(K(1), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -111,7 +111,7 @@ async fn call_with_custom_key() {
 
 #[tokio::test]
 async fn late_wait() {
-    let group = UniFlight::new();
+    let group = Merger::new();
     let fut_early = group.work("key".to_string(), || async {
         tokio::time::sleep(Duration::from_millis(20)).await;
         "Result".to_string()
@@ -124,7 +124,7 @@ async fn late_wait() {
 
 #[tokio::test]
 async fn cancel() {
-    let group = UniFlight::new();
+    let group = Merger::new();
 
     // the executer cancelled and the other awaiter will create a new future and execute.
     let fut_cancel = group.work("key".to_string(), unreachable_future);
@@ -148,7 +148,7 @@ async fn cancel() {
 #[tokio::test]
 async fn leader_panic_in_spawned_task() {
     let call_counter = AtomicUsize::default();
-    let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::new());
+    let group: Arc<Merger<String, String>> = Arc::new(Merger::new());
 
     // First task will panic in a spawned task (no catch_unwind)
     let group_clone = Arc::clone(&group);
@@ -186,11 +186,11 @@ async fn leader_panic_in_spawned_task() {
 
 #[tokio::test]
 async fn debug_impl() {
-    let group: UniFlight<String, String> = UniFlight::new();
+    let group: Merger<String, String> = Merger::new();
 
     // Test Debug on empty group
     let debug_str = format!("{:?}", group);
-    assert!(debug_str.contains("UniFlight"));
+    assert!(debug_str.contains("Merger"));
 
     // Create a pending work item to populate the mapping with a BroadcastOnce
     let fut = group.work("key".to_string(), || async {
@@ -200,97 +200,10 @@ async fn debug_impl() {
 
     // Debug should still work with entries in the mapping
     let debug_str = format!("{:?}", group);
-    assert!(debug_str.contains("UniFlight"));
-    assert!(debug_str.contains("BroadcastOnce"));
+    assert!(debug_str.contains("Merger"));
+    // The mapping is a DashMap
+    assert!(debug_str.contains("DashMap"));
 
     // Complete the work
     assert_eq!(fut.await, "Result");
 }
-
-// N-leader tests
-
-#[tokio::test]
-async fn with_max_leaders_basic() {
-    let group: UniFlight<&str, String> = UniFlight::with_max_leaders(3);
-    let result = group
-        .work("key", || async {
-            tokio::time::sleep(Duration::from_millis(10)).await;
-            "Result".to_string()
-        })
-        .await;
-    assert_eq!(result, "Result");
-}
-
-#[tokio::test]
-async fn multiple_leaders_all_get_same_result() {
-    let call_counter = AtomicUsize::default();
-
-    // Allow up to 3 concurrent leaders
-    let group = UniFlight::with_max_leaders(3);
-    let futures = FuturesUnordered::new();
-
-    // Start 5 concurrent calls - up to 3 become leaders, 2 become followers
-    for i in 0..5 {
-        let counter = &call_counter;
-        futures.push(group.work("key", move || async move {
-            tokio::time::sleep(Duration::from_millis(50)).await;
-            counter.fetch_add(1, AcqRel);
-            format!("Result-{i}")
-        }));
-    }
-
-    // All should complete with the same result (first to finish wins)
-    let results: Vec<_> = futures.collect().await;
-    let first_result = &results[0];
-    assert!(results.iter().all(|r| r == first_result));
-}
-
-#[tokio::test]
-async fn followers_get_first_leader_result() {
-    let group = UniFlight::with_max_leaders(2);
-
-    // Start first leader (slow)
-    let fut1 = group.work("key".to_string(), || async {
-        tokio::time::sleep(Duration::from_millis(100)).await;
-        "slow".to_string()
-    });
-
-    // Start second leader (fast)
-    let fut2 = group.work("key".to_string(), || async {
-        tokio::time::sleep(Duration::from_millis(10)).await;
-        "fast".to_string()
-    });
-
-    // Start followers (should get whichever leader finishes first)
-    let fut3 = group.work("key".to_string(), unreachable_future);
-    let fut4 = group.work("key".to_string(), unreachable_future);
-
-    // Note: Due to current implementation, leaders serialize on slot lock,
-    // so execution order is deterministic. The first to acquire the lock wins.
-    let (r1, r2, r3, r4) = tokio::join!(fut1, fut2, fut3, fut4);
-
-    // All should have the same result
-    assert_eq!(r1, r2);
-    assert_eq!(r2, r3);
-    assert_eq!(r3, r4);
-}
-
-#[tokio::test]
-async fn leader_cancel_with_multiple_leaders() {
-    let group: Arc<UniFlight<String, String>> = Arc::new(UniFlight::with_max_leaders(2));
-
-    // First leader will be cancelled
-    let group_clone = Arc::clone(&group);
-    let fut_cancel = group_clone.work("key".to_string(), unreachable_future);
-    let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
-
-    // Second leader should succeed
-    let result = group.work("key".to_string(), || async { "Success".to_string() }).await;
-    assert_eq!(result, "Success");
-}
-
-#[tokio::test]
-#[should_panic(expected = "max_leaders must be at least 1")]
-async fn with_max_leaders_zero_panics() {
-    let _group: UniFlight<&str, String> = UniFlight::with_max_leaders(0);
-}

From 8efdde215f6922b4d43d35b459824bd616f2c6bc Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 16:09:36 -0500
Subject: [PATCH 14/33] Fix comments

---
 crates/uniflight/benches/comparison.rs        |  10 +-
 crates/uniflight/examples/cache_population.rs |   2 +-
 crates/uniflight/src/lib.rs                   | 153 +++++++++++++++---
 crates/uniflight/tests/work.rs                |  24 +--
 4 files changed, 149 insertions(+), 40 deletions(-)

diff --git a/crates/uniflight/benches/comparison.rs b/crates/uniflight/benches/comparison.rs
index fd53077d..1dda550e 100644
--- a/crates/uniflight/benches/comparison.rs
+++ b/crates/uniflight/benches/comparison.rs
@@ -26,7 +26,7 @@ fn bench_single_call(c: &mut Criterion) {
             let merger = Arc::clone(&our_merger);
             async move {
                 let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
-                merger.work(key, || async { "value".to_string() }).await
+                merger.work(&key, || async { "value".to_string() }).await
             }
         });
     });
@@ -66,7 +66,7 @@ fn bench_concurrent_10(c: &mut Criterion) {
                     .map(|_| {
                         let merger = Arc::clone(&merger);
                         let key = key.clone();
-                        tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                        tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
                     })
                     .collect();
 
@@ -122,7 +122,7 @@ fn bench_concurrent_100(c: &mut Criterion) {
                     .map(|_| {
                         let merger = Arc::clone(&merger);
                         let key = key.clone();
-                        tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                        tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
                     })
                     .collect();
 
@@ -180,7 +180,7 @@ fn bench_multiple_keys(c: &mut Criterion) {
                         (0..10).map(move |_| {
                             let merger = Arc::clone(&merger);
                             let key = format!("key_{}_{key_id}", iteration);
-                            tokio::spawn(async move { merger.work(key, || async { "value".to_string() }).await })
+                            tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
                         })
                     })
                     .collect();
@@ -237,7 +237,7 @@ fn bench_reuse_group(c: &mut Criterion) {
             async move {
                 // Each iteration uses a unique key to avoid caching effects
                 let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
-                merger.work(key, || async { "value".to_string() }).await
+                merger.work(&key, || async { "value".to_string() }).await
             }
         });
     });
diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index 7050b9ef..6137bdea 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -38,7 +38,7 @@ async fn main() {
             let start = clock.instant();
 
             let result = group
-                .work("user:123".to_string(), || async {
+                .work("user:123", || async {
                     let count = counter.fetch_add(1, Ordering::SeqCst) + 1;
                     println!("  [Request {i}] I'm the leader! Fetching from database... (execution #{count})");
 
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index b590a26f..67bb0758 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -28,9 +28,10 @@
 //! use uniflight::Merger;
 //!
 //! # async fn example() {
-//! let group: Merger<&str, String> = Merger::new();
+//! let group: Merger<String, String> = Merger::new();
 //!
-//! // Multiple concurrent calls with the same key will share a single execution
+//! // Multiple concurrent calls with the same key will share a single execution.
+//! // Note: you can pass &str directly when the key type is String.
 //! let result = group.work("user:123", || async {
 //!     // This expensive operation runs only once, even if called concurrently
 //!     "expensive_result".to_string()
@@ -38,6 +39,22 @@
 //! # }
 //! ```
 //!
+//! # Flexible Key Types
+//!
+//! The [`Merger::work`] method accepts keys using [`Borrow`] semantics, allowing you to pass
+//! borrowed forms of the key type. For example, with `Merger<String, T>`, you can pass `&str`
+//! directly without allocating:
+//!
+//! ```
+//! # use uniflight::Merger;
+//! # async fn example() {
+//! let merger: Merger<String, i32> = Merger::new();
+//!
+//! // Pass &str directly - no need to call .to_string()
+//! merger.work("my-key", || async { 42 }).await;
+//! # }
+//! ```
+//!
 //! # Cancellation and Panic Safety
 //!
 //! `Merger` handles task cancellation and panics gracefully:
@@ -46,6 +63,11 @@
 //! - If the leader task panics, a follower becomes the new leader and executes its work
 //! - Followers that join before the leader completes receive the cached result
 //!
+//! # Memory Management
+//!
+//! An entry is removed from the internal map when the last caller sharing it finishes. If every
+//! caller is cancelled first, the expired entry stays until the next call for that key replaces it.
+//!
 //! # Thread Safety
 //!
 //! [`Merger`] is `Send` and `Sync`, and can be shared across threads. The returned futures
@@ -53,20 +75,25 @@
 //!
 //! # Performance
 //!
-//! Benchmarks comparing `uniflight` against `singleflight-async` show the following characteristics:
+//! Benchmarks comparing `uniflight` against `singleflight-async`:
 //!
-//! - **Concurrent workloads** (10+ tasks): uniflight is 1.2-1.3x faster, demonstrating better scalability under contention
-//! - **Single calls**: singleflight-async has lower per-call overhead (~2x faster for individual operations)
-//! - **Multiple keys**: uniflight performs 1.3x faster when handling multiple distinct keys concurrently
+//! | Benchmark | uniflight | singleflight-async | Winner |
+//! |-----------|-----------|-------------------|--------|
+//! | Single call | 723 ns | 664 ns | ~equal |
+//! | 10 concurrent tasks | 50 µs | 56 µs | uniflight 1.1x |
+//! | 100 concurrent tasks | 177 µs | 190 µs | uniflight 1.1x |
+//! | 10 keys × 10 tasks | 176 µs | 230 µs | uniflight 1.3x |
+//! | Sequential reuse | 757 ns | 1.0 µs | uniflight 1.3x |
 //!
-//! uniflight's DashMap-based architecture provides excellent scaling properties for high-concurrency scenarios,
-//! making it well-suited for production workloads with concurrent access patterns. For low-contention scenarios
-//! with predominantly single calls, the performance difference is minimal (sub-microsecond range).
+//! uniflight's `DashMap`-based architecture scales well under contention, making it
+//! well-suited for high-concurrency workloads. For single-call scenarios, both libraries
+//! perform similarly (sub-microsecond).
 
 #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/logo.png")]
 #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/favicon.ico")]
 
 use std::{
+    borrow::Borrow,
     fmt::Debug,
     hash::Hash,
     sync::{Arc, Weak},
@@ -78,7 +105,7 @@ use dashmap::{DashMap, Entry::{Occupied, Vacant}};
 /// Represents a class of work and creates a space in which units of work
 /// can be executed with duplicate suppression.
 pub struct Merger<K, T> {
-    mapping: DashMap<K, Weak<OnceCell<T>>>,
+    mapping: Arc<DashMap<K, Weak<OnceCell<T>>>>,
 }
 
 impl<K, T> Debug for Merger<K, T> {
@@ -95,15 +122,14 @@ where
 {
     fn default() -> Self {
         Self {
-            mapping: DashMap::new(),
+            mapping: Arc::new(DashMap::new()),
         }
     }
 }
 
 impl<K, T> Merger<K, T>
 where
-    K: Hash + Eq + Clone + Send + Sync,
-    T: Send + Sync,
+    K: Hash + Eq,
 {
     /// Creates a new `Merger` instance.
     #[inline]
@@ -112,28 +138,63 @@ where
         Self::default()
     }
 
+    /// Returns the number of in-flight operations.
+    #[cfg(test)]
+    fn len(&self) -> usize {
+        self.mapping.len()
+    }
+
+    /// Returns `true` if there are no in-flight operations.
+    #[cfg(test)]
+    fn is_empty(&self) -> bool {
+        self.mapping.is_empty()
+    }
+}
+
+impl<K, T> Merger<K, T>
+where
+    K: Hash + Eq + Send + Sync,
+    T: Clone + Send + Sync,
+{
     /// Execute and return the value for a given function, making sure that only one
     /// operation is in-flight at a given moment. If a duplicate call comes in,
     /// that caller will wait until the leader completes and return the same value.
-    pub fn work<F, Fut>(&self, key: K, func: F) -> impl Future<Output = T> + Send
+    ///
+    /// The key can be passed as any borrowed form of `K`. For example, if `K` is `String`,
+    /// you can pass `&str` directly:
+    ///
+    /// ```
+    /// # use uniflight::Merger;
+    /// # async fn example() {
+    /// let merger: Merger<String, i32> = Merger::new();
+    /// let result = merger.work("my-key", || async { 42 }).await;
+    /// # }
+    /// ```
+    pub fn work<Q, F, Fut>(&self, key: &Q, func: F) -> impl Future<Output = T> + Send + use<Q, F, Fut, K, T>
     where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = T> + Send,
-        T: Clone,
     {
-        let cell = self.get_or_create_cell(&key);
-        let mapping = &self.mapping;
+        let cell = self.get_or_create_cell(key);
+        let owned_key = key.to_owned();
+        let mapping = Arc::clone(&self.mapping);
         async move {
             let result = cell.get_or_init(func()).await.clone();
-            // Clean up expired weak reference if present
-            // Use remove_if to atomically check and remove
-            mapping.remove_if(&key, |_, weak| weak.upgrade().is_none());
+            drop(cell); // Release our Arc before cleanup check
+            // Remove entry if no one else is using it (weak can't upgrade)
+            mapping.remove_if(owned_key.borrow(), |_, weak| weak.upgrade().is_none());
             result
         }
     }
 
     /// Gets an existing `OnceCell` for the key, or creates a new one.
-    fn get_or_create_cell(&self, key: &K) -> Arc<OnceCell<T>> {
+    fn get_or_create_cell<Q>(&self, key: &Q) -> Arc<OnceCell<T>>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
+    {
         // Fast path: check if entry exists and is still valid
         if let Some(entry) = self.mapping.get(key)
             && let Some(cell) = entry.value().upgrade()
@@ -146,7 +207,7 @@ where
         let weak = Arc::downgrade(&cell);
 
         // Use Entry enum to atomically check-and-return or insert
-        match self.mapping.entry(key.clone()) {
+        match self.mapping.entry(key.to_owned()) {
             Occupied(mut entry) => {
                 // Entry exists - check if still alive
                 if let Some(existing) = entry.get().upgrade() {
@@ -165,3 +226,51 @@ where
         cell
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    #[tokio::test]
+    async fn cleanup_after_completion() {
+        let group: Merger<String, String> = Merger::new();
+        assert!(group.is_empty());
+
+        // Single call should clean up after completion
+        let result = group.work("key1", || async { "Result".to_string() }).await;
+        assert_eq!(result, "Result");
+        assert!(group.is_empty(), "Map should be empty after single call completes");
+
+        // Multiple concurrent calls should clean up after all complete
+        let futures: Vec<_> = (0..10)
+            .map(|_| group.work("key2", || async {
+                tokio::time::sleep(Duration::from_millis(50)).await;
+                "Result".to_string()
+            }))
+            .collect();
+
+        // While in flight, map should have an entry
+        assert_eq!(group.len(), 1);
+
+        for fut in futures {
+            assert_eq!(fut.await, "Result");
+        }
+
+        assert!(group.is_empty(), "Map should be empty after all concurrent calls complete");
+
+        // Multiple different keys should all be cleaned up
+        let fut1 = group.work("a", || async { "A".to_string() });
+        let fut2 = group.work("b", || async { "B".to_string() });
+        let fut3 = group.work("c", || async { "C".to_string() });
+
+        assert_eq!(group.len(), 3);
+
+        let (r1, r2, r3) = tokio::join!(fut1, fut2, fut3);
+        assert_eq!(r1, "A");
+        assert_eq!(r2, "B");
+        assert_eq!(r3, "C");
+
+        assert!(group.is_empty(), "Map should be empty after all keys complete");
+    }
+}
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index 6033011c..c617fb09 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -75,7 +75,7 @@ async fn parallel_call_seq_await() {
 async fn call_with_static_str_key() {
     let group = Merger::new();
     let result = group
-        .work("key".to_string(), || async {
+        .work("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -87,7 +87,7 @@ async fn call_with_static_str_key() {
 async fn call_with_static_string_key() {
     let group = Merger::new();
     let result = group
-        .work("key".to_string(), || async {
+        .work("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -101,7 +101,7 @@ async fn call_with_custom_key() {
     struct K(i32);
     let group = Merger::new();
     let result = group
-        .work(K(1), || async {
+        .work(&K(1), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -112,11 +112,11 @@ async fn call_with_custom_key() {
 #[tokio::test]
 async fn late_wait() {
     let group = Merger::new();
-    let fut_early = group.work("key".to_string(), || async {
+    let fut_early = group.work("key", || async {
         tokio::time::sleep(Duration::from_millis(20)).await;
         "Result".to_string()
     });
-    let fut_late = group.work("key".into(), unreachable_future);
+    let fut_late = group.work("key", unreachable_future);
     assert_eq!(fut_early.await, "Result");
     tokio::time::sleep(Duration::from_millis(50)).await;
     assert_eq!(fut_late.await, "Result");
@@ -127,18 +127,18 @@ async fn cancel() {
     let group = Merger::new();
 
     // the executer cancelled and the other awaiter will create a new future and execute.
-    let fut_cancel = group.work("key".to_string(), unreachable_future);
+    let fut_cancel = group.work(&"key".to_string(), unreachable_future);
     let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
-    let fut_late = group.work("key".to_string(), || async { "Result2".to_string() });
+    let fut_late = group.work("key", || async { "Result2".to_string() });
     assert_eq!(fut_late.await, "Result2");
 
     // the first executer is slow but not dropped, so the result will be the first ones.
     let begin = tokio::time::Instant::now();
-    let fut_1 = group.work("key".to_string(), || async {
+    let fut_1 = group.work("key", || async {
         tokio::time::sleep(Duration::from_millis(2000)).await;
         "Result1".to_string()
     });
-    let fut_2 = group.work("key".to_string(), unreachable_future);
+    let fut_2 = group.work(&"key".to_string(), unreachable_future);
     let (v1, v2) = tokio::join!(fut_1, fut_2);
     assert_eq!(v1, "Result1");
     assert_eq!(v2, "Result1");
@@ -154,7 +154,7 @@ async fn leader_panic_in_spawned_task() {
     let group_clone = Arc::clone(&group);
     let handle = tokio::spawn(async move {
         group_clone
-            .work("key".to_string(), || async {
+            .work("key", || async {
                 tokio::time::sleep(Duration::from_millis(50)).await;
                 panic!("leader panicked in spawned task");
                 #[expect(unreachable_code, reason = "Required to satisfy return type after panic")]
@@ -169,7 +169,7 @@ async fn leader_panic_in_spawned_task() {
     // Second task should become the new leader after the first panics
     let group_clone = Arc::clone(&group);
     let call_counter_ref = &call_counter;
-    let fut_follower = group_clone.work("key".to_string(), || async {
+    let fut_follower = group_clone.work("key", || async {
         call_counter_ref.fetch_add(1, AcqRel);
         "Result".to_string()
     });
@@ -193,7 +193,7 @@ async fn debug_impl() {
     assert!(debug_str.contains("Merger"));
 
     // Create a pending work item to populate the mapping with a BroadcastOnce
-    let fut = group.work("key".to_string(), || async {
+    let fut = group.work("key", || async {
         tokio::time::sleep(Duration::from_millis(100)).await;
         "Result".to_string()
     });

From 9ef70392283122496d4a74d749dc9a70dcb26bd8 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 17:13:31 -0500
Subject: [PATCH 15/33] Make it thread_aware

---
 Cargo.lock                     |   1 +
 crates/uniflight/Cargo.toml    |   1 +
 crates/uniflight/src/lib.rs    | 136 ++++++++++++++++++++++++++++-----
 crates/uniflight/tests/work.rs |  62 +++++++++++++--
 4 files changed, 173 insertions(+), 27 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 22039bbb..7e7c67b3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1826,6 +1826,7 @@ dependencies = [
  "dashmap",
  "futures-util",
  "singleflight-async",
+ "thread_aware",
  "tick",
  "tokio",
 ]
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 54e44f0c..76c8d938 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -19,6 +19,7 @@ repository.workspace = true
 [dependencies]
 async-once-cell.workspace = true
 dashmap.workspace = true
+thread_aware.workspace = true
 
 [dev-dependencies]
 criterion = { workspace = true, features = ["async_tokio"] }
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 67bb0758..7796935c 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -55,6 +55,24 @@
 //! # }
 //! ```
 //!
+//! # Thread-Aware Scoping
+//!
+//! `Merger` supports thread-aware scoping via a [`Strategy`](thread_aware::storage::Strategy)
+//! type parameter. This controls how the internal state is partitioned across threads/NUMA nodes:
+//!
+//! - [`PerProcess`] (default): Single global state, maximum deduplication
+//! - [`PerNuma`]: Separate state per NUMA node, NUMA-local memory access
+//! - [`PerCore`]: Separate state per core, no cross-core deduplication (useful for already-partitioned work)
+//!
+//! ```
+//! use uniflight::{Merger, PerNuma};
+//!
+//! # async fn example() {
+//! // NUMA-aware merger - each NUMA node gets its own deduplication scope
+//! let merger: Merger<String, String, PerNuma> = Merger::new_per_numa();
+//! # }
+//! ```
+//!
 //! # Cancellation and Panic Safety
 //!
 //! `Merger` handles task cancellation and panics gracefully:
@@ -101,60 +119,137 @@ use std::{
 
 use async_once_cell::OnceCell;
 use dashmap::{DashMap, Entry::{Occupied, Vacant}};
+use thread_aware::{
+    Arc as TaArc,
+    ThreadAware,
+    affinity::{MemoryAffinity, PinnedAffinity},
+    storage::Strategy,
+};
+
+// Re-export strategies for convenience
+pub use thread_aware::{PerCore, PerNuma, PerProcess};
 
 /// Represents a class of work and creates a space in which units of work
 /// can be executed with duplicate suppression.
-pub struct Merger<K, T> {
-    mapping: Arc<DashMap<K, Weak<OnceCell<T>>>>,
+///
+/// The `S` type parameter controls the thread-aware scoping strategy:
+/// - [`PerProcess`]: Single global scope (default, maximum deduplication)
+/// - [`PerNuma`]: Per-NUMA-node scope (NUMA-local memory access)
+/// - [`PerCore`]: Per-core scope (no cross-core deduplication)
+pub struct Merger<K, T, S: Strategy = PerProcess> {
+    inner: TaArc<DashMap<K, Weak<OnceCell<T>>>, S>,
 }
 
-impl<K, T> Debug for Merger<K, T> {
+impl<K, T, S: Strategy> Debug for Merger<K, T, S> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("Merger")
-            .field("mapping", &format_args!("DashMap<...>"))
+            .field("inner", &format_args!("DashMap<...>"))
             .finish()
     }
 }
 
-impl<K, T> Default for Merger<K, T>
+impl<K, T, S: Strategy> Clone for Merger<K, T, S> {
+    fn clone(&self) -> Self {
+        Self {
+            inner: self.inner.clone(),
+        }
+    }
+}
+
+impl<K, T, S> Default for Merger<K, T, S>
 where
-    K: Hash + Eq,
+    K: Hash + Eq + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
+    S: Strategy + Send + Sync,
 {
     fn default() -> Self {
         Self {
-            mapping: Arc::new(DashMap::new()),
+            inner: TaArc::new(DashMap::new),
         }
     }
 }
 
-impl<K, T> Merger<K, T>
+impl<K, T> Merger<K, T, PerProcess>
 where
-    K: Hash + Eq,
+    K: Hash + Eq + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
 {
-    /// Creates a new `Merger` instance.
+    /// Creates a new `Merger` instance with process-wide (global) scope.
+    ///
+    /// This is the default strategy providing maximum deduplication across all threads.
     #[inline]
     #[must_use]
     pub fn new() -> Self {
         Self::default()
     }
+}
+
+impl<K, T> Merger<K, T, PerNuma>
+where
+    K: Hash + Eq + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
+{
+    /// Creates a new `Merger` instance with per-NUMA-node scope.
+    ///
+    /// Each NUMA node gets its own deduplication scope, keeping all memory accesses
+    /// local to the node for better performance on multi-socket systems.
+    #[inline]
+    #[must_use]
+    pub fn new_per_numa() -> Self {
+        Self::default()
+    }
+}
+
+impl<K, T> Merger<K, T, PerCore>
+where
+    K: Hash + Eq + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
+{
+    /// Creates a new `Merger` instance with per-core scope.
+    ///
+    /// Each core gets its own deduplication scope. This provides no cross-core
+    /// deduplication but eliminates all contention. Useful when work is already
+    /// partitioned by core.
+    #[inline]
+    #[must_use]
+    pub fn new_per_core() -> Self {
+        Self::default()
+    }
+}
 
+impl<K, T, S: Strategy> Merger<K, T, S>
+where
+    K: Hash + Eq,
+{
     /// Returns the number of in-flight operations.
     #[cfg(test)]
     fn len(&self) -> usize {
-        self.mapping.len()
+        self.inner.len()
     }
 
     /// Returns `true` if there are no in-flight operations.
     #[cfg(test)]
     fn is_empty(&self) -> bool {
-        self.mapping.is_empty()
+        self.inner.is_empty()
+    }
+}
+
+impl<K, T, S> ThreadAware for Merger<K, T, S>
+where
+    S: Strategy,
+{
+    fn relocated(self, source: MemoryAffinity, destination: PinnedAffinity) -> Self {
+        Self {
+            inner: self.inner.relocated(source, destination),
+        }
     }
 }
 
-impl<K, T> Merger<K, T>
+impl<K, T, S> Merger<K, T, S>
 where
     K: Hash + Eq + Send + Sync,
     T: Clone + Send + Sync,
+    S: Strategy + Send + Sync,
 {
     /// Execute and return the value for a given function, making sure that only one
     /// operation is in-flight at a given moment. If a duplicate call comes in,
@@ -170,33 +265,34 @@ where
     /// let result = merger.work("my-key", || async { 42 }).await;
     /// # }
     /// ```
-    pub fn work<Q, F, Fut>(&self, key: &Q, func: F) -> impl Future<Output = T> + Send + use<Q, F, Fut, K, T>
+    pub fn work<Q, F, Fut>(&self, key: &Q, func: F) -> impl Future<Output = T> + Send + use<Q, F, Fut, K, T, S>
     where
         K: Borrow<Q>,
         Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
         F: FnOnce() -> Fut + Send,
         Fut: Future<Output = T> + Send,
     {
-        let cell = self.get_or_create_cell(key);
+        // Clone the TaArc - the async block owns this clone
+        let inner = self.inner.clone();
+        let cell = Self::get_or_create_cell(&inner, key);
         let owned_key = key.to_owned();
-        let mapping = Arc::clone(&self.mapping);
         async move {
             let result = cell.get_or_init(func()).await.clone();
             drop(cell); // Release our Arc before cleanup check
             // Remove entry if no one else is using it (weak can't upgrade)
-            mapping.remove_if(owned_key.borrow(), |_, weak| weak.upgrade().is_none());
+            inner.remove_if(owned_key.borrow(), |_, weak| weak.upgrade().is_none());
             result
         }
     }
 
     /// Gets an existing `OnceCell` for the key, or creates a new one.
-    fn get_or_create_cell<Q>(&self, key: &Q) -> Arc<OnceCell<T>>
+    fn get_or_create_cell<Q>(map: &DashMap<K, Weak<OnceCell<T>>>, key: &Q) -> Arc<OnceCell<T>>
     where
         K: Borrow<Q>,
         Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
     {
         // Fast path: check if entry exists and is still valid
-        if let Some(entry) = self.mapping.get(key)
+        if let Some(entry) = map.get(key)
             && let Some(cell) = entry.value().upgrade()
         {
             return cell;
@@ -207,7 +303,7 @@ where
         let weak = Arc::downgrade(&cell);
 
         // Use Entry enum to atomically check-and-return or insert
-        match self.mapping.entry(key.to_owned()) {
+        match map.entry(key.to_owned()) {
             Occupied(mut entry) => {
                 // Entry exists - check if still alive
                 if let Some(existing) = entry.get().upgrade() {
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index c617fb09..acb450f1 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Integration tests for `UniFlight::work()`.
+//! Integration tests for [`Merger::work()`].
 
 use std::{
     sync::{
@@ -175,8 +175,7 @@ async fn leader_panic_in_spawned_task() {
     });
 
     // Wait for the spawned task to panic
-    let spawn_result = handle.await;
-    assert!(spawn_result.is_err());
+    handle.await.unwrap_err();
 
     // The follower should succeed - Rust's drop semantics ensure the mutex is released
     let result = fut_follower.await;
@@ -189,21 +188,70 @@ async fn debug_impl() {
     let group: Merger<String, String> = Merger::new();
 
     // Test Debug on empty group
-    let debug_str = format!("{:?}", group);
+    let debug_str = format!("{group:?}");
     assert!(debug_str.contains("Merger"));
 
-    // Create a pending work item to populate the mapping with a BroadcastOnce
+    // Create a pending work item to populate the mapping
     let fut = group.work("key", || async {
         tokio::time::sleep(Duration::from_millis(100)).await;
         "Result".to_string()
     });
 
     // Debug should still work with entries in the mapping
-    let debug_str = format!("{:?}", group);
+    let debug_str = format!("{group:?}");
     assert!(debug_str.contains("Merger"));
-    // The mapping is a DashMap
+    // The inner storage is a DashMap
     assert!(debug_str.contains("DashMap"));
 
     // Complete the work
     assert_eq!(fut.await, "Result");
 }
+
+#[tokio::test]
+async fn per_numa_strategy() {
+    use uniflight::PerNuma;
+
+    let group: Merger<String, String, PerNuma> = Merger::new_per_numa();
+    let result = group
+        .work("key", || async { "Result".to_string() })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn per_core_strategy() {
+    use uniflight::PerCore;
+
+    let group: Merger<String, String, PerCore> = Merger::new_per_core();
+    let result = group
+        .work("key", || async { "Result".to_string() })
+        .await;
+    assert_eq!(result, "Result");
+}
+
+#[tokio::test]
+async fn clone_shares_state() {
+    let group1 = Merger::new();
+    let group2 = group1.clone();
+
+    let call_counter = AtomicUsize::default();
+
+    // Start work on clone 1
+    let fut1 = group1.work("key", || async {
+        tokio::time::sleep(Duration::from_millis(50)).await;
+        call_counter.fetch_add(1, AcqRel);
+        "Result".to_string()
+    });
+
+    // Clone 2 should join the same work
+    let fut2 = group2.work("key", || async {
+        call_counter.fetch_add(1, AcqRel);
+        "Unreachable".to_string()
+    });
+
+    let (r1, r2) = tokio::join!(fut1, fut2);
+    assert_eq!(r1, "Result");
+    assert_eq!(r2, "Result");
+    // Work should only execute once
+    assert_eq!(call_counter.load(Acquire), 1);
+}

From aeb1cb1a28e9ae6ba22a1a9d7915eb90dc0b62de Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 17:16:36 -0500
Subject: [PATCH 16/33] Fix merge issue

---
 Cargo.toml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 6b68a37f..d362f736 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -54,11 +54,7 @@ criterion = { version = "0.7.0", default-features = false }
 dashmap = { version = "6.1", default-features = false }
 derive_more = { version = "2.0.1", default-features = false }
 duct = { version = "1.1.1", default-features = false }
-<<<<<<< HEAD
-event-listener = { version = "5.4.0", default-features = false }
-=======
 dynosaur = { version = "0.3.0", default-features = false }
->>>>>>> ac521838007a2f9da47dd02dc2edbe3cbd0cbc18
 futures = { version = "0.3.31", default-features = false }
 futures-core = { version = "0.3.31", default-features = false }
 futures-util = { version = "0.3.31", default-features = false }

From 4cc0a76d27130c743783471ad75970007b6fab1a Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 17:38:37 -0500
Subject: [PATCH 17/33] Fix external types check

---
 crates/uniflight/Cargo.toml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 76c8d938..2d336025 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -16,6 +16,15 @@ license.workspace = true
 homepage.workspace = true
 repository.workspace = true
 
+[package.metadata.cargo_check_external_types]
+allowed_external_types = [
+    "thread_aware::cell::builtin::PerCore",
+    "thread_aware::cell::builtin::PerNuma",
+    "thread_aware::cell::builtin::PerProcess",
+    "thread_aware::cell::storage::Strategy",
+    "thread_aware::core::ThreadAware",
+]
+
 [dependencies]
 async-once-cell.workspace = true
 dashmap.workspace = true

From 74cbc11fdadb79088cb176190c9aa464e70d5dcf Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 17:54:39 -0500
Subject: [PATCH 18/33] Fix pr issues

---
 .spelling                   |   7 ++-
 crates/uniflight/README.md  | 116 ++++++++++++++++++++++++------------
 crates/uniflight/src/lib.rs |   2 +-
 3 files changed, 86 insertions(+), 39 deletions(-)

diff --git a/.spelling b/.spelling
index 447f9019..8c54a07a 100644
--- a/.spelling
+++ b/.spelling
@@ -1,4 +1,4 @@
-299
+304
 →
 0.X.Y
 100k
@@ -37,6 +37,7 @@ btree_map
 buildable
 bytesbuf
 callee
+cancelled
 Cargo.toml
 C-BITFLAG
 C-CONV
@@ -80,6 +81,8 @@ C-SERDE
 C-SMART-PTR
 deallocate
 Debuggability
+Deduplicate
+deduplicating
 deduplication
 deque
 Deque
@@ -271,6 +274,8 @@ unconfigured
 uncontended
 unhandleable
 unicode
+uniflight
+uniflight's
 Uninit
 unordered
 unredacted
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index 09d79345..211920ca 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -1,28 +1,23 @@
 <div align="center">
- <img src="./logo.png" alt="Uniflight Logo" width="128">
+ <img src="./logo.png" alt="Uniflight Logo" width="96">
 
 # Uniflight
 
 [![crate.io](https://img.shields.io/crates/v/uniflight.svg)](https://crates.io/crates/uniflight)
 [![docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight)
 [![MSRV](https://img.shields.io/crates/msrv/uniflight)](https://crates.io/crates/uniflight)
-[![CI](https://github.com/microsoft/oxidizer/workflows/main/badge.svg)](https://github.com/microsoft/oxidizer/actions)
+[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml)
 [![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer)
-[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../LICENSE)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE)
+<a href="../.."><img src="../../logo.svg" alt="This crate was developed as part of the Oxidizer project" width="20"></a>
 
 </div>
 
-* [Summary](#summary)
-
-## Summary
-
-<!-- cargo-rdme start -->
-
 Coalesces duplicate async tasks into a single execution.
 
-This crate provides [`Merger`], a mechanism for deduplicating concurrent async operations.
+This crate provides [`Merger`][__link0], a mechanism for deduplicating concurrent async operations.
 When multiple tasks request the same work (identified by a key), only the first task (the
-"leader") performs the actual work while subsequent tasks (the "followers") wait and receive
+“leader”) performs the actual work while subsequent tasks (the “followers”) wait and receive
 a clone of the result.
 
 ## When to Use
@@ -30,54 +25,101 @@ a clone of the result.
 Use `Merger` when you have expensive or rate-limited operations that may be requested
 concurrently with the same parameters:
 
-- **Cache population**: Prevent thundering herd when a cache entry expires
-- **API calls**: Deduplicate concurrent requests to the same endpoint
-- **Database queries**: Coalesce identical queries issued simultaneously
-- **File I/O**: Avoid reading the same file multiple times concurrently
+* **Cache population**: Prevent thundering herd when a cache entry expires
+* **API calls**: Deduplicate concurrent requests to the same endpoint
+* **Database queries**: Coalesce identical queries issued simultaneously
+* **File I/O**: Avoid reading the same file multiple times concurrently
 
 ## Example
 
 ```rust
 use uniflight::Merger;
 
-let group: Merger<&str, String> = Merger::new();
+let group: Merger<String, String> = Merger::new();
 
-// Multiple concurrent calls with the same key will share a single execution
+// Multiple concurrent calls with the same key will share a single execution.
+// Note: you can pass &str directly when the key type is String.
 let result = group.work("user:123", || async {
     // This expensive operation runs only once, even if called concurrently
     "expensive_result".to_string()
 }).await;
 ```
 
-## Cancellation and Panic Safety
+## Flexible Key Types
 
-`Merger` handles task cancellation and panics gracefully:
+The [`Merger::work`][__link1] method accepts keys using [`Borrow`][__link2] semantics, allowing you to pass
+borrowed forms of the key type. For example, with `Merger<String, T>`, you can pass `&str`
+directly, with no explicit conversion at the call site (an owned key is still created internally):
 
-- If the leader task is cancelled or dropped, a follower becomes the new leader
-- If the leader task panics, a follower becomes the new leader and executes its work
-- Followers that join before the leader completes receive the cached result
+```rust
+let merger: Merger<String, i32> = Merger::new();
 
-## Thread Safety
+// Pass &str directly - no need to call .to_string()
+merger.work("my-key", || async { 42 }).await;
+```
 
-[`Merger`] is `Send` and `Sync`, and can be shared across threads. The returned futures
-are `Send` when the closure, future, key, and value types are `Send`.
+## Thread-Aware Scoping
 
-## Performance
+`Merger` supports thread-aware scoping via a [`Strategy`][__link3]
+type parameter. This controls how the internal state is partitioned across threads/NUMA nodes:
 
-Benchmarks comparing `uniflight` against `singleflight-async` show the following characteristics:
+* [`PerProcess`][__link4] (default): Single global state, maximum deduplication
+* [`PerNuma`][__link5]: Separate state per NUMA node, NUMA-local memory access
+* [`PerCore`][__link6]: Separate state per core, no cross-core deduplication (useful for already-partitioned work)
 
-- **Concurrent workloads** (10+ tasks): uniflight is 1.2-1.3x faster, demonstrating better scalability under contention
-- **Single calls**: singleflight-async has lower per-call overhead (~2x faster for individual operations)
-- **Multiple keys**: uniflight performs 1.3x faster when handling multiple distinct keys concurrently
+```rust
+use uniflight::{Merger, PerNuma};
 
-uniflight's DashMap-based architecture provides excellent scaling properties for high-concurrency scenarios,
-making it well-suited for production workloads with concurrent access patterns. For low-contention scenarios
-with predominantly single calls, the performance difference is minimal (sub-microsecond range).
+// NUMA-aware merger - each NUMA node gets its own deduplication scope
+let merger: Merger<String, String, PerNuma> = Merger::new_per_numa();
+```
 
-<!-- cargo-rdme end -->
+## Cancellation and Panic Safety
 
-<div style="font-size: 75%" ><hr/>
+`Merger` handles task cancellation and panics gracefully:
 
-This crate was developed as part of [The Oxidizer Project](https://github.com/microsoft/oxidizer).
+* If the leader task is cancelled or dropped, a follower becomes the new leader
+* If the leader task panics, a follower becomes the new leader and executes its work
+* Followers that join before the leader completes receive the cached result
 
-</div>
+## Memory Management
+
+Completed entries are automatically removed from the internal map when the last caller
+finishes. This ensures no stale entries accumulate over time.
+
+## Thread Safety
+
+[`Merger`][__link7] is `Send` and `Sync`, and can be shared across threads. The returned futures
+are `Send` when the closure, future, key, and value types are `Send`.
+
+## Performance
+
+Benchmarks comparing `uniflight` against `singleflight-async`:
+
+|Benchmark|uniflight|singleflight-async|Winner|
+|---------|---------|------------------|------|
+|Single call|723 ns|664 ns|~equal|
+|10 concurrent tasks|50 µs|56 µs|uniflight 1.1x|
+|100 concurrent tasks|177 µs|190 µs|uniflight 1.1x|
+|10 keys × 10 tasks|176 µs|230 µs|uniflight 1.3x|
+|Sequential reuse|757 ns|1.0 µs|uniflight 1.3x|
+
+uniflight’s `DashMap`-based architecture scales well under contention, making it
+well-suited for high-concurrency workloads. For single-call scenarios, both libraries
+perform similarly (sub-microsecond).
+
+
+<hr/>
+<sub>
+This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
+</sub>
+
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG4rQxRqzWASPG-1iTT3PvBVBG6_OsurRzxT0GwQvs0pj2ro6YWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
+ [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
+ [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::work
+ [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow
+ [__link3]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=storage::Strategy
+ [__link4]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerProcess
+ [__link5]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerNuma
+ [__link6]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerCore
+ [__link7]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 7796935c..5cb5392d 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -57,7 +57,7 @@
 //!
 //! # Thread-Aware Scoping
 //!
-//! `Merger` supports thread-aware scoping via a [`Strategy`](thread_aware::storage::Strategy)
+//! `Merger` supports thread-aware scoping via a [`Strategy`]
 //! type parameter. This controls how the internal state is partitioned across threads/NUMA nodes:
 //!
 //! - [`PerProcess`] (default): Single global state, maximum deduplication

From da0eebeb3b23c494db3228892dc270a32bb92f39 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 13 Jan 2026 18:49:37 -0500
Subject: [PATCH 19/33] Fix pr issues

---
 Cargo.toml                     |  2 +-
 crates/uniflight/README.md     | 32 +++--------
 crates/uniflight/src/lib.rs    | 98 ++++++++++++++++++++++++++++------
 crates/uniflight/tests/work.rs | 35 ++++++------
 4 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index d362f736..93571f75 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -45,8 +45,8 @@ uniflight = { path = "crates/uniflight", default-features = false, version = "0.
 
 # external dependencies
 alloc_tracker = { version = "0.5.9", default-features = false }
-async-once-cell = { version = "0.5", default-features = false }
 anyhow = { version = "1.0.100", default-features = false }
+async-once-cell = { version = "0.5", default-features = false }
 bytes = { version = "1.11.0", default-features = false }
 chrono = { version = "0.4.40", default-features = false }
 chrono-tz = { version = "0.10.4", default-features = false }
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index 211920ca..55d45f17 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -1,17 +1,4 @@
-<div align="center">
- <img src="./logo.png" alt="Uniflight Logo" width="96">
-
-# Uniflight
-
-[![crate.io](https://img.shields.io/crates/v/uniflight.svg)](https://crates.io/crates/uniflight)
-[![docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight)
-[![MSRV](https://img.shields.io/crates/msrv/uniflight)](https://crates.io/crates/uniflight)
-[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml)
-[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer)
-[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE)
-<a href="../.."><img src="../../logo.svg" alt="This crate was developed as part of the Oxidizer project" width="20"></a>
-
-</div>
+# uniflight ![License: MIT](https://img.shields.io/badge/license-MIT-blue) [![uniflight on crates.io](https://img.shields.io/crates/v/uniflight)](https://crates.io/crates/uniflight) [![uniflight on docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight) [![Source Code Repository](https://img.shields.io/badge/Code-On%20GitHub-blue?logo=GitHub)](https://github.com/microsoft/oxidizer) ![Rust Version: 1.88.0](https://img.shields.io/badge/rustc-1.88.0-orange.svg)
 
 Coalesces duplicate async tasks into a single execution.
 
@@ -98,23 +85,18 @@ Benchmarks comparing `uniflight` against `singleflight-async`:
 
 |Benchmark|uniflight|singleflight-async|Winner|
 |---------|---------|------------------|------|
-|Single call|723 ns|664 ns|~equal|
-|10 concurrent tasks|50 µs|56 µs|uniflight 1.1x|
-|100 concurrent tasks|177 µs|190 µs|uniflight 1.1x|
-|10 keys × 10 tasks|176 µs|230 µs|uniflight 1.3x|
-|Sequential reuse|757 ns|1.0 µs|uniflight 1.3x|
+|Single call|777 ns|691 ns|~equal|
+|10 concurrent tasks|58 µs|57 µs|~equal|
+|100 concurrent tasks|218 µs|219 µs|~equal|
+|10 keys × 10 tasks|186 µs|270 µs|uniflight 1.4x|
+|Sequential reuse|799 ns|759 ns|~equal|
 
 uniflight’s `DashMap`-based architecture scales well under contention, making it
 well-suited for high-concurrency workloads. For single-call scenarios, both libraries
 perform similarly (sub-microsecond).
 
 
-<hr/>
-<sub>
-This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
-</sub>
-
- [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG4rQxRqzWASPG-1iTT3PvBVBG6_OsurRzxT0GwQvs0pj2ro6YWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEG_W_Gn_kaocAGwCcVPfenh7eGy6gYLEwyIe4G6-xw_FwcbpjYXKEG4LesgjATyplG19XnTih3LPaG769pKvyUwBfG7yCiQP8BUYRYWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
  [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
  [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::work
  [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 5cb5392d..48dbbebf 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -97,11 +97,11 @@
 //!
 //! | Benchmark | uniflight | singleflight-async | Winner |
 //! |-----------|-----------|-------------------|--------|
-//! | Single call | 723 ns | 664 ns | ~equal |
-//! | 10 concurrent tasks | 50 µs | 56 µs | uniflight 1.1x |
-//! | 100 concurrent tasks | 177 µs | 190 µs | uniflight 1.1x |
-//! | 10 keys × 10 tasks | 176 µs | 230 µs | uniflight 1.3x |
-//! | Sequential reuse | 757 ns | 1.0 µs | uniflight 1.3x |
+//! | Single call | 777 ns | 691 ns | ~equal |
+//! | 10 concurrent tasks | 58 µs | 57 µs | ~equal |
+//! | 100 concurrent tasks | 218 µs | 219 µs | ~equal |
+//! | 10 keys × 10 tasks | 186 µs | 270 µs | uniflight 1.4x |
+//! | Sequential reuse | 799 ns | 759 ns | ~equal |
 //!
 //! uniflight's `DashMap`-based architecture scales well under contention, making it
 //! well-suited for high-concurrency workloads. For single-call scenarios, both libraries
@@ -169,14 +169,33 @@ where
     }
 }
 
-impl<K, T> Merger<K, T, PerProcess>
+impl<K, T, S> Merger<K, T, S>
 where
     K: Hash + Eq + Send + Sync + 'static,
     T: Clone + Send + Sync + 'static,
+    S: Strategy + Send + Sync,
 {
-    /// Creates a new `Merger` instance with process-wide (global) scope.
+    /// Creates a new `Merger` instance.
+    ///
+    /// The scoping strategy is determined by the type parameter `S`:
+    /// - [`PerProcess`] (default): Process-wide scope, maximum deduplication
+    /// - [`PerNuma`]: Per-NUMA-node scope, NUMA-local memory access
+    /// - [`PerCore`]: Per-core scope, no cross-core deduplication
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use uniflight::{Merger, PerNuma, PerCore};
+    ///
+    /// // Default (PerProcess) - the strategy parameter can be omitted from the annotation
+    /// let global: Merger<String, String> = Merger::new();
+    ///
+    /// // NUMA-local scope
+    /// let numa: Merger<String, String, PerNuma> = Merger::new();
     ///
-    /// This is the default strategy providing maximum deduplication across all threads.
+    /// // Per-core scope
+    /// let core: Merger<String, String, PerCore> = Merger::new();
+    /// ```
     #[inline]
     #[must_use]
     pub fn new() -> Self {
@@ -184,19 +203,55 @@ where
     }
 }
 
+impl<K, T> Merger<K, T, PerProcess>
+where
+    K: Hash + Eq + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
+{
+    /// Creates a new `Merger` with process-wide scoping (default).
+    ///
+    /// All threads share a single deduplication scope, providing maximum
+    /// work deduplication across the entire process.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use uniflight::Merger;
+    ///
+    /// let merger = Merger::<String, String, _>::new_per_process();
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn new_per_process() -> Self {
+        Self {
+            inner: TaArc::new(DashMap::new),
+        }
+    }
+}
+
 impl<K, T> Merger<K, T, PerNuma>
 where
     K: Hash + Eq + Send + Sync + 'static,
     T: Clone + Send + Sync + 'static,
 {
-    /// Creates a new `Merger` instance with per-NUMA-node scope.
+    /// Creates a new `Merger` with per-NUMA-node scoping.
+    ///
+    /// Each NUMA node gets its own deduplication scope, ensuring memory
+    /// locality for cached results while still deduplicating within each node.
     ///
-    /// Each NUMA node gets its own deduplication scope, keeping all memory accesses
-    /// local to the node for better performance on multi-socket systems.
+    /// # Example
+    ///
+    /// ```
+    /// use uniflight::Merger;
+    ///
+    /// let merger = Merger::<String, String, _>::new_per_numa();
+    /// ```
     #[inline]
     #[must_use]
     pub fn new_per_numa() -> Self {
-        Self::default()
+        Self {
+            inner: TaArc::new(DashMap::new),
+        }
     }
 }
 
@@ -205,15 +260,24 @@ where
     K: Hash + Eq + Send + Sync + 'static,
     T: Clone + Send + Sync + 'static,
 {
-    /// Creates a new `Merger` instance with per-core scope.
+    /// Creates a new `Merger` with per-core scoping.
+    ///
+    /// Each core gets its own deduplication scope. This is useful when work
+    /// is already partitioned by core and cross-core deduplication is not needed.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use uniflight::Merger;
     ///
-    /// Each core gets its own deduplication scope. This provides no cross-core
-    /// deduplication but eliminates all contention. Useful when work is already
-    /// partitioned by core.
+    /// let merger = Merger::<String, String, _>::new_per_core();
+    /// ```
     #[inline]
     #[must_use]
     pub fn new_per_core() -> Self {
-        Self::default()
+        Self {
+            inner: TaArc::new(DashMap::new),
+        }
     }
 }
 
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index acb450f1..efc7bbe5 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -23,7 +23,7 @@ fn unreachable_future() -> std::future::Pending<String> {
 
 #[tokio::test]
 async fn direct_call() {
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let result = group
         .work("key", || async {
             tokio::time::sleep(Duration::from_millis(10)).await;
@@ -37,7 +37,7 @@ async fn direct_call() {
 async fn parallel_call() {
     let call_counter = AtomicUsize::default();
 
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let futures = FuturesUnordered::new();
     for _ in 0..10 {
         futures.push(group.work("key", || async {
@@ -55,7 +55,7 @@ async fn parallel_call() {
 async fn parallel_call_seq_await() {
     let call_counter = AtomicUsize::default();
 
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let mut futures = Vec::new();
     for _ in 0..10 {
         futures.push(group.work("key", || async {
@@ -73,7 +73,7 @@ async fn parallel_call_seq_await() {
 
 #[tokio::test]
 async fn call_with_static_str_key() {
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let result = group
         .work("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -85,7 +85,7 @@ async fn call_with_static_str_key() {
 
 #[tokio::test]
 async fn call_with_static_string_key() {
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let result = group
         .work("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -99,7 +99,7 @@ async fn call_with_static_string_key() {
 async fn call_with_custom_key() {
     #[derive(Clone, PartialEq, Eq, Hash)]
     struct K(i32);
-    let group = Merger::new();
+    let group = Merger::<K, String, _>::new_per_process();
     let result = group
         .work(&K(1), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
@@ -111,7 +111,7 @@ async fn call_with_custom_key() {
 
 #[tokio::test]
 async fn late_wait() {
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
     let fut_early = group.work("key", || async {
         tokio::time::sleep(Duration::from_millis(20)).await;
         "Result".to_string()
@@ -124,7 +124,7 @@ async fn late_wait() {
 
 #[tokio::test]
 async fn cancel() {
-    let group = Merger::new();
+    let group = Merger::<String, String, _>::new_per_process();
 
     // the executer cancelled and the other awaiter will create a new future and execute.
     let fut_cancel = group.work(&"key".to_string(), unreachable_future);
@@ -208,10 +208,17 @@ async fn debug_impl() {
 }
 
 #[tokio::test]
-async fn per_numa_strategy() {
-    use uniflight::PerNuma;
+async fn per_process_strategy() {
+    let group = Merger::<String, String, _>::new_per_process();
+    let result = group
+        .work("key", || async { "Result".to_string() })
+        .await;
+    assert_eq!(result, "Result");
+}
 
-    let group: Merger<String, String, PerNuma> = Merger::new_per_numa();
+#[tokio::test]
+async fn per_numa_strategy() {
+    let group = Merger::<String, String, _>::new_per_numa();
     let result = group
         .work("key", || async { "Result".to_string() })
         .await;
@@ -220,9 +227,7 @@ async fn per_numa_strategy() {
 
 #[tokio::test]
 async fn per_core_strategy() {
-    use uniflight::PerCore;
-
-    let group: Merger<String, String, PerCore> = Merger::new_per_core();
+    let group = Merger::<String, String, _>::new_per_core();
     let result = group
         .work("key", || async { "Result".to_string() })
         .await;
@@ -231,7 +236,7 @@ async fn per_core_strategy() {
 
 #[tokio::test]
 async fn clone_shares_state() {
-    let group1 = Merger::new();
+    let group1 = Merger::<String, String, _>::new_per_process();
     let group2 = group1.clone();
 
     let call_counter = AtomicUsize::default();

From e0f88e526436b3c11c3e8f2c8b2a4c45fe4aee59 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 14 Jan 2026 12:30:56 -0500
Subject: [PATCH 20/33] cargo fmt

---
 crates/uniflight/src/lib.rs    | 26 +++++++++++++-------------
 crates/uniflight/tests/work.rs | 12 +++---------
 2 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 48dbbebf..dff4d872 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -118,10 +118,12 @@ use std::{
 };
 
 use async_once_cell::OnceCell;
-use dashmap::{DashMap, Entry::{Occupied, Vacant}};
+use dashmap::{
+    DashMap,
+    Entry::{Occupied, Vacant},
+};
 use thread_aware::{
-    Arc as TaArc,
-    ThreadAware,
+    Arc as TaArc, ThreadAware,
     affinity::{MemoryAffinity, PinnedAffinity},
     storage::Strategy,
 };
@@ -142,17 +144,13 @@ pub struct Merger<K, T, S: Strategy = PerProcess> {
 
 impl<K, T, S: Strategy> Debug for Merger<K, T, S> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("Merger")
-            .field("inner", &format_args!("DashMap<...>"))
-            .finish()
+        f.debug_struct("Merger").field("inner", &format_args!("DashMap<...>")).finish()
     }
 }
 
 impl<K, T, S: Strategy> Clone for Merger<K, T, S> {
     fn clone(&self) -> Self {
-        Self {
-            inner: self.inner.clone(),
-        }
+        Self { inner: self.inner.clone() }
     }
 }
 
@@ -404,10 +402,12 @@ mod tests {
 
         // Multiple concurrent calls should clean up after all complete
         let futures: Vec<_> = (0..10)
-            .map(|_| group.work("key2", || async {
-                tokio::time::sleep(Duration::from_millis(50)).await;
-                "Result".to_string()
-            }))
+            .map(|_| {
+                group.work("key2", || async {
+                    tokio::time::sleep(Duration::from_millis(50)).await;
+                    "Result".to_string()
+                })
+            })
             .collect();
 
         // While in flight, map should have an entry
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index efc7bbe5..56b863c4 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -210,27 +210,21 @@ async fn debug_impl() {
 #[tokio::test]
 async fn per_process_strategy() {
     let group = Merger::<String, String, _>::new_per_process();
-    let result = group
-        .work("key", || async { "Result".to_string() })
-        .await;
+    let result = group.work("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 
 #[tokio::test]
 async fn per_numa_strategy() {
     let group = Merger::<String, String, _>::new_per_numa();
-    let result = group
-        .work("key", || async { "Result".to_string() })
-        .await;
+    let result = group.work("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 
 #[tokio::test]
 async fn per_core_strategy() {
     let group = Merger::<String, String, _>::new_per_core();
-    let result = group
-        .work("key", || async { "Result".to_string() })
-        .await;
+    let result = group.work("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 

From 575baf0e940383e6a14e937f60f96e1900058641 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 14 Jan 2026 12:36:34 -0500
Subject: [PATCH 21/33] Fix clippy issues

---
 crates/uniflight/benches/comparison.rs | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/crates/uniflight/benches/comparison.rs b/crates/uniflight/benches/comparison.rs
index 1dda550e..fb9817ad 100644
--- a/crates/uniflight/benches/comparison.rs
+++ b/crates/uniflight/benches/comparison.rs
@@ -3,6 +3,13 @@
 
 //! Benchmarks comparing uniflight against singleflight-async.
 
+#![allow(
+    clippy::items_after_statements,
+    clippy::unwrap_used,
+    missing_docs,
+    reason = "Benchmarks have relaxed requirements"
+)]
+
 use std::sync::{
     Arc,
     atomic::{AtomicU64, Ordering},
@@ -179,7 +186,7 @@ fn bench_multiple_keys(c: &mut Criterion) {
                         let merger = Arc::clone(&merger);
                         (0..10).map(move |_| {
                             let merger = Arc::clone(&merger);
-                            let key = format!("key_{}_{key_id}", iteration);
+                            let key = format!("key_{iteration}_{key_id}");
                             tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
                         })
                     })
@@ -204,7 +211,7 @@ fn bench_multiple_keys(c: &mut Criterion) {
                         let group = Arc::clone(&group);
                         (0..10).map(move |_| {
                             let group = Arc::clone(&group);
-                            let key = format!("key_{}_{key_id}", iteration);
+                            let key = format!("key_{iteration}_{key_id}");
                             tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
                         })
                     })

From beaacdb1d0eec073b2750e52fcd3a018ec730632 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 14 Jan 2026 16:06:47 -0500
Subject: [PATCH 22/33] Fix PR issues

---
 Cargo.lock                  |  1 +
 crates/uniflight/Cargo.toml |  1 +
 crates/uniflight/README.md  | 22 ++++++++++++++++++++--
 crates/uniflight/src/lib.rs | 15 ++++++---------
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f003f493..538d823e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1918,6 +1918,7 @@ dependencies = [
  "criterion",
  "dashmap",
  "futures-util",
+ "mutants",
  "singleflight-async",
  "thread_aware",
  "tick",
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 2d336025..6df4e2da 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -33,6 +33,7 @@ thread_aware.workspace = true
 [dev-dependencies]
 criterion = { workspace = true, features = ["async_tokio"] }
 futures-util = { workspace = true, features = ["alloc", "std"] }
+mutants.workspace = true
 singleflight-async.workspace = true
 tick = { workspace = true, features = ["tokio"] }
 tokio = { workspace = true, features = [
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index 55d45f17..55c24fb6 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -1,4 +1,17 @@
-# uniflight ![License: MIT](https://img.shields.io/badge/license-MIT-blue) [![uniflight on crates.io](https://img.shields.io/crates/v/uniflight)](https://crates.io/crates/uniflight) [![uniflight on docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight) [![Source Code Repository](https://img.shields.io/badge/Code-On%20GitHub-blue?logo=GitHub)](https://github.com/microsoft/oxidizer) ![Rust Version: 1.88.0](https://img.shields.io/badge/rustc-1.88.0-orange.svg)
+<div align="center">
+ <img src="./logo.png" alt="Uniflight Logo" width="96">
+
+# Uniflight
+
+[![crate.io](https://img.shields.io/crates/v/uniflight.svg)](https://crates.io/crates/uniflight)
+[![docs.rs](https://docs.rs/uniflight/badge.svg)](https://docs.rs/uniflight)
+[![MSRV](https://img.shields.io/crates/msrv/uniflight)](https://crates.io/crates/uniflight)
+[![CI](https://github.com/microsoft/oxidizer/actions/workflows/main.yml/badge.svg?event=push)](https://github.com/microsoft/oxidizer/actions/workflows/main.yml)
+[![Coverage](https://codecov.io/gh/microsoft/oxidizer/graph/badge.svg?token=FCUG0EL5TI)](https://codecov.io/gh/microsoft/oxidizer)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE)
+<a href="../.."><img src="../../logo.svg" alt="This crate was developed as part of the Oxidizer project" width="20"></a>
+
+</div>
 
 Coalesces duplicate async tasks into a single execution.
 
@@ -96,7 +109,12 @@ well-suited for high-concurrency workloads. For single-call scenarios, both libr
 perform similarly (sub-microsecond).
 
 
- [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEG_W_Gn_kaocAGwCcVPfenh7eGy6gYLEwyIe4G6-xw_FwcbpjYXKEG4LesgjATyplG19XnTih3LPaG769pKvyUwBfG7yCiQP8BUYRYWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
+<hr/>
+<sub>
+This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
+</sub>
+
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG4LesgjATyplG19XnTih3LPaG769pKvyUwBfG7yCiQP8BUYRYWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
  [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
  [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::work
  [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index dff4d872..6f5b3c11 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -220,10 +220,9 @@ where
     /// ```
     #[inline]
     #[must_use]
+    #[cfg_attr(test, mutants::skip)] // Equivalent mutant: delegates to Default
     pub fn new_per_process() -> Self {
-        Self {
-            inner: TaArc::new(DashMap::new),
-        }
+        Self::default()
     }
 }
 
@@ -246,10 +245,9 @@ where
     /// ```
     #[inline]
     #[must_use]
+    #[cfg_attr(test, mutants::skip)] // Equivalent mutant: delegates to Default
     pub fn new_per_numa() -> Self {
-        Self {
-            inner: TaArc::new(DashMap::new),
-        }
+        Self::default()
     }
 }
 
@@ -272,10 +270,9 @@ where
     /// ```
     #[inline]
     #[must_use]
+    #[cfg_attr(test, mutants::skip)] // Equivalent mutant: delegates to Default
     pub fn new_per_core() -> Self {
-        Self {
-            inner: TaArc::new(DashMap::new),
-        }
+        Self::default()
     }
 }
 

From bb97d175e053c3e84b9fa96b03abc0fa2148351d Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 15 Jan 2026 13:37:26 -0500
Subject: [PATCH 23/33] Remove excessive comments. Improve code coverage

---
 crates/uniflight/src/lib.rs    | 62 ++++++++++++++++++++++++++++++++++
 crates/uniflight/tests/work.rs |  2 +-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 6f5b3c11..2e31b93c 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -358,6 +358,19 @@ where
         }
 
         // Slow path: need to insert or replace expired entry
+        Self::insert_or_get_existing(map, key)
+    }
+
+    /// Inserts a new cell or returns an existing live cell (handling races).
+    ///
+    /// This is the slow path of `get_or_create_cell`, separated for testability.
+    /// It handles the case where another thread may have inserted a cell between
+    /// our fast-path check and this insertion attempt.
+    fn insert_or_get_existing<Q>(map: &DashMap<K, Weak<OnceCell<T>>>, key: &Q) -> Arc<OnceCell<T>>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
+    {
         let cell = Arc::new(OnceCell::new());
         let weak = Arc::downgrade(&cell);
 
@@ -386,6 +399,55 @@ where
 mod tests {
     use super::*;
     use std::time::Duration;
+    use thread_aware::affinity::pinned_affinities;
+
+    #[test]
+    fn relocated_delegates_to_inner() {
+        let affinities = pinned_affinities(&[2]);
+        let source = affinities[0].into();
+        let destination = affinities[1];
+
+        let merger: Merger<String, String> = Merger::new();
+        let relocated = merger.relocated(source, destination);
+
+        // Verify the relocated merger still works
+        assert!(relocated.is_empty());
+    }
+
+    #[test]
+    fn fast_path_returns_existing() {
+        let map: DashMap<String, Weak<OnceCell<String>>> = DashMap::new();
+        let existing_cell = Arc::new(OnceCell::new());
+        map.insert("key".to_string(), Arc::downgrade(&existing_cell));
+
+        let result = Merger::<String, String>::get_or_create_cell(&map, "key");
+
+        assert!(Arc::ptr_eq(&result, &existing_cell));
+    }
+
+    #[test]
+    fn replaces_expired_entry() {
+        let map: DashMap<String, Weak<OnceCell<String>>> = DashMap::new();
+        let expired_weak = Arc::downgrade(&Arc::new(OnceCell::<String>::new()));
+        map.insert("key".to_string(), expired_weak);
+
+        let result = Merger::<String, String>::get_or_create_cell(&map, "key");
+
+        let entry = map.get("key").unwrap();
+        assert!(Arc::ptr_eq(&result, &entry.value().upgrade().unwrap()));
+    }
+
+    /// Simulates a race where another thread inserted between fast-path check and entry().
+    #[test]
+    fn race_returns_existing() {
+        let map: DashMap<String, Weak<OnceCell<String>>> = DashMap::new();
+        let other_cell = Arc::new(OnceCell::new());
+        map.insert("key".to_string(), Arc::downgrade(&other_cell));
+
+        let result = Merger::<String, String>::insert_or_get_existing(&map, "key");
+
+        assert!(Arc::ptr_eq(&result, &other_cell));
+    }
 
     #[tokio::test]
     async fn cleanup_after_completion() {
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index 56b863c4..f36f89d4 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -126,7 +126,7 @@ async fn late_wait() {
 async fn cancel() {
     let group = Merger::<String, String, _>::new_per_process();
 
-    // the executer cancelled and the other awaiter will create a new future and execute.
+    // The executor was cancelled; the other awaiter will create a new future and execute.
     let fut_cancel = group.work(&"key".to_string(), unreachable_future);
     let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
     let fut_late = group.work("key", || async { "Result2".to_string() });

From 693d224441674b1739906504b69ca0bc9383f470 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 15 Jan 2026 13:56:28 -0500
Subject: [PATCH 24/33] Use tick::Clock::delay instead of tokio::time::sleep

---
 crates/uniflight/examples/cache_population.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index 6137bdea..25552025 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -56,7 +56,8 @@ async fn main() {
         handles.push(handle);
 
         // Stagger the requests slightly to see the deduplication in action
-        tokio::time::sleep(Duration::from_millis(10)).await;
+        let clock = Clock::new_tokio();
+        clock.delay(Duration::from_millis(10)).await;
     }
 
     // Wait for all requests to complete

From e6b76f5096c1ea7648a7ea78295cc5963722a07d Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 15 Jan 2026 14:05:52 -0500
Subject: [PATCH 25/33] clippy

---
 crates/uniflight/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 2e31b93c..5f161f82 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -437,7 +437,7 @@ mod tests {
         assert!(Arc::ptr_eq(&result, &entry.value().upgrade().unwrap()));
     }
 
-    /// Simulates a race where another thread inserted between fast-path check and entry().
+    /// Simulates a race where another thread inserted between fast-path check and `entry()`.
     #[test]
     fn race_returns_existing() {
         let map: DashMap<String, Weak<OnceCell<String>>> = DashMap::new();

From 7f28cfe26e240cf671b78834dcd4683c90dc9e7b Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 15 Jan 2026 17:27:21 -0500
Subject: [PATCH 26/33] Rename work to execute. Remove unnecessary attribution

---
 crates/uniflight/README.md                    | 10 ++---
 crates/uniflight/benches/comparison.rs        | 10 ++---
 crates/uniflight/examples/cache_population.rs |  2 +-
 crates/uniflight/src/lib.rs                   | 24 +++++------
 crates/uniflight/tests/work.rs                | 42 +++++++++----------
 5 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index 55c24fb6..20704ab9 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -39,7 +39,7 @@ let group: Merger<String, String> = Merger::new();
 
 // Multiple concurrent calls with the same key will share a single execution.
 // Note: you can pass &str directly when the key type is String.
-let result = group.work("user:123", || async {
+let result = group.execute("user:123", || async {
     // This expensive operation runs only once, even if called concurrently
     "expensive_result".to_string()
 }).await;
@@ -47,7 +47,7 @@ let result = group.work("user:123", || async {
 
 ## Flexible Key Types
 
-The [`Merger::work`][__link1] method accepts keys using [`Borrow`][__link2] semantics, allowing you to pass
+The [`Merger::execute`][__link1] method accepts keys using [`Borrow`][__link2] semantics, allowing you to pass
 borrowed forms of the key type. For example, with `Merger<String, T>`, you can pass `&str`
 directly without allocating:
 
@@ -55,7 +55,7 @@ directly without allocating:
 let merger: Merger<String, i32> = Merger::new();
 
 // Pass &str directly - no need to call .to_string()
-merger.work("my-key", || async { 42 }).await;
+merger.execute("my-key", || async { 42 }).await;
 ```
 
 ## Thread-Aware Scoping
@@ -114,9 +114,9 @@ perform similarly (sub-microsecond).
 This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
 </sub>
 
- [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG4LesgjATyplG19XnTih3LPaG769pKvyUwBfG7yCiQP8BUYRYWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG04a0viCY92EG7dKIcfT3ii0G7aPoi1gqOy7Gxzt8DWyGmp1YWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
  [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
- [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::work
+ [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::execute
  [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow
  [__link3]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=storage::Strategy
  [__link4]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerProcess
diff --git a/crates/uniflight/benches/comparison.rs b/crates/uniflight/benches/comparison.rs
index fb9817ad..2bdd7f6d 100644
--- a/crates/uniflight/benches/comparison.rs
+++ b/crates/uniflight/benches/comparison.rs
@@ -33,7 +33,7 @@ fn bench_single_call(c: &mut Criterion) {
             let merger = Arc::clone(&our_merger);
             async move {
                 let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
-                merger.work(&key, || async { "value".to_string() }).await
+                merger.execute(&key, || async { "value".to_string() }).await
             }
         });
     });
@@ -73,7 +73,7 @@ fn bench_concurrent_10(c: &mut Criterion) {
                     .map(|_| {
                         let merger = Arc::clone(&merger);
                         let key = key.clone();
-                        tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
+                        tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
                     })
                     .collect();
 
@@ -129,7 +129,7 @@ fn bench_concurrent_100(c: &mut Criterion) {
                     .map(|_| {
                         let merger = Arc::clone(&merger);
                         let key = key.clone();
-                        tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
+                        tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
                     })
                     .collect();
 
@@ -187,7 +187,7 @@ fn bench_multiple_keys(c: &mut Criterion) {
                         (0..10).map(move |_| {
                             let merger = Arc::clone(&merger);
                             let key = format!("key_{iteration}_{key_id}");
-                            tokio::spawn(async move { merger.work(&key, || async { "value".to_string() }).await })
+                            tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
                         })
                     })
                     .collect();
@@ -244,7 +244,7 @@ fn bench_reuse_group(c: &mut Criterion) {
             async move {
                 // Each iteration uses a unique key to avoid caching effects
                 let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
-                merger.work(&key, || async { "value".to_string() }).await
+                merger.execute(&key, || async { "value".to_string() }).await
             }
         });
     });
diff --git a/crates/uniflight/examples/cache_population.rs b/crates/uniflight/examples/cache_population.rs
index 25552025..0e113863 100644
--- a/crates/uniflight/examples/cache_population.rs
+++ b/crates/uniflight/examples/cache_population.rs
@@ -38,7 +38,7 @@ async fn main() {
             let start = clock.instant();
 
             let result = group
-                .work("user:123", || async {
+                .execute("user:123", || async {
                     let count = counter.fetch_add(1, Ordering::SeqCst) + 1;
                     println!("  [Request {i}] I'm the leader! Fetching from database... (execution #{count})");
 
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 5f161f82..84abae5f 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -1,10 +1,6 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-// Based on singleflight-async by ihciah
-// Original: https://github.com/ihciah/singleflight-async
-// Licensed under MIT/Apache-2.0
-
 //! Coalesces duplicate async tasks into a single execution.
 //!
 //! This crate provides [`Merger`], a mechanism for deduplicating concurrent async operations.
@@ -32,7 +28,7 @@
 //!
 //! // Multiple concurrent calls with the same key will share a single execution.
 //! // Note: you can pass &str directly when the key type is String.
-//! let result = group.work("user:123", || async {
+//! let result = group.execute("user:123", || async {
 //!     // This expensive operation runs only once, even if called concurrently
 //!     "expensive_result".to_string()
 //! }).await;
@@ -41,7 +37,7 @@
 //!
 //! # Flexible Key Types
 //!
-//! The [`Merger::work`] method accepts keys using [`Borrow`] semantics, allowing you to pass
+//! The [`Merger::execute`] method accepts keys using [`Borrow`] semantics, allowing you to pass
 //! borrowed forms of the key type. For example, with `Merger<String, T>`, you can pass `&str`
 //! directly without allocating:
 //!
@@ -51,7 +47,7 @@
 //! let merger: Merger<String, i32> = Merger::new();
 //!
 //! // Pass &str directly - no need to call .to_string()
-//! merger.work("my-key", || async { 42 }).await;
+//! merger.execute("my-key", || async { 42 }).await;
 //! # }
 //! ```
 //!
@@ -321,10 +317,10 @@ where
     /// # use uniflight::Merger;
     /// # async fn example() {
     /// let merger: Merger<String, i32> = Merger::new();
-    /// let result = merger.work("my-key", || async { 42 }).await;
+    /// let result = merger.execute("my-key", || async { 42 }).await;
     /// # }
     /// ```
-    pub fn work<Q, F, Fut>(&self, key: &Q, func: F) -> impl Future<Output = T> + Send + use<Q, F, Fut, K, T, S>
+    pub fn execute<Q, F, Fut>(&self, key: &Q, func: F) -> impl Future<Output = T> + Send + use<Q, F, Fut, K, T, S>
     where
         K: Borrow<Q>,
         Q: Hash + Eq + ToOwned<Owned = K> + ?Sized,
@@ -455,14 +451,14 @@ mod tests {
         assert!(group.is_empty());
 
         // Single call should clean up after completion
-        let result = group.work("key1", || async { "Result".to_string() }).await;
+        let result = group.execute("key1", || async { "Result".to_string() }).await;
         assert_eq!(result, "Result");
         assert!(group.is_empty(), "Map should be empty after single call completes");
 
         // Multiple concurrent calls should clean up after all complete
         let futures: Vec<_> = (0..10)
             .map(|_| {
-                group.work("key2", || async {
+                group.execute("key2", || async {
                     tokio::time::sleep(Duration::from_millis(50)).await;
                     "Result".to_string()
                 })
@@ -479,9 +475,9 @@ mod tests {
         assert!(group.is_empty(), "Map should be empty after all concurrent calls complete");
 
         // Multiple different keys should all be cleaned up
-        let fut1 = group.work("a", || async { "A".to_string() });
-        let fut2 = group.work("b", || async { "B".to_string() });
-        let fut3 = group.work("c", || async { "C".to_string() });
+        let fut1 = group.execute("a", || async { "A".to_string() });
+        let fut2 = group.execute("b", || async { "B".to_string() });
+        let fut3 = group.execute("c", || async { "C".to_string() });
 
         assert_eq!(group.len(), 3);
 
diff --git a/crates/uniflight/tests/work.rs b/crates/uniflight/tests/work.rs
index f36f89d4..47231d20 100644
--- a/crates/uniflight/tests/work.rs
+++ b/crates/uniflight/tests/work.rs
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-//! Integration tests for [`Merger::work()`].
+//! Integration tests for [`Merger::execute()`].
 
 use std::{
     sync::{
@@ -25,7 +25,7 @@ fn unreachable_future() -> std::future::Pending<String> {
 async fn direct_call() {
     let group = Merger::<String, String, _>::new_per_process();
     let result = group
-        .work("key", || async {
+        .execute("key", || async {
             tokio::time::sleep(Duration::from_millis(10)).await;
             "Result".to_string()
         })
@@ -40,7 +40,7 @@ async fn parallel_call() {
     let group = Merger::<String, String, _>::new_per_process();
     let futures = FuturesUnordered::new();
     for _ in 0..10 {
-        futures.push(group.work("key", || async {
+        futures.push(group.execute("key", || async {
             tokio::time::sleep(Duration::from_millis(100)).await;
             call_counter.fetch_add(1, AcqRel);
             "Result".to_string()
@@ -58,7 +58,7 @@ async fn parallel_call_seq_await() {
     let group = Merger::<String, String, _>::new_per_process();
     let mut futures = Vec::new();
     for _ in 0..10 {
-        futures.push(group.work("key", || async {
+        futures.push(group.execute("key", || async {
             tokio::time::sleep(Duration::from_millis(100)).await;
             call_counter.fetch_add(1, AcqRel);
             "Result".to_string()
@@ -75,7 +75,7 @@ async fn parallel_call_seq_await() {
 async fn call_with_static_str_key() {
     let group = Merger::<String, String, _>::new_per_process();
     let result = group
-        .work("key", || async {
+        .execute("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -87,7 +87,7 @@ async fn call_with_static_str_key() {
 async fn call_with_static_string_key() {
     let group = Merger::<String, String, _>::new_per_process();
     let result = group
-        .work("key", || async {
+        .execute("key", || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -101,7 +101,7 @@ async fn call_with_custom_key() {
     struct K(i32);
     let group = Merger::<K, String, _>::new_per_process();
     let result = group
-        .work(&K(1), || async {
+        .execute(&K(1), || async {
             tokio::time::sleep(Duration::from_millis(1)).await;
             "Result".to_string()
         })
@@ -112,11 +112,11 @@ async fn call_with_custom_key() {
 #[tokio::test]
 async fn late_wait() {
     let group = Merger::<String, String, _>::new_per_process();
-    let fut_early = group.work("key", || async {
+    let fut_early = group.execute("key", || async {
         tokio::time::sleep(Duration::from_millis(20)).await;
         "Result".to_string()
     });
-    let fut_late = group.work("key", unreachable_future);
+    let fut_late = group.execute("key", unreachable_future);
     assert_eq!(fut_early.await, "Result");
     tokio::time::sleep(Duration::from_millis(50)).await;
     assert_eq!(fut_late.await, "Result");
@@ -127,18 +127,18 @@ async fn cancel() {
     let group = Merger::<String, String, _>::new_per_process();
 
     // The executor was cancelled; the other awaiter will create a new future and execute.
-    let fut_cancel = group.work(&"key".to_string(), unreachable_future);
+    let fut_cancel = group.execute(&"key".to_string(), unreachable_future);
     let _ = tokio::time::timeout(Duration::from_millis(10), fut_cancel).await;
-    let fut_late = group.work("key", || async { "Result2".to_string() });
+    let fut_late = group.execute("key", || async { "Result2".to_string() });
     assert_eq!(fut_late.await, "Result2");
 
     // the first executer is slow but not dropped, so the result will be the first ones.
     let begin = tokio::time::Instant::now();
-    let fut_1 = group.work("key", || async {
+    let fut_1 = group.execute("key", || async {
         tokio::time::sleep(Duration::from_millis(2000)).await;
         "Result1".to_string()
     });
-    let fut_2 = group.work(&"key".to_string(), unreachable_future);
+    let fut_2 = group.execute(&"key".to_string(), unreachable_future);
     let (v1, v2) = tokio::join!(fut_1, fut_2);
     assert_eq!(v1, "Result1");
     assert_eq!(v2, "Result1");
@@ -154,7 +154,7 @@ async fn leader_panic_in_spawned_task() {
     let group_clone = Arc::clone(&group);
     let handle = tokio::spawn(async move {
         group_clone
-            .work("key", || async {
+            .execute("key", || async {
                 tokio::time::sleep(Duration::from_millis(50)).await;
                 panic!("leader panicked in spawned task");
                 #[expect(unreachable_code, reason = "Required to satisfy return type after panic")]
@@ -169,7 +169,7 @@ async fn leader_panic_in_spawned_task() {
     // Second task should become the new leader after the first panics
     let group_clone = Arc::clone(&group);
     let call_counter_ref = &call_counter;
-    let fut_follower = group_clone.work("key", || async {
+    let fut_follower = group_clone.execute("key", || async {
         call_counter_ref.fetch_add(1, AcqRel);
         "Result".to_string()
     });
@@ -192,7 +192,7 @@ async fn debug_impl() {
     assert!(debug_str.contains("Merger"));
 
     // Create a pending work item to populate the mapping
-    let fut = group.work("key", || async {
+    let fut = group.execute("key", || async {
         tokio::time::sleep(Duration::from_millis(100)).await;
         "Result".to_string()
     });
@@ -210,21 +210,21 @@ async fn debug_impl() {
 #[tokio::test]
 async fn per_process_strategy() {
     let group = Merger::<String, String, _>::new_per_process();
-    let result = group.work("key", || async { "Result".to_string() }).await;
+    let result = group.execute("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 
 #[tokio::test]
 async fn per_numa_strategy() {
     let group = Merger::<String, String, _>::new_per_numa();
-    let result = group.work("key", || async { "Result".to_string() }).await;
+    let result = group.execute("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 
 #[tokio::test]
 async fn per_core_strategy() {
     let group = Merger::<String, String, _>::new_per_core();
-    let result = group.work("key", || async { "Result".to_string() }).await;
+    let result = group.execute("key", || async { "Result".to_string() }).await;
     assert_eq!(result, "Result");
 }
 
@@ -236,14 +236,14 @@ async fn clone_shares_state() {
     let call_counter = AtomicUsize::default();
 
     // Start work on clone 1
-    let fut1 = group1.work("key", || async {
+    let fut1 = group1.execute("key", || async {
         tokio::time::sleep(Duration::from_millis(50)).await;
         call_counter.fetch_add(1, AcqRel);
         "Result".to_string()
     });
 
     // Clone 2 should join the same work
-    let fut2 = group2.work("key", || async {
+    let fut2 = group2.execute("key", || async {
         call_counter.fetch_add(1, AcqRel);
         "Unreachable".to_string()
     });

From 61f8ce641ecf1840df917d497da1536bc50f7b30 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Fri, 16 Jan 2026 11:44:09 -0500
Subject: [PATCH 27/33] better logo

---
 crates/uniflight/favicon.ico | 2 +-
 crates/uniflight/logo.png    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/uniflight/favicon.ico b/crates/uniflight/favicon.ico
index 2ed275b8..f1a3f34c 100644
--- a/crates/uniflight/favicon.ico
+++ b/crates/uniflight/favicon.ico
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd1ebeca79834229253d38008b7d0491947042e808664646f907472acbb9ba01
+oid sha256:ed616c6fc5b1c95300147e226f28d1b88f193babdf3bb1669422a93f55339304
 size 15406
diff --git a/crates/uniflight/logo.png b/crates/uniflight/logo.png
index 90a48880..1562ae9f 100644
--- a/crates/uniflight/logo.png
+++ b/crates/uniflight/logo.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec6fa0a1a07f8c21fb9946bc8ea78f127f027554fd8ca67f0b296d8c3d234b68
-size 46554
+oid sha256:6bf624b54edbaeb8bf0d961e895a86e09b18502fe6c761d00748317883dd09b8
+size 62560

From dd104eafa15391411afb134c076a1e91bb68e8ee Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Tue, 20 Jan 2026 12:23:01 -0500
Subject: [PATCH 28/33] Update readme

---
 crates/uniflight/README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index 20704ab9..a54a2a09 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -114,12 +114,12 @@ perform similarly (sub-microsecond).
 This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
 </sub>
 
- [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG04a0viCY92EG7dKIcfT3ii0G7aPoi1gqOy7Gxzt8DWyGmp1YWSCgmx0aHJlYWRfYXdhcmVlMC42LjCCaXVuaWZsaWdodGUwLjEuMA
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG04a0viCY92EG7dKIcfT3ii0G7aPoi1gqOy7Gxzt8DWyGmp1YWSCgmx0aHJlYWRfYXdhcmVlMC42LjGCaXVuaWZsaWdodGUwLjEuMA
  [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
  [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::execute
  [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow
- [__link3]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=storage::Strategy
- [__link4]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerProcess
- [__link5]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerNuma
- [__link6]: https://docs.rs/thread_aware/0.6.0/thread_aware/?search=PerCore
+ [__link3]: https://docs.rs/thread_aware/0.6.1/thread_aware/?search=storage::Strategy
+ [__link4]: https://docs.rs/thread_aware/0.6.1/thread_aware/?search=PerProcess
+ [__link5]: https://docs.rs/thread_aware/0.6.1/thread_aware/?search=PerNuma
+ [__link6]: https://docs.rs/thread_aware/0.6.1/thread_aware/?search=PerCore
  [__link7]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html

From 53457529356a9fbd82bd2f9a1b0edd0c4b3a9afd Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Wed, 21 Jan 2026 12:15:54 -0500
Subject: [PATCH 29/33] Undo ToC changes to README.md

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index b5997021..64503649 100644
--- a/README.md
+++ b/README.md
@@ -11,9 +11,8 @@
 
 This repository contains a set of crates that help you build robust highly scalable services in Rust.
 
-- [The Oxidizer Project](#the-oxidizer-project)
-  - [Crates](#crates)
-  - [About this Repo](#about-this-repo)
+- [Crates](#crates)
+- [About this Repo](#about-this-repo)
     - [Adding New Crates](#adding-new-crates)
     - [Publishing Crates](#publishing-crates)
     - [Documenting Crates](#documenting-crates)

From ab92d59c3e678a8a8e27eb48ee4ccafd609eb77a Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 22 Jan 2026 10:19:51 -0500
Subject: [PATCH 30/33] Improve benchmarks for incremental development. Remove
 thread_aware exports

---
 Cargo.lock                              |  12 --
 Cargo.toml                              |   1 -
 crates/uniflight/Cargo.toml             |  12 +-
 crates/uniflight/README.md              |  20 +-
 crates/uniflight/benches/comparison.rs  | 276 ------------------------
 crates/uniflight/benches/performance.rs | 112 ++++++++++
 crates/uniflight/src/lib.rs             |  30 +--
 7 files changed, 131 insertions(+), 332 deletions(-)
 delete mode 100644 crates/uniflight/benches/comparison.rs
 create mode 100644 crates/uniflight/benches/performance.rs

diff --git a/Cargo.lock b/Cargo.lock
index ca610fdf..8b096dde 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1438,16 +1438,6 @@ version = "2.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
 
-[[package]]
-name = "singleflight-async"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffae0d841b8012a86beec66a3f9c57b7b331a10366c764cd40bd6faebe3ad77c"
-dependencies = [
- "parking_lot",
- "tokio",
-]
-
 [[package]]
 name = "siphasher"
 version = "1.0.1"
@@ -1710,7 +1700,6 @@ version = "1.49.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86"
 dependencies = [
- "parking_lot",
  "pin-project-lite",
  "tokio-macros",
 ]
@@ -1896,7 +1885,6 @@ dependencies = [
  "dashmap",
  "futures-util",
  "mutants",
- "singleflight-async",
  "thread_aware",
  "tick",
  "tokio",
diff --git a/Cargo.toml b/Cargo.toml
index 32927709..0e4fbf4e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -82,7 +82,6 @@ rustc-hash = { version = "2.1.0", default-features = false }
 serde = { version = "1.0.228", default-features = false }
 serde_core = { version = "1.0.228", default-features = false }
 serde_json = { version = "1.0.145", default-features = false }
-singleflight-async = { version = "0.2", default-features = false }
 smallvec = { version = "1.15.1", default-features = false }
 static_assertions = { version = "1.1.0", default-features = false }
 syn = { version = "2.0.111", default-features = false }
diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index 6df4e2da..cb3d8ab4 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -16,15 +16,6 @@ license.workspace = true
 homepage.workspace = true
 repository.workspace = true
 
-[package.metadata.cargo_check_external_types]
-allowed_external_types = [
-    "thread_aware::cell::builtin::PerCore",
-    "thread_aware::cell::builtin::PerNuma",
-    "thread_aware::cell::builtin::PerProcess",
-    "thread_aware::cell::storage::Strategy",
-    "thread_aware::core::ThreadAware",
-]
-
 [dependencies]
 async-once-cell.workspace = true
 dashmap.workspace = true
@@ -34,7 +25,6 @@ thread_aware.workspace = true
 criterion = { workspace = true, features = ["async_tokio"] }
 futures-util = { workspace = true, features = ["alloc", "std"] }
 mutants.workspace = true
-singleflight-async.workspace = true
 tick = { workspace = true, features = ["tokio"] }
 tokio = { workspace = true, features = [
     "macros",
@@ -47,7 +37,7 @@ tokio = { workspace = true, features = [
 workspace = true
 
 [[bench]]
-name = "comparison"
+name = "performance"
 harness = false
 
 [[example]]
diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index a54a2a09..b7e0b6c9 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -94,19 +94,13 @@ are `Send` when the closure, future, key, and value types are `Send`.
 
 ## Performance
 
-Benchmarks comparing `uniflight` against `singleflight-async`:
-
-|Benchmark|uniflight|singleflight-async|Winner|
-|---------|---------|------------------|------|
-|Single call|777 ns|691 ns|~equal|
-|10 concurrent tasks|58 µs|57 µs|~equal|
-|100 concurrent tasks|218 µs|219 µs|~equal|
-|10 keys × 10 tasks|186 µs|270 µs|uniflight 1.4x|
-|Sequential reuse|799 ns|759 ns|~equal|
-
-uniflight’s `DashMap`-based architecture scales well under contention, making it
-well-suited for high-concurrency workloads. For single-call scenarios, both libraries
-perform similarly (sub-microsecond).
+Run benchmarks with `cargo bench -p uniflight`. The suite covers:
+
+- **single_call**: Baseline latency with no contention
+- **high_contention_100**: 100 concurrent tasks on the same key
+- **distributed_10x10**: 10 keys with 10 tasks each
+
+Use `--save-baseline` and `--baseline` flags to track regressions over time.
 
 
 <hr/>
diff --git a/crates/uniflight/benches/comparison.rs b/crates/uniflight/benches/comparison.rs
deleted file mode 100644
index 2bdd7f6d..00000000
--- a/crates/uniflight/benches/comparison.rs
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT License.
-
-//! Benchmarks comparing uniflight against singleflight-async.
-
-#![allow(
-    clippy::items_after_statements,
-    clippy::unwrap_used,
-    missing_docs,
-    reason = "Benchmarks have relaxed requirements"
-)]
-
-use std::sync::{
-    Arc,
-    atomic::{AtomicU64, Ordering},
-};
-
-use criterion::{Criterion, criterion_group, criterion_main};
-
-// Benchmark 1: Single call (no contention)
-fn bench_single_call(c: &mut Criterion) {
-    let rt = tokio::runtime::Runtime::new().unwrap();
-
-    let mut group = c.benchmark_group("single_call");
-
-    // Use atomic counter for unique keys
-    static COUNTER1: AtomicU64 = AtomicU64::new(0);
-
-    // Our implementation - pre-create the merger
-    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
-    group.bench_function("uniflight", |b| {
-        b.to_async(&rt).iter(|| {
-            let merger = Arc::clone(&our_merger);
-            async move {
-                let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
-                merger.execute(&key, || async { "value".to_string() }).await
-            }
-        });
-    });
-
-    // singleflight-async - pre-create the group
-    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
-    group.bench_function("singleflight_async", |b| {
-        b.to_async(&rt).iter(|| {
-            let group = Arc::clone(&their_group);
-            async move {
-                let key = format!("key_{}", COUNTER1.fetch_add(1, Ordering::Relaxed));
-                group.work(key, || async { "value".to_string() }).await
-            }
-        });
-    });
-
-    group.finish();
-}
-
-// Benchmark 2: Concurrent calls (10 tasks, same key)
-fn bench_concurrent_10(c: &mut Criterion) {
-    let rt = tokio::runtime::Runtime::new().unwrap();
-
-    let mut group = c.benchmark_group("concurrent_10_tasks");
-
-    // Use atomic counter for unique keys per iteration
-    static COUNTER2: AtomicU64 = AtomicU64::new(0);
-
-    // Our implementation - pre-create the merger
-    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
-    group.bench_function("uniflight", |b| {
-        b.to_async(&rt).iter(|| {
-            let merger = Arc::clone(&our_merger);
-            async move {
-                let key = format!("key_{}", COUNTER2.fetch_add(1, Ordering::Relaxed));
-                let handles: Vec<_> = (0..10)
-                    .map(|_| {
-                        let merger = Arc::clone(&merger);
-                        let key = key.clone();
-                        tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    // singleflight-async - pre-create the group
-    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
-    group.bench_function("singleflight_async", |b| {
-        b.to_async(&rt).iter(|| {
-            let group = Arc::clone(&their_group);
-            async move {
-                let key = format!("key_{}", COUNTER2.fetch_add(1, Ordering::Relaxed));
-                let handles: Vec<_> = (0..10)
-                    .map(|_| {
-                        let group = Arc::clone(&group);
-                        let key = key.clone();
-                        tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    group.finish();
-}
-
-// Benchmark 3: High contention (100 tasks, same key)
-fn bench_concurrent_100(c: &mut Criterion) {
-    let rt = tokio::runtime::Runtime::new().unwrap();
-
-    let mut group = c.benchmark_group("concurrent_100_tasks");
-
-    // Use atomic counter for unique keys per iteration
-    static COUNTER3: AtomicU64 = AtomicU64::new(0);
-
-    // Our implementation - pre-create the merger
-    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
-    group.bench_function("uniflight", |b| {
-        b.to_async(&rt).iter(|| {
-            let merger = Arc::clone(&our_merger);
-            async move {
-                let key = format!("key_{}", COUNTER3.fetch_add(1, Ordering::Relaxed));
-                let handles: Vec<_> = (0..100)
-                    .map(|_| {
-                        let merger = Arc::clone(&merger);
-                        let key = key.clone();
-                        tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    // singleflight-async - pre-create the group
-    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
-    group.bench_function("singleflight_async", |b| {
-        b.to_async(&rt).iter(|| {
-            let group = Arc::clone(&their_group);
-            async move {
-                let key = format!("key_{}", COUNTER3.fetch_add(1, Ordering::Relaxed));
-                let handles: Vec<_> = (0..100)
-                    .map(|_| {
-                        let group = Arc::clone(&group);
-                        let key = key.clone();
-                        tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    group.finish();
-}
-
-// Benchmark 4: Multiple different keys (10 keys, 10 tasks each)
-fn bench_multiple_keys(c: &mut Criterion) {
-    let rt = tokio::runtime::Runtime::new().unwrap();
-
-    let mut group = c.benchmark_group("multiple_keys_10x10");
-
-    // Use atomic counter for unique key prefix per iteration
-    static COUNTER4: AtomicU64 = AtomicU64::new(0);
-
-    // Our implementation - pre-create the merger
-    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
-    group.bench_function("uniflight", |b| {
-        b.to_async(&rt).iter(|| {
-            let merger = Arc::clone(&our_merger);
-            async move {
-                let iteration = COUNTER4.fetch_add(1, Ordering::Relaxed);
-                let handles: Vec<_> = (0..10)
-                    .flat_map(|key_id| {
-                        let merger = Arc::clone(&merger);
-                        (0..10).map(move |_| {
-                            let merger = Arc::clone(&merger);
-                            let key = format!("key_{iteration}_{key_id}");
-                            tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
-                        })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    // singleflight-async - pre-create the group
-    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
-    group.bench_function("singleflight_async", |b| {
-        b.to_async(&rt).iter(|| {
-            let group = Arc::clone(&their_group);
-            async move {
-                let iteration = COUNTER4.fetch_add(1, Ordering::Relaxed);
-                let handles: Vec<_> = (0..10)
-                    .flat_map(|key_id| {
-                        let group = Arc::clone(&group);
-                        (0..10).map(move |_| {
-                            let group = Arc::clone(&group);
-                            let key = format!("key_{iteration}_{key_id}");
-                            tokio::spawn(async move { group.work(key, || async { "value".to_string() }).await })
-                        })
-                    })
-                    .collect();
-
-                for handle in handles {
-                    handle.await.unwrap();
-                }
-            }
-        });
-    });
-
-    group.finish();
-}
-
-// Benchmark 5: Reuse existing group (pre-created, multiple operations)
-fn bench_reuse_group(c: &mut Criterion) {
-    let rt = tokio::runtime::Runtime::new().unwrap();
-
-    let mut group = c.benchmark_group("reuse_group");
-
-    // Use atomic counter for unique keys
-    static COUNTER5: AtomicU64 = AtomicU64::new(0);
-
-    // Our implementation - pre-create the merger
-    let our_merger = Arc::new(uniflight::Merger::<String, String>::new());
-    group.bench_function("uniflight", |b| {
-        b.to_async(&rt).iter(|| {
-            let merger = Arc::clone(&our_merger);
-            async move {
-                // Each iteration uses a unique key to avoid caching effects
-                let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
-                merger.execute(&key, || async { "value".to_string() }).await
-            }
-        });
-    });
-
-    // singleflight-async - pre-create the group
-    let their_group = Arc::new(singleflight_async::SingleFlight::<String, String>::new());
-    group.bench_function("singleflight_async", |b| {
-        b.to_async(&rt).iter(|| {
-            let group = Arc::clone(&their_group);
-            async move {
-                let key = format!("key_{}", COUNTER5.fetch_add(1, Ordering::Relaxed));
-                group.work(key, || async { "value".to_string() }).await
-            }
-        });
-    });
-
-    group.finish();
-}
-
-criterion_group!(
-    benches,
-    bench_single_call,
-    bench_concurrent_10,
-    bench_concurrent_100,
-    bench_multiple_keys,
-    bench_reuse_group,
-);
-
-criterion_main!(benches);
diff --git a/crates/uniflight/benches/performance.rs b/crates/uniflight/benches/performance.rs
new file mode 100644
index 00000000..6e14ff31
--- /dev/null
+++ b/crates/uniflight/benches/performance.rs
@@ -0,0 +1,112 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Performance benchmarks for uniflight.
+//!
+//! Run with: cargo bench -p uniflight
+//! Save baseline: cargo bench -p uniflight -- --save-baseline main
+//! Compare to baseline: cargo bench -p uniflight -- --baseline main
+
+#![allow(missing_docs)]
+
+use std::sync::{
+    Arc,
+    atomic::{AtomicU64, Ordering},
+};
+
+use criterion::{Criterion, criterion_group, criterion_main};
+use uniflight::Merger;
+
+static KEY_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+fn unique_key() -> String {
+    format!("key_{}", KEY_COUNTER.fetch_add(1, Ordering::Relaxed)) // distinct key on every call
+}
+
+/// Baseline: one `execute` call per iteration on a fresh key, so no other caller shares it.
+/// This measures the fixed overhead of the merger; `unique_key`'s `format!` is timed as well.
+fn bench_single_call(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let merger = Arc::new(Merger::<String, String>::new());
+
+    c.bench_function("single_call", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&merger);
+            async move {
+                merger
+                    .execute(&unique_key(), || async { "value".to_string() })
+                    .await
+            }
+        });
+    });
+}
+
+/// Stress test: 100 spawned tasks per iteration, all calling `execute` with the same fresh key.
+/// This hammers the synchronization primitives; task spawn and join cost is timed as well.
+fn bench_high_contention(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let merger = Arc::new(Merger::<String, String>::new());
+
+    c.bench_function("high_contention_100", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&merger);
+            async move {
+                let key = unique_key();
+                let tasks: Vec<_> = (0..100)
+                    .map(|_| {
+                        let merger = Arc::clone(&merger);
+                        let key = key.clone();
+                        tokio::spawn(async move {
+                            merger.execute(&key, || async { "value".to_string() }).await
+                        })
+                    })
+                    .collect();
+
+                for task in tasks {
+                    task.await.unwrap();
+                }
+            }
+        });
+    });
+}
+
+/// Distributed load: 10 fresh keys per iteration with 10 spawned tasks each (100 tasks total).
+/// This exercises the hash map under concurrent access; task spawn and join cost is timed too.
+fn bench_distributed_keys(c: &mut Criterion) {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let merger = Arc::new(Merger::<String, String>::new());
+
+    c.bench_function("distributed_10x10", |b| {
+        b.to_async(&rt).iter(|| {
+            let merger = Arc::clone(&merger);
+            async move {
+                let prefix = KEY_COUNTER.fetch_add(1, Ordering::Relaxed);
+                let tasks: Vec<_> = (0..10)
+                    .flat_map(|key_id| {
+                        let merger = Arc::clone(&merger);
+                        (0..10).map(move |_| {
+                            let merger = Arc::clone(&merger);
+                            let key = format!("key_{prefix}_{key_id}");
+                            tokio::spawn(async move {
+                                merger.execute(&key, || async { "value".to_string() }).await
+                            })
+                        })
+                    })
+                    .collect();
+
+                for task in tasks {
+                    task.await.unwrap();
+                }
+            }
+        });
+    });
+}
+
+criterion_group!(
+    benches,
+    bench_single_call,
+    bench_high_contention,
+    bench_distributed_keys,
+);
+
+criterion_main!(benches);
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 84abae5f..7ccc6ec7 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -61,7 +61,8 @@
 //! - [`PerCore`]: Separate state per core, no deduplication (useful for already-partitioned work)
 //!
 //! ```
-//! use uniflight::{Merger, PerNuma};
+//! use uniflight::Merger;
+//! use thread_aware::PerNuma;
 //!
 //! # async fn example() {
 //! // NUMA-aware merger - each NUMA node gets its own deduplication scope
@@ -89,19 +90,13 @@
 //!
 //! # Performance
 //!
-//! Benchmarks comparing `uniflight` against `singleflight-async`:
+//! Run benchmarks with `cargo bench -p uniflight`. The suite covers:
 //!
-//! | Benchmark | uniflight | singleflight-async | Winner |
-//! |-----------|-----------|-------------------|--------|
-//! | Single call | 777 ns | 691 ns | ~equal |
-//! | 10 concurrent tasks | 58 µs | 57 µs | ~equal |
-//! | 100 concurrent tasks | 218 µs | 219 µs | ~equal |
-//! | 10 keys × 10 tasks | 186 µs | 270 µs | uniflight 1.4x |
-//! | Sequential reuse | 799 ns | 759 ns | ~equal |
+//! - **single_call**: Baseline latency with no contention
+//! - **high_contention_100**: 100 concurrent tasks on the same key
+//! - **distributed_10x10**: 10 keys with 10 tasks each
 //!
-//! uniflight's `DashMap`-based architecture scales well under contention, making it
-//! well-suited for high-concurrency workloads. For single-call scenarios, both libraries
-//! perform similarly (sub-microsecond).
+//! Use `--save-baseline` and `--baseline` flags to track regressions over time.
 
 #![doc(html_logo_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/logo.png")]
 #![doc(html_favicon_url = "https://media.githubusercontent.com/media/microsoft/oxidizer/refs/heads/main/crates/uniflight/favicon.ico")]
@@ -119,16 +114,12 @@ use dashmap::{
     Entry::{Occupied, Vacant},
 };
 use thread_aware::{
-    Arc as TaArc, ThreadAware,
+    Arc as TaArc, PerCore, PerNuma, PerProcess, ThreadAware,
     affinity::{MemoryAffinity, PinnedAffinity},
     storage::Strategy,
 };
 
-// Re-export strategies for convenience
-pub use thread_aware::{PerCore, PerNuma, PerProcess};
-
-/// Represents a class of work and creates a space in which units of work
-/// can be executed with duplicate suppression.
+/// Suppresses duplicate async operations identified by a key.
 ///
 /// The `S` type parameter controls the thread-aware scoping strategy:
 /// - [`PerProcess`]: Single global scope (default, maximum deduplication)
@@ -179,7 +170,8 @@ where
     /// # Examples
     ///
     /// ```
-    /// use uniflight::{Merger, PerNuma, PerCore};
+    /// use uniflight::Merger;
+    /// use thread_aware::{PerNuma, PerCore};
     ///
     /// // Default (PerProcess) - type can be inferred
     /// let global: Merger<String, String> = Merger::new();

From f19b67436e8569ff0b353a1716d445f9b583bc13 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 22 Jan 2026 10:20:03 -0500
Subject: [PATCH 31/33] Update readme

---
 crates/uniflight/README.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/crates/uniflight/README.md b/crates/uniflight/README.md
index b7e0b6c9..24829c1c 100644
--- a/crates/uniflight/README.md
+++ b/crates/uniflight/README.md
@@ -68,7 +68,8 @@ type parameter. This controls how the internal state is partitioned across threa
 * [`PerCore`][__link6]: Separate state per core, no deduplication (useful for already-partitioned work)
 
 ```rust
-use uniflight::{Merger, PerNuma};
+use uniflight::Merger;
+use thread_aware::PerNuma;
 
 // NUMA-aware merger - each NUMA node gets its own deduplication scope
 let merger: Merger<String, String, PerNuma> = Merger::new_per_numa();
@@ -96,9 +97,9 @@ are `Send` when the closure, future, key, and value types are `Send`.
 
 Run benchmarks with `cargo bench -p uniflight`. The suite covers:
 
-- **single_call**: Baseline latency with no contention
-- **high_contention_100**: 100 concurrent tasks on the same key
-- **distributed_10x10**: 10 keys with 10 tasks each
+* **single_call**: Baseline latency with no contention
+* **high_contention_100**: 100 concurrent tasks on the same key
+* **distributed_10x10**: 10 keys with 10 tasks each
 
 Use `--save-baseline` and `--baseline` flags to track regressions over time.
 
@@ -108,7 +109,7 @@ Use `--save-baseline` and `--baseline` flags to track regressions over time.
 This crate was developed as part of <a href="../..">The Oxidizer Project</a>. Browse this crate's <a href="https://github.com/microsoft/oxidizer/tree/main/crates/uniflight">source code</a>.
 </sub>
 
- [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG04a0viCY92EG7dKIcfT3ii0G7aPoi1gqOy7Gxzt8DWyGmp1YWSCgmx0aHJlYWRfYXdhcmVlMC42LjGCaXVuaWZsaWdodGUwLjEuMA
+ [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG7377t2ZpTeEGzAOjWIcwZYsG8xBz1ZJEGjgG3_fpjW3KImqYWSCgmx0aHJlYWRfYXdhcmVlMC42LjGCaXVuaWZsaWdodGUwLjEuMA
  [__link0]: https://docs.rs/uniflight/0.1.0/uniflight/struct.Merger.html
  [__link1]: https://docs.rs/uniflight/0.1.0/uniflight/?search=Merger::execute
  [__link2]: https://doc.rust-lang.org/stable/std/?search=borrow::Borrow

From 168acbbdab31d65974e1462745db359966b8663f Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 22 Jan 2026 12:50:08 -0500
Subject: [PATCH 32/33] Add thread aware types to external type check

---
 crates/uniflight/Cargo.toml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/crates/uniflight/Cargo.toml b/crates/uniflight/Cargo.toml
index cb3d8ab4..29773101 100644
--- a/crates/uniflight/Cargo.toml
+++ b/crates/uniflight/Cargo.toml
@@ -16,6 +16,13 @@ license.workspace = true
 homepage.workspace = true
 repository.workspace = true
 
+[package.metadata.cargo_check_external_types]
+allowed_external_types = [
+    "thread_aware::cell::builtin::PerProcess",
+    "thread_aware::cell::storage::Strategy",
+    "thread_aware::core::ThreadAware",
+]
+
 [dependencies]
 async-once-cell.workspace = true
 dashmap.workspace = true

From 3792000229031729ef5c5ca45d1f48bfef2b9991 Mon Sep 17 00:00:00 2001
From: Schuyler Goodman <schgoo@microsoft.com>
Date: Thu, 22 Jan 2026 12:55:01 -0500
Subject: [PATCH 33/33] Apply clippy and rustfmt fixes

---
 crates/uniflight/benches/performance.rs | 21 ++++-----------------
 crates/uniflight/src/lib.rs             |  6 +++---
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/crates/uniflight/benches/performance.rs b/crates/uniflight/benches/performance.rs
index 6e14ff31..c96574c5 100644
--- a/crates/uniflight/benches/performance.rs
+++ b/crates/uniflight/benches/performance.rs
@@ -32,11 +32,7 @@ fn bench_single_call(c: &mut Criterion) {
     c.bench_function("single_call", |b| {
         b.to_async(&rt).iter(|| {
             let merger = Arc::clone(&merger);
-            async move {
-                merger
-                    .execute(&unique_key(), || async { "value".to_string() })
-                    .await
-            }
+            async move { merger.execute(&unique_key(), || async { "value".to_string() }).await }
         });
     });
 }
@@ -56,9 +52,7 @@ fn bench_high_contention(c: &mut Criterion) {
                     .map(|_| {
                         let merger = Arc::clone(&merger);
                         let key = key.clone();
-                        tokio::spawn(async move {
-                            merger.execute(&key, || async { "value".to_string() }).await
-                        })
+                        tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
                     })
                     .collect();
 
@@ -87,9 +81,7 @@ fn bench_distributed_keys(c: &mut Criterion) {
                         (0..10).map(move |_| {
                             let merger = Arc::clone(&merger);
                             let key = format!("key_{prefix}_{key_id}");
-                            tokio::spawn(async move {
-                                merger.execute(&key, || async { "value".to_string() }).await
-                            })
+                            tokio::spawn(async move { merger.execute(&key, || async { "value".to_string() }).await })
                         })
                     })
                     .collect();
@@ -102,11 +94,6 @@ fn bench_distributed_keys(c: &mut Criterion) {
     });
 }
 
-criterion_group!(
-    benches,
-    bench_single_call,
-    bench_high_contention,
-    bench_distributed_keys,
-);
+criterion_group!(benches, bench_single_call, bench_high_contention, bench_distributed_keys,);
 
 criterion_main!(benches);
diff --git a/crates/uniflight/src/lib.rs b/crates/uniflight/src/lib.rs
index 7ccc6ec7..987b4c2e 100644
--- a/crates/uniflight/src/lib.rs
+++ b/crates/uniflight/src/lib.rs
@@ -92,9 +92,9 @@
 //!
 //! Run benchmarks with `cargo bench -p uniflight`. The suite covers:
 //!
-//! - **single_call**: Baseline latency with no contention
-//! - **high_contention_100**: 100 concurrent tasks on the same key
-//! - **distributed_10x10**: 10 keys with 10 tasks each
+//! - `single_call`: Baseline latency with no contention
+//! - `high_contention_100`: 100 concurrent tasks on the same key
+//! - `distributed_10x10`: 10 keys with 10 tasks each
 //!
 //! Use `--save-baseline` and `--baseline` flags to track regressions over time.